Пример #1
0
class DuelLexer(RegexLexer):
    """
    Lexer for Duel Views Engine (formerly JBST) markup with JavaScript code blocks.
    See http://duelengine.org/.
    See http://jsonml.org/jbst/.

    Everything outside ``<% ... %>`` blocks and ``<script>`` elements is
    delegated to the HTML lexer.

    .. versionadded:: 1.4
    """

    name = 'Duel'
    aliases = ['duel', 'jbst', 'jsonml+bst']
    filenames = ['*.duel', '*.jbst']
    mimetypes = ['text/x-duel', 'text/x-jbst']

    # DOTALL lets ``.`` cross newlines so multi-line blocks match as a unit.
    flags = re.DOTALL

    tokens = {
        'root': [
            # Rules are tried in order and the first match wins.  The generic
            # ``<% ... %>`` rule below also matches text starting with
            # ``<%--`` or ``<%$``, so the two specific forms must come first
            # or they can never fire.

            # server-side comment: <%-- ... --%>
            (r'(<%--)(.*?)(--%>)',
             bygroups(Name.Tag, Comment.Multiline, Name.Tag)),
            # expression builder: <%$ name: value %>
            # The name part may not run past a closing ``%>``; a ``<%$`` block
            # without a colon therefore falls through to the generic rule
            # instead of swallowing text up to some later colon.
            (r'(<%\$)((?:(?!%>)[^:])*)(:)(.*?)(%>)',
             bygroups(Name.Tag, Name.Function, Punctuation, String, Name.Tag)),
            # generic code block, optionally marked with one of @ = # ! :
            (r'(<%[@=#!:]?)(.*?)(%>)',
             bygroups(Name.Tag, using(JavascriptLexer), Name.Tag)),
            # <script> element: tags as HTML, body as JavaScript
            (r'(<script.*?>)(.*?)(</script>)',
             bygroups(using(HtmlLexer), using(JavascriptLexer),
                      using(HtmlLexer))),
            # markup up to (but not including) the next '<'
            (r'(.+?)(?=<)', using(HtmlLexer)),
            # whatever is left
            (r'.+', using(HtmlLexer)),
        ],
    }
Пример #2
0
class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    Text outside of square brackets is emitted as plain text.  A ``[name`` or
    ``[/name`` opener switches to the ``tag`` state, which handles attributes
    and arguments until the closing ``]`` pops back to ``root``.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            # everything up to the next opening bracket
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)', bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }
Пример #3
0
class TypoScriptCssDataLexer(RegexLexer):
    """
    Lexer that highlights markers, constants and registers within css blocks.

    Uses a single ``root`` state; anything that is not a marker, constant,
    register or comment is emitted as ``String``.

    .. versionadded:: 2.2
    """

    name = 'TypoScriptCssData'
    aliases = ['typoscriptcssdata']

    tokens = {
        'root': [
            # marker: ###MARK###
            # (the greedy ``.*`` groups take the text before and after the
            # marker on the same line)
            (r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)),
            # constant: {$some.constant}
            (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})',
             bygroups(String.Symbol, Operator, Name.Constant, Name.Constant,
                      String.Symbol)),  # constant
            # constant: {register:somevalue}
            (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)',
             bygroups(String, String.Symbol, Name.Constant, Operator,
                      Name.Constant, String.Symbol, String)),  # constant
            # whitespace
            (r'\s+', Text),
            # comments
            (r'/\*(?:(?!\*/).)*\*/', Comment),
            # '#' and '//' line comments.  The lookbehind rejects a start that
            # directly follows '#', a single quote or a double quote; the
            # lookahead rejects '#' followed by 6 or 3 hex digits.
            (r'(?<!(#|\'|"))(?:#(?!(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))[^\n#]+|//[^\n]*)',
             Comment),
            # other
            (r'[<>,:=.*%+|]', String),
            (r'[\w"\-!/&;(){}]+', String),
        ]
    }
Пример #4
0
class TypoScriptHtmlDataLexer(RegexLexer):
    """
    Lexer that highlights markers, constants and registers within html tags.

    Uses a single ``root`` state; anything that is not one of the special
    constructs below is emitted as ``String``.

    .. versionadded:: 2.2
    """

    name = 'TypoScriptHtmlData'
    aliases = ['typoscripthtmldata']

    tokens = {
        'root': [
            # INCLUDE_TYPOSCRIPT
            (r'(INCLUDE_TYPOSCRIPT)', Name.Class),
            # Language label or extension resource FILE:... or LLL:... or EXT:...
            # (runs up to the next '}', double quote or end of line)
            (r'(EXT|FILE|LLL):[^}\n"]*', String),
            # marker: ###MARK###
            (r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)),
            # constant: {$some.constant}
            (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})',
             bygroups(String.Symbol, Operator, Name.Constant, Name.Constant,
                      String.Symbol)),  # constant
            # constant: {register:somevalue}
            (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)',
             bygroups(String, String.Symbol, Name.Constant, Operator,
                      Name.Constant, String.Symbol, String)),  # constant
            # whitespace
            (r'\s+', Text),
            # other
            (r'[<>,:=.*%+|]', String),
            (r'[\w"\-!/&;(){}#]+', String),
        ]
    }
Пример #5
0
class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    Every rule is anchored with ``^`` and works on one whole line.

    .. versionadded:: 0.9
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    tokens = {
        'root': [
            # '#,' lines
            (r'^#,\s.*?$', Keyword.Type),
            # '#:' lines
            (r'^#:\s.*?$', Keyword.Declaration),
            # (r'^#$', Comment),
            # every other '#' line: bare '#', '#.', '#|', '#~' or '# '
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            # quoted line that starts with a 'Name:' style field
            (r'^(")([A-Za-z-]+:)(.*")$', bygroups(String, Name.Property,
                                                  String)),
            # any other line that is a single quoted string
            (r'^".*"$', String),
            # keyword followed by a quoted string
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            # indexed form: msgstr[N] "..." (N is a single digit here)
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text,
                      String)),
        ]
    }
Пример #6
0
class ProtoBufLexer(RegexLexer):
    """
    Lexer for `Protocol Buffer <http://code.google.com/p/protobuf/>`_
    definition files.

    The ``package``, ``message`` and ``type`` states each consume the single
    identifier that follows the introducing keyword and then pop.

    .. versionadded:: 1.4
    """

    name = 'Protocol Buffer'
    aliases = ['protobuf', 'proto']
    filenames = ['*.proto']

    tokens = {
        'root': [
            (r'[ \t]+', Text),
            (r'[,;{}\[\]()<>]', Punctuation),
            # '//' and '/* */' comments; the optional ``\\\n`` parts allow a
            # backslash-newline between the two characters of the delimiter
            (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
            (words(('import', 'option', 'optional', 'required', 'repeated',
                    'reserved', 'default', 'packed', 'ctype', 'extensions',
                    'to', 'max', 'rpc', 'returns', 'oneof'),
                   prefix=r'\b',
                   suffix=r'\b'), Keyword),
            (words(('int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                    'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'float',
                    'double', 'bool', 'string', 'bytes'),
                   suffix=r'\b'), Keyword.Type),
            (r'(true|false)\b', Keyword.Constant),
            # these keywords switch to a state that classifies the next name
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Text), 'package'),
            (r'(message|extend)(\s+)', bygroups(Keyword.Declaration,
                                                Text), 'message'),
            (r'(enum|group|service)(\s+)', bygroups(Keyword.Declaration,
                                                    Text), 'type'),
            (r'\".*?\"', String),
            (r'\'.*?\'', String),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'(\-?(inf|nan))\b', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            # NOTE(review): inside the class, ``+-=`` is a *range* from '+' to
            # '=', so this also matches ',', '-', '.', '/', digits, ':', ';'
            # and '<'.  Earlier rules already take most of those; '.', '/' and
            # ':' can still end up here as Operator.  Narrowing the class
            # would change how those characters are lexed -- confirm intent
            # before touching it.
            (r'[+-=]', Operator),
            (r'([a-zA-Z_][\w.]*)([ \t]*)(=)',
             bygroups(Name.Attribute, Text, Operator)),
            (r'[a-zA-Z_][\w.]*', Name),
        ],
        # name following 'package'
        'package': [
            (r'[a-zA-Z_]\w*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        # name following 'message' / 'extend'
        'message': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        # name following 'enum' / 'group' / 'service'
        'type': [
            (r'[a-zA-Z_]\w*', Name, '#pop'),
            default('#pop'),
        ],
    }
Пример #7
0
class AlloyLexer(RegexLexer):
    """
    For `Alloy <http://alloy.mit.edu>`_ source code.

    Besides ``root`` there are three short-lived states (``sig``, ``module``
    and ``fun``) entered after the corresponding declaration keywords.

    .. versionadded:: 2.0
    """

    name = 'Alloy'
    aliases = ['alloy']
    filenames = ['*.als']
    mimetypes = ['text/x-alloy']

    flags = re.MULTILINE | re.DOTALL

    # identifier: letter or underscore, then word characters or apostrophes
    iden_rex = r'[a-zA-Z_][\w\']*'
    # shared rule for horizontal whitespace (any whitespace except newline)
    text_tuple = (r'[^\S\n]+', Text)

    tokens = {
        # after 'sig' / 'enum': names and commas until 'extends' or '{'
        'sig': [
            (r'(extends)\b', Keyword, '#pop'),
            (iden_rex, Name),
            text_tuple,
            (r',', Punctuation),
            (r'\{', Operator, '#pop'),
        ],
        # after 'module' / 'open': a single name
        'module': [
            text_tuple,
            (iden_rex, Name, '#pop'),
        ],
        # after 'fun' / 'pred' / 'fact' / 'assert': a name or an opening brace
        'fun': [
            text_tuple,
            (r'\{', Operator, '#pop'),
            (iden_rex, Name, '#pop'),
        ],
        'root': [
            # comments: '--' and '//' to end of line, '/* ... */' blocks
            (r'--.*?$', Comment.Single),
            (r'//.*?$', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            text_tuple,
            (r'(module|open)(\s+)', bygroups(Keyword.Namespace,
                                             Text), 'module'),
            (r'(sig|enum)(\s+)', bygroups(Keyword.Declaration, Text), 'sig'),
            (r'(iden|univ|none)\b', Keyword.Constant),
            (r'(int|Int)\b', Keyword.Type),
            (r'(this|abstract|extends|set|seq|one|lone|let)\b', Keyword),
            (r'(all|some|no|sum|disj|when|else)\b', Keyword),
            (r'(run|check|for|but|exactly|expect|as)\b', Keyword),
            (r'(and|or|implies|iff|in)\b', Operator.Word),
            (r'(fun|pred|fact|assert)(\s+)', bygroups(Keyword, Text), 'fun'),
            # multi-character operators first, then single characters
            (r'!|#|&&|\+\+|<<|>>|>=|<=>|<=|\.|->', Operator),
            (r'[-+/*%=<>&!^|~{}\[\]().]', Operator),
            (iden_rex, Name),
            (r'[:,]', Punctuation),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r'\n', Text),
        ]
    }
Пример #8
0
class CppLexer(CFamilyLexer):
    """
    For C++ source code with preprocessor directives.

    Extends the ``statements`` and ``root`` states of the base lexer (via
    ``inherit``) and defines a ``classname`` state for the name after
    ``class``.
    """
    name = 'C++'
    aliases = ['cpp', 'c++']
    filenames = [
        '*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx',
        '*.C', '*.H', '*.cp', '*.CPP'
    ]
    mimetypes = ['text/x-c++hdr', 'text/x-c++src']
    priority = 0.1

    tokens = {
        'statements': [
            # C++ keywords, tried before the inherited statement rules
            (words(
                ('catch', 'const_cast', 'delete', 'dynamic_cast', 'explicit',
                 'export', 'friend', 'mutable', 'namespace', 'new', 'operator',
                 'private', 'protected', 'public', 'reinterpret_cast',
                 'restrict', 'static_cast', 'template', 'this', 'throw',
                 'throws', 'try', 'typeid', 'typename', 'using', 'virtual',
                 'constexpr', 'nullptr', 'decltype', 'thread_local', 'alignas',
                 'alignof', 'static_assert', 'noexcept', 'override', 'final'),
                suffix=r'\b'), Keyword),
            (r'char(16_t|32_t)\b', Keyword.Type),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            # C++11 raw strings
            # (group 3 is the delimiter of up to 16 characters; ``\3`` requires
            # the same delimiter after the closing parenthesis)
            (r'(R)(")([^\\()\s]{,16})(\()((?:.|\n)*?)(\)\3)(")',
             bygroups(String.Affix, String, String.Delimiter, String.Delimiter,
                      String, String.Delimiter, String)),
            # C++11 UTF-8/16/32 strings
            (r'(u8|u|U)(")', bygroups(String.Affix, String), 'string'),
            inherit,
        ],
        'root': [
            inherit,
            # C++ Microsoft-isms
            (words(('virtual_inheritance', 'uuidof', 'super',
                    'single_inheritance', 'multiple_inheritance', 'interface',
                    'event'),
                   prefix=r'__',
                   suffix=r'\b'), Keyword.Reserved),
            # Offload C++ extensions, http://offload.codeplay.com/
            (r'__(offload|blockingoffload|outer)\b', Keyword.Pseudo),
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            # template specification
            (r'\s*(?=>)', Text, '#pop'),
        ],
    }

    def analyse_text(text):
        """
        Score how likely *text* is to be C++.

        Returns 0.2 when an ``#include <name>`` line with a lowercase or
        underscore-only header name is found, otherwise 0.4 when
        ``using namespace `` is found.  The include check runs first, so text
        containing both scores 0.2.  Falls off the end (returning ``None``)
        when neither is present.
        """
        if re.search('#include <[a-z_]+>', text):
            return 0.2
        if re.search('using namespace ', text):
            return 0.4
Пример #9
0
class BaseMakefileLexer(RegexLexer):
    """
    Lexer for simple Makefiles (no preprocessing).

    Recipe lines and assignment values are handed to the Bash lexer; target
    lines switch to ``block-header`` and ``$(`` opens a nested ``expansion``.

    .. versionadded:: 0.10
    """

    name = 'Base Makefile'
    aliases = ['basemake']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            # recipes (need to allow spaces because of expandtabs)
            (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)),
            # special variables
            (r'\$[<@$+%?|*]', Keyword),
            (r'\s+', Text),
            (r'#.*?\n', Comment),
            # 'export' followed only by names on the rest of the line
            (r'(export)(\s+)(?=[\w${}\t -]+\n)',
             bygroups(Keyword, Text), 'export'),
            # any other 'export' (e.g. in front of an assignment)
            (r'export\s+', Keyword),
            # assignment
            # (the value is either a run of backslash-continued lines or one
            # line, and is lexed as Bash)
            (r'([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)',
             bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
            # strings
            (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double),
            (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single),
            # targets
            (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text),
             'block-header'),
            # expansions
            (r'\$\(', Keyword, 'expansion'),
        ],
        # inside $( ... ); parentheses nest via #push / #pop
        'expansion': [
            (r'[^\w$().-]+', Text),
            (r'[\w.-]+', Name.Variable),
            (r'\$', Keyword),
            (r'\(', Keyword, '#push'),
            (r'\)', Keyword, '#pop'),
        ],
        # names after 'export', up to the end of the line
        'export': [
            (r'[\w${}-]+', Name.Variable),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
        ],
        # rest of a target line after the colon(s)
        'block-header': [
            (r'[,|]', Punctuation),
            (r'#.*?\n', Comment, '#pop'),
            (r'\\\n', Text),  # line continuation
            (r'\$\(', Keyword, 'expansion'),
            (r'[a-zA-Z_]+', Name),
            (r'\n', Text, '#pop'),
            (r'.', Text),
        ],
    }
Пример #10
0
class BooLexer(RegexLexer):
    """
    For `Boo <http://boo.codehaus.org/>`_ source code.

    ``/* ... */`` comments are handled in a separate ``comment`` state that
    nests; ``funcname``, ``classname`` and ``namespace`` each consume the
    single name following ``def``, ``class`` and ``namespace``.
    """

    name = 'Boo'
    aliases = ['boo']
    filenames = ['*.boo']
    mimetypes = ['text/x-boo']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(#|//).*$', Comment.Single),
            (r'/[*]', Comment.Multiline, 'comment'),
            (r'[]{}:(),.;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            # NOTE(review): there is no repetition on the group, so this only
            # matches exactly one element between the slashes -- confirm that
            # is intended.
            (r'/(\\\\|\\/|[^/\s])/', String.Regex),
            (r'@/(\\\\|\\/|[^/])*/', String.Regex),
            (r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator),
            # keywords ('as' is listed twice; 'and', 'in', 'is', 'not' and
            # 'or' are already taken by the Operator.Word rule above)
            (r'(as|abstract|callable|constructor|destructor|do|import|'
             r'enum|event|final|get|interface|internal|of|override|'
             r'partial|private|protected|public|return|set|static|'
             r'struct|transient|virtual|yield|super|and|break|cast|'
             r'continue|elif|else|ensure|except|for|given|goto|if|in|'
             r'is|isa|not|or|otherwise|pass|raise|ref|try|unless|when|'
             r'while|from|as)\b', Keyword),
            # 'def' directly followed by a parenthesised list (no name)
            (r'def(?=\s+\(.*?\))', Keyword),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(namespace)(\s+)', bygroups(Keyword, Text), 'namespace'),
            # builtins, unless preceded by a dot
            (r'(?<!\.)(true|false|null|self|__eval__|__switch__|array|'
             r'assert|checked|enumerate|filter|getter|len|lock|map|'
             r'matrix|max|min|normalArrayIndexing|print|property|range|'
             r'rawArrayIndexing|required|typeof|unchecked|using|'
             r'yieldAll|zip)\b', Name.Builtin),
            (r'"""(\\\\|\\"|.*?)"""', String.Double),
            (r'"(\\\\|\\"|[^"]*?)"', String.Double),
            (r"'(\\\\|\\'|[^']*?)'", String.Single),
            (r'[a-zA-Z_]\w*', Name),
            (r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float),
            # number with an 'ms', 'm', 'd', 'h' or 's' suffix
            (r'[0-9][0-9.]*(ms?|d|h|s)', Number),
            (r'0\d+', Number.Oct),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer),
        ],
        # block comment body; a nested '/*' pushes another level
        'comment': [('/[*]', Comment.Multiline, '#push'),
                    ('[*]/', Comment.Multiline, '#pop'),
                    ('[^/*]', Comment.Multiline), ('[*/]', Comment.Multiline)],
        'funcname': [(r'[a-zA-Z_]\w*', Name.Function, '#pop')],
        'classname': [(r'[a-zA-Z_]\w*', Name.Class, '#pop')],
        'namespace': [(r'[a-zA-Z_][\w.]*', Name.Namespace, '#pop')]
    }
Пример #11
0
class DarcsPatchLexer(RegexLexer):
    """
    DarcsPatchLexer is a lexer for the various versions of the darcs patch
    format.  Examples of this format are derived by commands such as
    ``darcs annotate --patch`` and ``darcs send``.

    .. versionadded:: 0.10
    """

    name = 'Darcs Patch'
    aliases = ['dpatch']
    filenames = ['*.dpatch', '*.darcspatch']

    # words recognised as keywords at the start of a line (after optional
    # whitespace); joined into one alternation in the 'root' rules below
    DPATCH_KEYWORDS = ('hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move',
                       'replace')

    tokens = {
        'root': [
            (r'<', Operator),
            (r'>', Operator),
            (r'\{', Operator),
            (r'\}', Operator),
            # two-line header closed by ']' right after the number
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text, Operator)),
            # same header without the closing ']': what follows is lexed in
            # the 'comment' state until ']' is seen
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text), 'comment'),
            (r'New patches:', Generic.Heading),
            (r'Context:', Generic.Heading),
            (r'Patch bundle hash:', Generic.Heading),
            (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS),
             bygroups(Text, Keyword, Text)),
            # '+' and '-' start an inserted / deleted line
            (r'\+', Generic.Inserted, "insert"),
            (r'-', Generic.Deleted, "delete"),
            (r'.*\n', Text),
        ],
        'comment': [
            (r'[^\]].*\n', Comment),
            (r'\]', Operator, "#pop"),
        ],
        'specialText': [  # darcs add [_CODE_] special operators for clarity
            (r'\n', Text, "#pop"),  # line-based
            (r'\[_[^_]*_]', Operator),
        ],
        # rest of an inserted line
        'insert': [
            include('specialText'),
            (r'\[', Generic.Inserted),
            (r'[^\n\[]+', Generic.Inserted),
        ],
        # rest of a deleted line
        'delete': [
            include('specialText'),
            (r'\[', Generic.Deleted),
            (r'[^\n\[]+', Generic.Deleted),
        ],
    }
Пример #12
0
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    Block comments nest, string and quoted-identifier bodies are handled in
    their own states, and dollar-quoted bodies are passed to
    ``language_callback``.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # type names; a space inside a multi-word name matches any run of
            # whitespace
            (r'(' + '|'.join(
                s.replace(" ", r"\s+")
                for s in DATATYPES + PSEUDO_TYPES) + r')\b', Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),
            # A float needs a decimal point with digits on at least one side,
            # or an exponent.  The previous pattern also accepted a bare digit
            # run (so the integer rule below never fired) and a lone '.'
            # (so the dot in ``tbl.col`` was reported as a float).
            (r'(?:[0-9]+\.[0-9]*|\.[0-9]+)(?:e[+-]?[0-9]+)?'
             r'|[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            # string literal with optional E / U& prefix
            (r"((?:E|U&)?)(')", bygroups(String.Affix,
                                         String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix,
                                       String.Name), 'quoted-ident'),
            # dollar-quoted body: $tag$ ... $tag$ (``\2`` forces equal tags)
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
            (r'[;:()\[\]{},.]', Punctuation),
        ],
        # nested /* ... */ comments: each '/*' pushes this state again
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline),
        ],
        # string body; '' is an escaped quote
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),
            (r"'", String.Single, '#pop'),
        ],
        # quoted identifier body; "" is an escaped quote
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),
            (r'"', String.Name, '#pop'),
        ],
    }
Пример #13
0
class DebianControlLexer(RegexLexer):
    """
    Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.

    A few fields get dedicated states (``description``, ``maintainer``,
    ``depends``); every other ``Name: value`` line is handled by the generic
    rule at the end of ``root``.

    .. versionadded:: 0.9
    """
    name = 'Debian Control file'
    aliases = ['control', 'debcontrol']
    filenames = ['control']

    tokens = {
        'root': [
            (r'^(Description)', Keyword, 'description'),
            (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
            (r'^((Build-)?Depends)', Keyword, 'depends'),
            # fields whose value is highlighted as a number
            (r'^((?:Python-)?Version)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^((?:Installed-)?Size)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$',
             bygroups(Keyword, Text, Number)),
            # any other field
            (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
             bygroups(Keyword, Whitespace, String)),
        ],
        'maintainer': [
            # The end-of-line form must be tried first: the plain ``<...>``
            # rule matches everything it matches, so with the opposite order
            # the '#pop' could never be reached.
            (r'<[^>]+>$', Generic.Strong, '#pop'),
            (r'<[^>]+>', Generic.Strong),
            (r',\n?', Text),
            (r'.', Text),
        ],
        'description': [
            (r'(.*)(Homepage)(: )(\S+)',
             bygroups(Text, String, Name, Name.Class)),
            # summary on the field line itself
            (r':.*\n', Generic.Strong),
            # continuation lines start with a space
            (r' .*\n', Text),
            default('#pop'),
        ],
        'depends': [
            (r':\s*', Text),
            # start of a ${name:name} substitution
            (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text,
                                                  Name.Entity)),
            (r'\(', Text, 'depend_vers'),
            (r',', Text),
            (r'\|', Operator),
            (r'[\s]+', Text),
            (r'[})]\s*$', Text, '#pop'),
            (r'\}', Text),
            (r'[^,]$', Name.Function, '#pop'),
            (r'([+.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)),
            (r'\[.*?\]', Name.Entity),
        ],
        # version constraint inside parentheses
        'depend_vers': [
            (r'\),', Text, '#pop'),
            # ')' not followed by a comma also leaves 'depends'
            (r'\)[^,]', Text, '#pop:2'),
            (r'([><=]+)(\s*)([^)]+)', bygroups(Operator, Text, Number)),
        ],
    }
Пример #14
0
    def gen_elixir_sigil_rules():
        """
        Build the lexer states used for sigils.

        Returns a dict mapping state names to rule lists.  ``'sigils'`` holds
        the rules that recognise a sigil opener (``~`` plus a letter plus a
        delimiter).  A lowercase letter leads to the ``<name>-intp`` state
        and an uppercase letter to ``<name>-no-intp``.  Heredoc openers
        additionally get a ``<name>-end`` state that takes trailing letters.
        """
        sigil_tok = String.Other

        # heredoc delimiters (slightly different rules): (regex, state prefix)
        heredoc_delims = (
            (r'"""', 'triquot'),
            (r"'''", 'triapos'),
        )

        # all other valid delimiters: (opening regex, closing regex, prefix)
        paired_delims = (
            (r'\{', r'\}', 'cb'),
            (r'\[', r'\]', 'sb'),
            (r'\(', r'\)', 'pa'),
            (r'<', r'>', 'ab'),
            (r'/', r'/', 'slas'),
            (r'\|', r'\|', 'pipe'),
            ('"', '"', 'quot'),
            ("'", "'", 'apos'),
        )

        opener_rules = []
        states = {'sigils': opener_rules}

        for delim, prefix in heredoc_delims:
            end_state = prefix + '-end'
            intp_state = prefix + '-intp'
            plain_state = prefix + '-no-intp'

            opener_rules.append(
                (r'(~[a-z])(%s)' % (delim,),
                 bygroups(sigil_tok, String.Heredoc),
                 (end_state, intp_state)))
            opener_rules.append(
                (r'(~[A-Z])(%s)' % (delim,),
                 bygroups(sigil_tok, String.Heredoc),
                 (end_state, plain_state)))

            # letters directly after the closing delimiter, if any
            states[end_state] = [
                (r'[a-zA-Z]+', sigil_tok, '#pop'),
                default('#pop'),
            ]

            # the closing delimiter sits at the start of a line, optionally
            # indented
            closing = r'^\s*' + delim
            states[intp_state] = [
                (closing, String.Heredoc, '#pop'),
                include('heredoc_interpol'),
            ]
            states[plain_state] = [
                (closing, String.Heredoc, '#pop'),
                include('heredoc_no_interpol'),
            ]

        for opener, closer, prefix in paired_delims:
            intp_state = prefix + '-intp'
            plain_state = prefix + '-no-intp'

            opener_rules.extend([
                (r'~[a-z]' + opener, sigil_tok, intp_state),
                (r'~[A-Z]' + opener, sigil_tok, plain_state),
            ])
            states[intp_state] = gen_elixir_sigstr_rules(closer, sigil_tok)
            states[plain_state] = gen_elixir_sigstr_rules(
                closer, sigil_tok, interpol=False)

        return states
Пример #15
0
class ScssLexer(RegexLexer):
    """
    For SCSS stylesheets.

    The states defined here are completed with the shared entries from
    ``common_sass_tokens`` right after the ``tokens`` dict.
    """

    name = 'SCSS'
    aliases = ['scss']
    filenames = ['*.scss']
    mimetypes = ['text/x-scss']

    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            # at-rules; each switches to the state that lexes what follows
            (r'@import', Keyword, 'value'),
            (r'@for', Keyword, 'for'),
            (r'@(debug|warn|if|while)', Keyword, 'value'),
            (r'(@mixin)( [\w-]+)', bygroups(Keyword, Name.Function), 'value'),
            (r'(@include)( [\w-]+)', bygroups(Keyword,
                                              Name.Decorator), 'value'),
            (r'@extend', Keyword, 'selector'),
            (r'(@media)(\s+)', bygroups(Keyword, Text), 'value'),
            # any other at-rule
            (r'@[\w-]+', Keyword, 'selector'),
            # variable assignment: $name:
            (r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable,
                                                Operator), 'value'),
            # TODO: broken, and prone to infinite loops.
            # (r'(?=[^;{}][;}])', Name.Attribute, 'attr'),
            # (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'),
            # nothing above matched: treat what follows as a selector
            default('selector'),
        ],
        'attr': [
            (r'[^\s:="\[]+', Name.Attribute),
            (r'#\{', String.Interpol, 'interpolation'),
            (r'[ \t]*:', Operator, 'value'),
            default('#pop'),
        ],
        'inline-comment': [
            (r"(\\#|#(?=[^{])|\*(?=[^/])|[^#*])+", Comment.Multiline),
            (r'#\{', String.Interpol, 'interpolation'),
            (r"\*/", Comment, '#pop'),
        ],
    }
    # Add the shared states.  Each one is copied so that the extend() calls
    # below modify this lexer's own copy rather than the shared object.
    for group, common in iteritems(common_sass_tokens):
        tokens[group] = copy.copy(common)
    # in both states a newline is plain text and ';', '{' or '}' ends the state
    tokens['value'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')])
    tokens['selector'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')])
Пример #16
0
class CharmciLexer(CppLexer):
    """
    For `Charm++ <https://charm.cs.illinois.edu>`_ interface files (.ci).

    Adds the interface-file keywords in front of the inherited C++
    ``statements`` rules.

    .. versionadded:: 2.4
    """

    name = 'Charmci'
    aliases = ['charmci']
    filenames = ['*.ci']

    mimetypes = []

    tokens = {
        'statements': [
            # the name after 'module' is lexed by the inherited 'classname'
            # state
            (r'(module)(\s+)', bygroups(Keyword, Text), 'classname'),
            # The ``\b`` suffix keeps a keyword from matching as the prefix of
            # a longer identifier (e.g. 'group' inside 'groups'), in line with
            # the keyword lists of the parent C++ lexer.
            (words(('mainmodule', 'mainchare', 'chare', 'array', 'group',
                    'nodegroup', 'message', 'conditional'),
                   suffix=r'\b'), Keyword),
            (words(('entry', 'aggregate', 'threaded', 'sync', 'exclusive',
                    'nokeep', 'notrace', 'immediate', 'expedited', 'inline',
                    'local', 'python', 'accel', 'readwrite', 'writeonly',
                    'accelblock', 'memcritical', 'packed', 'varsize',
                    'initproc', 'initnode', 'initcall', 'stacksize',
                    'createhere', 'createhome', 'reductiontarget', 'iget',
                    'nocopy', 'mutable', 'migratable', 'readonly'),
                   suffix=r'\b'), Keyword),
            inherit,
        ],
    }
Пример #17
0
class VGLLexer(RegexLexer):
    """
    For `SampleManager VGL <http://www.thermoscientific.com/samplemanager>`_
    source code.

    Matching is case-insensitive; comments are written in curly braces.

    .. versionadded:: 1.6
    """
    name = 'VGL'
    aliases = ['vgl']
    filenames = ['*.rpf']

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE

    tokens = {
        'root': [
            # { ... } comment
            (r'\{[^}]*\}', Comment.Multiline),
            (r'declare', Keyword.Constant),
            # keywords, unless followed (after optional spaces) by one of
            # = < > . , ( )
            (r'(if|then|else|endif|while|do|endwhile|and|or|prompt|object'
             r'|create|on|line|with|global|routine|value|endroutine|constant'
             r'|global|set|join|library|compile_option|file|exists|create|copy'
             r'|delete|enable|windows|name|notprotected)(?! *[=<>.,()])',
             Keyword),
            (r'(true|false|null|empty|error|locked)', Keyword.Constant),
            # '.name' attribute access.  This has to precede the operator
            # rule: that rule's character class contains '.', so with the
            # opposite order the dot was always taken as a bare operator and
            # this rule could never match.
            (r'(\.)([a-z_$][\w$]*)', bygroups(Operator, Name.Attribute)),
            (r'[~^*#!%&\[\]()<>|+=:;,./?-]', Operator),
            (r'"[^"]*"', String),
            (r'[0-9][0-9]*(\.[0-9]+(e[+\-]?[0-9]+)?)?', Number),
            (r'[a-z_$][\w$]*', Name),
            (r'[\r\n]+', Text),
            (r'\s+', Text),
        ]
    }
Пример #18
0
class NewspeakLexer(RegexLexer):
    """
    For `Newspeak <http://newspeaklanguage.org/>`_ syntax.

    ``root`` handles declarations and then falls back to the shared
    ``expressionstat``, ``literals`` and ``whitespace`` rule groups.

    .. versionadded:: 1.1
    """
    name = 'Newspeak'
    filenames = ['*.ns2']
    aliases = [
        'newspeak',
    ]
    mimetypes = ['text/x-newspeak']

    tokens = {
        'root':
        [(r'\b(Newsqueak2)\b', Keyword.Declaration), (r"'[^']*'", String),
         # class declaration: 'class' followed by the class name
         (r'\b(class)(\s+)(\w+)(\s*)',
          bygroups(Keyword.Declaration, Text, Name.Class, Text)),
         (r'\b(mixin|self|super|private|public|protected|nil|true|false)\b',
          Keyword),
         # 'keyword: argument' pair
         (r'(\w+\:)(\s*)([a-zA-Z_]\w+)',
          bygroups(Name.Function, Text, Name.Variable)),
         # 'name =' definition
         (r'(\w+)(\s*)(=)', bygroups(Name.Attribute, Text, Operator)),
         (r'<\w+>', Comment.Special),
         include('expressionstat'),
         include('whitespace')],
        'expressionstat': [
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'\d+', Number.Integer),
            (r':\w+', Name.Variable),
            (r'(\w+)(::)', bygroups(Name.Variable, Operator)),
            (r'\w+:', Name.Function),
            (r'\w+', Name.Variable),
            (r'\(|\)', Punctuation),
            (r'\[|\]', Punctuation),
            (r'\{|\}', Punctuation),
            (r'(\^|\+|\/|~|\*|<|>|=|@|%|\||&|\?|!|,|-|:)', Operator),
            (r'\.|;', Punctuation),
            include('whitespace'),
            include('literals'),
        ],
        # $c characters, 'strings' and #symbols
        'literals':
        [(r'\$.', String), (r"'[^']*'", String), (r"#'[^']*'", String.Symbol),
         (r"#\w+:?", String.Symbol),
         (r"#(\+|\/|~|\*|<|>|=|@|%|\||&|\?|!|,|-)+", String.Symbol)],
        # double-quoted text is a comment
        'whitespace': [(r'\s+', Text), (r'"[^"]*"', Comment)],
    }
Пример #19
0
class SnobolLexer(RegexLexer):
    """
    Lexer for the SNOBOL4 programming language.

    Recognizes the common ASCII equivalents of the original SNOBOL4 operators.
    Does not require spaces around binary operators.

    State flow: ``root`` handles the start of a line, ``statement`` the rest
    of it, ``goto`` the part after ``:``, and ``heredoc`` everything after an
    ``END`` line.

    .. versionadded:: 1.5
    """

    name = "Snobol"
    aliases = ["snobol"]
    filenames = ['*.snobol']
    mimetypes = ['text/x-snobol']

    tokens = {
        # root state, start of line
        # comments, continuation lines, and directives start in column 1
        # as do labels
        'root': [
            (r'\*.*\n', Comment),
            (r'[+.] ', Punctuation, 'statement'),
            (r'-.*\n', Comment),
            (r'END\s*\n', Name.Label, 'heredoc'),
            (r'[A-Za-z$][\w$]*', Name.Label, 'statement'),
            (r'\s+', Text, 'statement'),
        ],
        # statement state, line after continuation or label
        'statement': [
            # end of line: back to 'root'
            (r'\s*\n', Text, '#pop'),
            (r'\s+', Text),
            # builtin names, only when the neighbouring characters are not
            # word characters or '.'
            (r'(?<=[^\w.])(LT|LE|EQ|NE|GE|GT|INTEGER|IDENT|DIFFER|LGT|SIZE|'
             r'REPLACE|TRIM|DUPL|REMDR|DATE|TIME|EVAL|APPLY|OPSYN|LOAD|UNLOAD|'
             r'LEN|SPAN|BREAK|ANY|NOTANY|TAB|RTAB|REM|POS|RPOS|FAIL|FENCE|'
             r'ABORT|ARB|ARBNO|BAL|SUCCEED|INPUT|OUTPUT|TERMINAL)(?=[^\w.])',
             Name.Builtin),
            (r'[A-Za-z][\w.]*', Name),
            # ASCII equivalents of original operators
            # | for the EBCDIC equivalent, ! likewise
            # \ for EBCDIC negation
            (r'\*\*|[?$.!%*/#+\-@|&\\=]', Operator),
            (r'"[^"]*"', String),
            (r"'[^']*'", String),
            # Accept SPITBOL syntax for real numbers
            # as well as Macro SNOBOL4
            # (digits count as an integer only when the next character is not
            # '.', 'E', 'e', 'D' or 'd')
            (r'[0-9]+(?=[^.EeDd])', Number.Integer),
            (r'[0-9]+(\.[0-9]*)?([EDed][-+]?[0-9]+)?', Number.Float),
            # Goto
            (r':', Punctuation, 'goto'),
            (r'[()<>,;]', Punctuation),
        ],
        # Goto block
        # (end of line pops two states: 'goto' and the enclosing 'statement')
        'goto': [(r'\s*\n', Text, "#pop:2"), (r'\s+', Text), (r'F|S', Keyword),
                 (r'(\()([A-Za-z][\w.]*)(\))',
                  bygroups(Punctuation, Name.Label, Punctuation))],
        # everything after the END statement is basically one
        # big heredoc.
        # (this state has no rule that pops)
        'heredoc': [(r'.*\n', String.Heredoc)]
    }
Пример #20
0
class RslLexer(RegexLexer):
    """
    `RSL <http://en.wikipedia.org/wiki/RAISE>`_ is the formal specification
    language used in RAISE (Rigorous Approach to Industrial Software Engineering)
    method.

    .. versionadded:: 2.0
    """
    name = 'RSL'
    aliases = ['rsl']
    filenames = ['*.rsl']
    mimetypes = ['text/rsl']

    # MULTILINE: '^' anchors at every line start (used by the function
    # rules).  DOTALL: '.' also matches newlines, which the multi-line
    # comment rules rely on -- and which makes greedy '.*' dangerous.
    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            # reserved words, type names and word-like operators
            (words(
                ('Bool', 'Char', 'Int', 'Nat', 'Real', 'Text', 'Unit', 'abs',
                 'all', 'always', 'any', 'as', 'axiom', 'card', 'case',
                 'channel', 'chaos', 'class', 'devt_relation', 'dom', 'elems',
                 'else', 'elif', 'end', 'exists', 'extend', 'false', 'for',
                 'hd', 'hide', 'if', 'in', 'is', 'inds', 'initialise', 'int',
                 'inter', 'isin', 'len', 'let', 'local', 'ltl_assertion',
                 'object', 'of', 'out', 'post', 'pre', 'read', 'real', 'rng',
                 'scheme', 'skip', 'stop', 'swap', 'then', 'theory',
                 'test_case', 'tl', 'transition_system', 'true', 'type',
                 'union', 'until', 'use', 'value', 'variable', 'while', 'with',
                 'write', '~isin', '-inflist', '-infset', '-list', '-set'),
                prefix=r'\b',
                suffix=r'\b'), Keyword),
            (r'(variable|value)\b', Keyword.Declaration),
            # line comment and the three bracketed comment forms; all use
            # non-greedy '.*?' so they stop at the first terminator
            (r'--.*?\n', Comment),
            (r'<:.*?:>', Comment),
            (r'\{!.*?!\}', Comment),
            (r'/\*.*?\*/', Comment),
            # "name :" at the start of a line (but not "name ::")
            (r'^[ \t]*([\w]+)[ \t]*:[^:]', Name.Function),
            # "name(args) is|as" at the start of a line
            (r'(^[ \t]*)([\w]+)([ \t]*\([\w\s,]*\)[ \t]*)(is|as)',
             bygroups(Text, Name.Function, Text, Keyword)),
            # capitalised identifiers are treated as type names
            (r'\b[A-Z]\w*\b', Keyword.Type),
            (r'(true|false)\b', Keyword.Constant),
            # Non-greedy on purpose: with DOTALL a greedy '".*"' would run
            # from the first double quote to the LAST one in the whole input,
            # swallowing all code between two string literals.
            (r'".*?"', String),
            (r'\'.\'', String.Char),
            (r'(><|->|-m->|/\\|<=|<<=|<\.|\|\||\|\^\||-~->|-~m->|\\/|>=|>>|'
             r'\.>|\+\+|-\\|<->|=>|:-|~=|\*\*|<<|>>=|\+>|!!|\|=\||#)',
             Operator),
            (r'[0-9]+\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-f]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            # fallback: any other single character is plain text
            (r'.', Text),
        ],
    }

    def analyse_text(text):
        """
        Check for the most common text in the beginning of a RSL file.

        Returns 1.0 when ``scheme ... = class type`` (case-insensitive) is
        found anywhere in *text*; otherwise falls through and returns None.
        """
        if re.search(r'scheme\s*.*?=\s*class\s*type', text, re.I) is not None:
            return 1.0
Пример #21
0
def _objdump_lexer_tokens(asm_lexer):
    """
    Build the token table shared by the objdump-style lexers.

    ``asm_lexer`` is handed to ``using`` and applied to the disassembled
    instruction text of a code line.  The returned dict holds a single
    ``'root'`` state.
    """
    # Character class used for addresses and opcode bytes.
    digit = r'[0-9A-Za-z]'

    # Shared pattern openings; each rule below only appends its own tail.
    # function label: address, space, '<', symbol name
    label_head = '(' + digit + '+)( )(<)(.*?)'
    # code line: indent, "address:", tab, one or more "XX " byte pairs
    code_head = '( *)(' + digit + r'+:)(\t)((?:' + digit + digit + ' )+)'
    # relocation line: three tabs, "address:", space, type, tab, symbol
    reloc_head = r'(\t\t\t)(' + digit + r'+:)( )([^\t]+)(\t)(.*?)'

    code_groups = (Text, Name.Label, Text, Number.Hex)
    reloc_groups = (Text, Name.Label, Text, Name.Property, Text,
                    Name.Constant)

    root_rules = [
        # File name & format:
        ('(.*?)(:)( +file format )(.*?)$',
         bygroups(Name.Label, Punctuation, Text, String)),
        # Section header
        ('(Disassembly of section )(.*?)(:)$',
         bygroups(Text, Name.Label, Punctuation)),
        # Function label with an offset
        (label_head + '([-+])(0[xX][A-Za-z0-9]+)(>:)$',
         bygroups(Number.Hex, Text, Punctuation, Name.Function,
                  Punctuation, Number.Hex, Punctuation)),
        # Function label without an offset
        (label_head + '(>:)$',
         bygroups(Number.Hex, Text, Punctuation, Name.Function,
                  Punctuation)),
        # Code line with disassembled instructions
        (code_head + '( *\t)([a-zA-Z].*?)$',
         bygroups(*(code_groups + (Text, using(asm_lexer))))),
        # Code line with ascii
        (code_head + '( *)(.*?)$',
         bygroups(*(code_groups + (Text, String)))),
        # Continued code line: raw opcodes only, no instruction text
        (code_head + '$',
         bygroups(*code_groups)),
        # Skipped a few bytes
        (r'\t\.\.\.$', Text),
        # Relocation line with an offset
        (reloc_head + '([-+])(0x' + digit + '+)$',
         bygroups(*(reloc_groups + (Punctuation, Number.Hex)))),
        # Relocation line without an offset
        (reloc_head + '$',
         bygroups(*reloc_groups)),
        # Anything else: pass the whole line through untouched
        (r'[^\n]+\n', Other),
    ]
    return {'root': root_rules}
Пример #22
0
 def _make_call_state(compound,
                      _label=_label,
                      _label_compound=_label_compound):
     """
     Return the rule list for a "call" state.

     The label rule matches an optional ':' followed by a label and pops the
     state.  When *compound* is true the compound label pattern is used and
     the list starts with a lookahead rule that pops on a closing
     parenthesis.
     """
     label_pattern = _label_compound if compound else _label
     label_rule = (r'(:?)(%s)' % label_pattern,
                   bygroups(Punctuation, Name.Label), '#pop')
     if not compound:
         return [label_rule]
     # Inside a compound block a ')' ends the call; leave without consuming.
     return [(r'(?=\))', Text, '#pop'), label_rule]
Пример #23
0
class XorgLexer(RegexLexer):
    """
    Lexer for the xorg.conf file.

    Highlights section delimiters, ``#`` comments and ``keyword value``
    lines using a single ``root`` state.
    """
    name = 'Xorg'
    aliases = ['xorg.conf']
    filenames = ['xorg.conf']
    mimetypes = []

    tokens = {
        'root': [
            (r'\s+', Text),
            # comment running to the end of the line
            (r'#.*$', Comment),
            # Section "Name" / SubSection "Name" openers
            (r'((?:Sub)?Section)(\s+)("\w+")',
             bygroups(String.Escape, Text, String.Escape)),
            # EndSection / EndSubSection closers
            (r'(End(|Sub)Section)', String.Escape),
            # any other line: a word, whitespace, then the rest of the line
            # up to a '#' or the newline
            (r'(\w+)(\s+)([^\n#]+)', bygroups(Name.Builtin, Text,
                                              Name.Constant)),
        ],
    }
Пример #24
0
 def _make_redirect_state(compound,
                          _core_token_compound=_core_token_compound,
                          _nl=_nl,
                          _punct=_punct,
                          _stoken=_stoken,
                          _string=_string,
                          _space=_space,
                          _variable=_variable,
                          _ws=_ws):
     """
     Return the two rules that recognise I/O redirections.

     The first rule handles handle duplication (``>&``, ``>>&``, ``<&``
     followed by a digit); the second handles redirection to or from a
     target token.  *compound* selects the compound-aware target pattern
     instead of ``_stoken``.
     """
     stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' %
                        (_punct, _string, _variable, _core_token_compound))
     target = stoken_compound if compound else _stoken
     # Newline and whitespace classes always appear side by side in the
     # character sets below, so join them once.
     blank = _nl + _ws

     dup_pattern = (r'((?:(?<=[%s])\d)?)(>>?&|<&)([%s]*)(\d)' %
                    (blank, blank))
     dup_rule = (dup_pattern,
                 bygroups(Number.Integer, Punctuation, Text, Number.Integer))

     redirect_pattern = (r'((?:(?<=[%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' %
                         (blank, _nl, _space, target))
     redirect_rule = (redirect_pattern,
                      bygroups(Number.Integer, Punctuation,
                               using(this, state='text')))

     return [dup_rule, redirect_rule]
Пример #25
0
def gen_elixir_string_rules(name, symbol, token):
    """
    Return a one-entry state dict for a string delimited by *symbol*.

    The state is named ``'string_' + name``; string content and the closing
    delimiter are emitted as *token*, and the closing delimiter pops the
    state.
    """
    rules = [
        # plain content: anything but '#', the delimiter and a backslash
        (r'[^#%s\\]+' % (symbol,), token),
        include('escapes'),
        # any other backslash pair
        (r'\\.', token),
        # closing delimiter
        (r'(%s)' % (symbol,), bygroups(token), "#pop"),
        include('interpol'),
    ]
    return {'string_' + name: rules}
Пример #26
0
class ScdocLexer(RegexLexer):
    """
    `scdoc` is a simple man page generator for POSIX systems written in C99.
    https://git.sr.ht/~sircmpwn/scdoc

    Block-level constructs are matched in ``root``; the ``inline`` state
    handles emphasis, bold, inline code and plain text.

    .. versionadded:: 2.5
    """
    name = 'scdoc'
    aliases = ['scdoc', 'scd']
    filenames = ['*.scd', '*.scdoc']
    # MULTILINE so that '^' and '$' in the rules anchor at line boundaries
    flags = re.MULTILINE

    tokens = {
        'root': [
            # comment
            (r'^(;.+\n)', bygroups(Comment)),

            # heading with pound prefix
            # ('#' followed by a non-'#' is a heading, '##' a subheading)
            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(#{2})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted lists
            # (the item text is re-lexed with the 'inline' state)
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered lists
            # NOTE(review): '\.+\.' needs at least two dots, so a single
            # '. item' line is not matched here -- confirm this is intended.
            (r'^(\s*)(\.+\.)( .+\n)',
             bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # text block
            # (from a ``` line to the next line that is exactly ```)
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # underlines
            # (must follow whitespace and be followed by a non-word char)
            (r'(\s)(_[^_]+_)(\W|\n)', bygroups(Text, Generic.Emph, Text)),
            # bold
            (r'(\s)(\*[^\*]+\*)(\W|\n)', bygroups(Text, Generic.Strong, Text)),
            # inline code
            (r'`[^`]+`', String.Backtick),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }
Пример #27
0
class ResourceLexer(RegexLexer):
    """Lexer for `ICU Resource bundles
    <http://userguide.icu-project.org/locale/resources>`_.

    .. versionadded:: 2.0
    """
    name = 'ResourceBundle'
    aliases = ['resource', 'resourcebundle']
    filenames = []

    # Resource type suffixes.  ':intvector' is listed before ':int' so the
    # longer alternative is tried first in the joined regex below.
    _types = (':table', ':array', ':string', ':bin', ':import', ':intvector',
              ':int', ':alias')

    flags = re.MULTILINE | re.IGNORECASE
    tokens = {
        'root': [
            (r'//.*?$', Comment),
            (r'"', String, 'string'),
            (r'-?\d+', Number.Integer),
            (r'[,{}]', Operator),
            # Resource key with an OPTIONAL type suffix.  The alternation is
            # wrapped in a non-capturing group so that '?' applies to the
            # whole suffix; written as '(a|b|:alias?)' it only made the final
            # 's' optional, so untyped keys matched no rule at all.
            (r'([^\s{:]+)(\s*)((?:%s)?)' % '|'.join(_types),
             bygroups(Name, Text, Keyword)),
            (r'\s+', Text),
            # type suffix with no key in front of it (e.g. array elements)
            (words(_types), Keyword),
        ],
        # inside a double-quoted string
        'string': [
            # escape sequences and ordinary characters (everything except
            # '"', '{' and a lone backslash)
            (r'(\\x[0-9a-f]{2}|\\u[0-9a-f]{4}|\\U00[0-9a-f]{6}|'
             r'\\[0-7]{1,3}|\\c.|\\[abtnvfre\'"?\\]|\\\{|[^"{\\])+', String),
            # '{' opens a message-format placeholder
            (r'\{', String.Escape, 'msgname'),
            (r'"', String, '#pop'),
        ],
        # placeholder argument name; replaced by 'message' once consumed
        'msgname': [
            (r'([^{},]+)(\s*)', bygroups(Name, String.Escape),
             ('#pop', 'message')),
        ],
        # remainder of a placeholder after its name
        'message': [
            (r'\{', String.Escape, 'msgname'),
            (r'\}', String.Escape, '#pop'),
            # ", type }"
            (r'(,)(\s*)([a-z]+)(\s*\})',
             bygroups(Operator, String.Escape, Keyword, String.Escape),
             '#pop'),
            # ", type, offset: N" followed by choices
            (r'(,)(\s*)([a-z]+)(\s*)(,)(\s*)(offset)(\s*)(:)(\s*)(-?\d+)(\s*)',
             bygroups(Operator, String.Escape, Keyword, String.Escape,
                      Operator, String.Escape, Operator.Word, String.Escape,
                      Operator, String.Escape, Number.Integer,
                      String.Escape),
             'choice'),
            # ", type," followed by choices
            (r'(,)(\s*)([a-z]+)(\s*)(,)(\s*)',
             bygroups(Operator, String.Escape, Keyword, String.Escape,
                      Operator, String.Escape),
             'choice'),
            (r'\s+', String.Escape),
        ],
        # selector list of a choice-style placeholder
        'choice': [
            # numeric selector, e.g. "=1 {"
            (r'(=|<|>|<=|>=|!=)(-?\d+)(\s*\{)',
             bygroups(Operator, Number.Integer, String.Escape), 'message'),
            # keyword selector, e.g. "other {"
            (r'([a-z]+)(\s*\{)',
             bygroups(Keyword.Type, String.Escape), 'str'),
            # closing brace leaves both 'choice' and the enclosing 'message'
            (r'\}', String.Escape, ('#pop', '#pop')),
            (r'\s+', String.Escape),
        ],
        # text of one choice branch
        'str': [
            (r'\}', String.Escape, '#pop'),
            (r'\{', String.Escape, 'msgname'),
            (r'[^{}]+', String),
        ],
    }

    def analyse_text(text):
        """
        Return 1.0 when *text* starts with ``root:table``; otherwise fall
        through and return None.
        """
        if text.startswith('root:table'):
            return 1.0
Пример #28
0
class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    Adds rules that mark the quoted description following ``it``,
    ``describe`` and ``context`` as ``String.Doc``.

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            # Hspec keyword (with any trailing whitespace) followed by a
            # double-quoted description
            (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
            # Pygments' ``inherit`` marker: the parent lexer's 'root' rules
            # are expected to be spliced in here, after the Hspec rules.
            inherit,
        ],
    }
Пример #29
0
class GenericAspxLexer(RegexLexer):
    """
    Lexer for ASP.NET pages.

    Splits a page into ``<% ... %>`` blocks, ``<script>`` elements and
    surrounding markup; the embedded code itself is emitted as ``Other``.
    """

    name = 'aspx-gen'
    filenames = []
    mimetypes = []

    # DOTALL so that '.' matches newlines and blocks may span several lines
    flags = re.DOTALL

    tokens = {
        'root': [
            # <% %>, <%@ %>, <%= %> and <%# %> blocks: delimiters are tags,
            # the content is left as Other
            (r'(<%[@=#]?)(.*?)(%>)', bygroups(Name.Tag, Other, Name.Tag)),
            # <script> element: tags go to the XML lexer, body stays Other
            (r'(<script.*?>)(.*?)(</script>)',
             bygroups(using(XmlLexer), Other, using(XmlLexer))),
            # markup up to (not including) the next '<'
            (r'(.+?)(?=<)', using(XmlLexer)),
            # whatever remains
            (r'.+', using(XmlLexer)),
        ],
    }
Пример #30
0
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        # start of a line: either a request (leading '.') or a text line
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            # line starts with a backslash or is empty: still a text line
            default('textline'),
        ],
        # rest of a text line; the newline returns to 'root'
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            # backslash-newline; '.' above does not match '\n', so this
            # rule is still reached
            (r'\\\n', Text, 'request'),
        ],
        # arguments of a request; the newline pops the state
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """
        Guess whether *text* is roff source from its first characters.

        Returns False when the text does not start with '.', True for a
        leading comment (``.\\"``) or ``.TH `` title request, and 0.9 when
        it starts with a dot, two alphanumerics and whitespace.  Otherwise
        falls through and returns None.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Slice rather than index: ``text[3]`` raised IndexError for inputs
        # shorter than four characters (e.g. '.ab'); an empty slice simply
        # makes ``isspace()`` return False.
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9