class ScdocLexer(RegexLexer): """ `scdoc` is a simple man page generator for POSIX systems written in C99. https://git.sr.ht/~sircmpwn/scdoc .. versionadded:: 2.5 """ name = 'scdoc' aliases = ['scdoc', 'scd'] filenames = ['*.scd', '*.scdoc'] flags = re.MULTILINE tokens = { 'root': [ # comment (r'^(;.+\n)', bygroups(Comment)), # heading with pound prefix (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)), (r'^(#{2})(.+\n)', bygroups(Generic.Subheading, Text)), # bulleted lists (r'^(\s*)([*-])(\s)(.+\n)', bygroups(Text, Keyword, Text, using(this, state='inline'))), # numbered lists (r'^(\s*)(\.+\.)( .+\n)', bygroups(Text, Keyword, using(this, state='inline'))), # quote (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)), # text block (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), include('inline'), ], 'inline': [ # escape (r'\\.', Text), # underlines (r'(\s)(_[^_]+_)(\W|\n)', bygroups(Text, Generic.Emph, Text)), # bold (r'(\s)(\*[^*]+\*)(\W|\n)', bygroups(Text, Generic.Strong, Text)), # inline code (r'`[^`]+`', String.Backtick), # general text, must come last! (r'[^\\\s]+', Text), (r'.', Text), ], } def analyse_text(text): """This is very similar to markdown, save for the escape characters needed for * and _.""" result = 0 if '\\*' in text: result += 0.01 if '\\_' in text: result += 0.01 return result
class BaseMakefileLexer(RegexLexer): """ Lexer for simple Makefiles (no preprocessing). .. versionadded:: 0.10 """ name = 'Base Makefile' aliases = ['basemake'] filenames = [] mimetypes = [] tokens = { 'root': [ # recipes (need to allow spaces because of expandtabs) (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)), # special variables (r'\$[<@$+%?|*]', Keyword), (r'\s+', Text), (r'#.*?\n', Comment), (r'(export)(\s+)(?=[\w${}\t -]+\n)', bygroups(Keyword, Text), 'export'), (r'export\s+', Keyword), # assignment (r'([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)', bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))), # strings (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double), (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single), # targets (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text), 'block-header'), # expansions (r'\$\(', Keyword, 'expansion'), ], 'expansion': [ (r'[^\w$().-]+', Text), (r'[\w.-]+', Name.Variable), (r'\$', Keyword), (r'\(', Keyword, '#push'), (r'\)', Keyword, '#pop'), ], 'export': [ (r'[\w${}-]+', Name.Variable), (r'\n', Text, '#pop'), (r'\s+', Text), ], 'block-header': [ (r'[,|]', Punctuation), (r'#.*?\n', Comment, '#pop'), (r'\\\n', Text), # line continuation (r'\$\(', Keyword, 'expansion'), (r'[a-zA-Z_]+', Name), (r'\n', Text, '#pop'), (r'.', Text), ], }
class PythonTracebackLexer(RegexLexer): """ For Python tracebacks. .. versionadded:: 0.7 """ name = 'Python Traceback' aliases = ['pytb'] filenames = ['*.pytb'] mimetypes = ['text/x-python-traceback'] tokens = { 'root': [ # Cover both (most recent call last) and (innermost last) # The optional ^C allows us to catch keyboard interrupt signals. (r'^(\^C)?(Traceback.*\n)', bygroups(Text, Generic.Traceback), 'intb'), # SyntaxError starts with this. (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), (r'^.*\n', Other), ], 'intb': [ (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), (r'^( File )("[^"]+")(, line )(\d+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text)), (r'^( )(.+)(\n)', bygroups(Text, using(PythonLexer), Text)), (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment, Text)), # for doctests... (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), (r'^([a-zA-Z_]\w*)(:?\n)', bygroups(Generic.Error, Text), '#pop') ], }
class GenericAspxLexer(RegexLexer): """ Lexer for ASP.NET pages. """ name = 'aspx-gen' filenames = [] mimetypes = [] flags = re.DOTALL tokens = { 'root': [ (r'(<%[@=#]?)(.*?)(%>)', bygroups(Name.Tag, Other, Name.Tag)), (r'(<script.*?>)(.*?)(</script>)', bygroups(using(XmlLexer), Other, using(XmlLexer))), (r'(.+?)(?=<)', using(XmlLexer)), (r'.+', using(XmlLexer)), ], }
class CsoundDocumentLexer(RegexLexer): """ For `Csound <https://csound.com>`_ documents. .. versionadded:: 2.1 """ name = 'Csound Document' aliases = ['csound-document', 'csound-csd'] filenames = ['*.csd'] # These tokens are based on those in XmlLexer in pygments/lexers/html.py. Making # CsoundDocumentLexer a subclass of XmlLexer rather than RegexLexer may seem like a # better idea, since Csound Document files look like XML files. However, Csound # Documents can contain Csound comments (preceded by //, for example) before and # after the root element, unescaped bitwise AND & and less than < operators, etc. In # other words, while Csound Document files look like XML files, they may not actually # be XML files. tokens = { 'root': [(r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'(?:;|//).*$', Comment.Single), (r'[^/;<]+|/(?!/)', Text), (r'<\s*CsInstruments', Name.Tag, ('orchestra', 'tag')), (r'<\s*CsScore', Name.Tag, ('score', 'tag')), (r'<\s*[Hh][Tt][Mm][Ll]', Name.Tag, ('HTML', 'tag')), (r'<\s*[\w:.-]+', Name.Tag, 'tag'), (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag)], 'orchestra': [(r'<\s*/\s*CsInstruments\s*>', Name.Tag, '#pop'), (r'(.|\n)+?(?=<\s*/\s*CsInstruments\s*>)', using(CsoundOrchestraLexer))], 'score': [(r'<\s*/\s*CsScore\s*>', Name.Tag, '#pop'), (r'(.|\n)+?(?=<\s*/\s*CsScore\s*>)', using(CsoundScoreLexer))], 'HTML': [(r'<\s*/\s*[Hh][Tt][Mm][Ll]\s*>', Name.Tag, '#pop'), (r'(.|\n)+?(?=<\s*/\s*[Hh][Tt][Mm][Ll]\s*>)', using(HtmlLexer))], 'tag': [(r'\s+', Text), (r'[\w.:-]+\s*=', Name.Attribute, 'attr'), (r'/?\s*>', Name.Tag, '#pop')], 'attr': [(r'\s+', Text), (r'".*?"', String, '#pop'), (r"'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop')] }
class DockerLexer(RegexLexer): """ Lexer for `Docker <http://docker.io>`_ configuration files. .. versionadded:: 2.0 """ name = 'Docker' aliases = ['docker', 'dockerfile'] filenames = ['Dockerfile', '*.docker'] mimetypes = ['text/x-dockerfile-config'] _keywords = (r'(?:MAINTAINER|EXPOSE|WORKDIR|USER|STOPSIGNAL)') _bash_keywords = (r'(?:RUN|CMD|ENTRYPOINT|ENV|ARG|LABEL|ADD|COPY)') _lb = r'(?:\s*\\?\s*)' # dockerfile line break regex flags = re.IGNORECASE | re.MULTILINE tokens = { 'root': [ (r'#.*', Comment), (r'(FROM)([ \t]*)(\S*)([ \t]*)(?:(AS)([ \t]*)(\S*))?', bygroups(Keyword, Text, String, Text, Keyword, Text, String)), (r'(ONBUILD)(%s)' % (_lb, ), bygroups(Keyword, using(BashLexer))), (r'(HEALTHCHECK)((%s--\w+=\w+%s)*)' % (_lb, _lb), bygroups(Keyword, using(BashLexer))), (r'(VOLUME|ENTRYPOINT|CMD|SHELL)(%s)(\[.*?\])' % (_lb, ), bygroups(Keyword, using(BashLexer), using(JsonLexer))), (r'(LABEL|ENV|ARG)((%s\w+=\w+%s)*)' % (_lb, _lb), bygroups(Keyword, using(BashLexer))), (r'(%s|VOLUME)\b(.*)' % (_keywords), bygroups(Keyword, String)), (r'(%s)' % (_bash_keywords, ), Keyword), (r'(.*\\\n)*.+', using(BashLexer)), ] }
class DuelLexer(RegexLexer): """ Lexer for Duel Views Engine (formerly JBST) markup with JavaScript code blocks. See http://duelengine.org/. See http://jsonml.org/jbst/. .. versionadded:: 1.4 """ name = 'Duel' aliases = ['duel', 'jbst', 'jsonml+bst'] filenames = ['*.duel', '*.jbst'] mimetypes = ['text/x-duel', 'text/x-jbst'] flags = re.DOTALL tokens = { 'root': [ (r'(<%[@=#!:]?)(.*?)(%>)', bygroups(Name.Tag, using(JavascriptLexer), Name.Tag)), (r'(<%\$)(.*?)(:)(.*?)(%>)', bygroups(Name.Tag, Name.Function, Punctuation, String, Name.Tag)), (r'(<%--)(.*?)(--%>)', bygroups(Name.Tag, Comment.Multiline, Name.Tag)), (r'(<script.*?>)(.*?)(</script>)', bygroups(using(HtmlLexer), using(JavascriptLexer), using(HtmlLexer))), (r'(.+?)(?=<)', using(HtmlLexer)), (r'.+', using(HtmlLexer)), ], }
def _objdump_lexer_tokens(asm_lexer): """ Common objdump lexer tokens to wrap an ASM lexer. """ hex_re = r'[0-9A-Za-z]' return { 'root': [ # File name & format: ('(.*?)(:)( +file format )(.*?)$', bygroups(Name.Label, Punctuation, Text, String)), # Section header ('(Disassembly of section )(.*?)(:)$', bygroups(Text, Name.Label, Punctuation)), # Function labels # (With offset) ('(' + hex_re + '+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation, Number.Hex, Punctuation)), # (Without offset) ('(' + hex_re + '+)( )(<)(.*?)(>:)$', bygroups(Number.Hex, Text, Punctuation, Name.Function, Punctuation)), # Code line with disassembled instructions ('( *)(' + hex_re + r'+:)(\t)((?:' + hex_re + hex_re + ' )+)( *\t)([a-zA-Z].*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, using(asm_lexer))), # Code line with ascii ('( *)(' + hex_re + r'+:)(\t)((?:' + hex_re + hex_re + ' )+)( *)(.*?)$', bygroups(Text, Name.Label, Text, Number.Hex, Text, String)), # Continued code line, only raw opcodes without disassembled # instruction ('( *)(' + hex_re + r'+:)(\t)((?:' + hex_re + hex_re + ' )+)$', bygroups(Text, Name.Label, Text, Number.Hex)), # Skipped a few bytes (r'\t\.\.\.$', Text), # Relocation line # (With offset) (r'(\t\t\t)(' + hex_re + r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex_re + '+)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant, Punctuation, Number.Hex)), # (Without offset) (r'(\t\t\t)(' + hex_re + r'+:)( )([^\t]+)(\t)(.*?)$', bygroups(Text, Name.Label, Text, Name.Property, Text, Name.Constant)), (r'[^\n]+\n', Other) ] }
def _make_redirect_state(compound, _core_token_compound=_core_token_compound, _nl=_nl, _punct=_punct, _stoken=_stoken, _string=_string, _space=_space, _variable=_variable, _ws=_ws): stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' % (_punct, _string, _variable, _core_token_compound)) return [ (r'((?:(?<=[%s%s])\d)?)(>>?&|<&)([%s%s]*)(\d)' % (_nl, _ws, _nl, _ws), bygroups(Number.Integer, Punctuation, Text, Number.Integer)), (r'((?:(?<=[%s%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' % (_nl, _ws, _nl, _space, stoken_compound if compound else _stoken), bygroups(Number.Integer, Punctuation, using(this, state='text'))) ]
class MoinWikiLexer(RegexLexer): """ For MoinMoin (and Trac) Wiki markup. .. versionadded:: 0.7 """ name = 'MoinMoin/Trac Wiki markup' aliases = ['trac-wiki', 'moin'] filenames = [] mimetypes = ['text/x-trac-wiki'] flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'^#.*$', Comment), (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next # Titles (r'^(=+)([^=]+)(=+)(\s*#.+)?$', bygroups(Generic.Heading, using(this), Generic.Heading, String)), # Literal code blocks, with optional shebang (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'), (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting # Lists (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)), (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)), # Other Formatting (r'\[\[\w+.*?\]\]', Keyword), # Macro (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])', bygroups(Keyword, String, Keyword)), # Link (r'^----+$', Keyword), # Horizontal rules (r'[^\n\'\[{!_~^,|]+', Text), (r'\n', Text), (r'.', Text), ], 'codeblock': [ (r'\}\}\}', Name.Builtin, '#pop'), # these blocks are allowed to be nested in Trac, but not MoinMoin (r'\{\{\{', Text, '#push'), (r'[^{}]+', Comment.Preproc), # slurp boring text (r'.', Comment.Preproc), # allow loose { or } ], }
class SingularityLexer(RegexLexer): """ Lexer for `Singularity definition files <https://www.sylabs.io/guides/3.0/user-guide/definition_files.html>`_. .. versionadded:: 2.6 """ name = 'Singularity' aliases = ['singularity'] filenames = ['*.def', 'Singularity'] flags = re.IGNORECASE | re.MULTILINE | re.DOTALL _headers = r'^(\s*)(bootstrap|from|osversion|mirrorurl|include|registry|namespace|includecmd)(:)' _section = r'^%(?:pre|post|setup|environment|help|labels|test|runscript|files|startscript)\b' _appsect = r'^%app(?:install|help|run|labels|env|test|files)\b' tokens = { 'root': [ (_section, Generic.Heading, 'script'), (_appsect, Generic.Heading, 'script'), (_headers, bygroups(Text, Keyword, Text)), (r'\s*#.*?\n', Comment), (r'\b(([0-9]+\.?[0-9]*)|(\.[0-9]+))\b', Number), (r'(?!^\s*%).', Text), ], 'script': [ (r'(.+?(?=^\s*%))|(.*)', using(BashLexer), '#pop'), ], } def analyse_text(text): """This is a quite simple script file, but there are a few keywords which seem unique to this language.""" result = 0 if re.search(r'\b(?:osversion|includecmd|mirrorurl)\b', text, re.IGNORECASE): result += 0.5 if re.search(SingularityLexer._section[1:], text): result += 0.49 return result
class PythonTracebackLexer(RegexLexer): """ For Python 3.x tracebacks, with support for chained exceptions. .. versionadded:: 1.0 .. versionchanged:: 2.5 This is now the default ``PythonTracebackLexer``. It is still available as the alias ``Python3TracebackLexer``. """ name = 'Python Traceback' aliases = ['pytb', 'py3tb'] filenames = ['*.pytb', '*.py3tb'] mimetypes = ['text/x-python-traceback', 'text/x-python3-traceback'] tokens = { 'root': [ (r'\n', Text), (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'), (r'^During handling of the above exception, another ' r'exception occurred:\n\n', Generic.Traceback), (r'^The above exception was the direct cause of the ' r'following exception:\n\n', Generic.Traceback), (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), (r'^.*\n', Other), ], 'intb': [ (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), (r'^( File )("[^"]+")(, line )(\d+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text)), (r'^( )(.+)(\n)', bygroups(Text, using(PythonLexer), Text)), (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment, Text)), # for doctests... (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), (r'^([a-zA-Z_][\w.]*)(:?\n)', bygroups(Generic.Error, Text), '#pop') ], }
class MxmlLexer(RegexLexer): """ For MXML markup. Nested AS3 in <script> tags is highlighted by the appropriate lexer. .. versionadded:: 1.1 """ flags = re.MULTILINE | re.DOTALL name = 'MXML' aliases = ['mxml'] filenames = ['*.mxml'] mimetimes = ['text/xml', 'application/xml'] tokens = { 'root': [ ('[^<&]+', Text), (r'&\S*?;', Name.Entity), (r'(\<\!\[CDATA\[)(.*?)(\]\]\>)', bygroups(String, using(ActionScript3Lexer), String)), ('<!--', Comment, 'comment'), (r'<\?.*?\?>', Comment.Preproc), ('<![^>]*>', Comment.Preproc), (r'<\s*[\w:.-]+', Name.Tag, 'tag'), (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag), ], 'comment': [ ('[^-]+', Comment), ('-->', Comment, '#pop'), ('-', Comment), ], 'tag': [ (r'\s+', Text), (r'[\w.:-]+\s*=', Name.Attribute, 'attr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'attr': [ (r'\s+', Text), ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], }
def _make_arithmetic_state(compound, _nl=_nl, _punct=_punct, _string=_string, _variable=_variable, _ws=_ws): op = r'=+\-*/!~' state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state += [ (r'0[0-7]+', Number.Oct), (r'0x[\da-f]+', Number.Hex), (r'\d+', Number.Integer), (r'[(),]+', Punctuation), (r'([%s]|%%|\^\^)+' % op, Operator), (r'(%s|%s|(\^[%s]?)?[^()%s%%^"%s%s%s]|\^[%s%s]?%s)+' % (_string, _variable, _nl, op, _nl, _punct, _ws, _nl, _ws, r'[^)]' if compound else r'[\w\W]'), using(this, state='variable')), (r'(?=[\x00|&])', Text, '#pop'), include('follow') ] return state
class FortranFixedLexer(RegexLexer): """ Lexer for fixed format Fortran. .. versionadded:: 2.1 """ name = 'FortranFixed' aliases = ['fortranfixed'] filenames = ['*.f', '*.F'] flags = re.IGNORECASE def _lex_fortran(self, match, ctx=None): """Lex a line just as free form fortran without line break.""" lexer = FortranLexer() text = match.group(0) + "\n" for index, token, value in lexer.get_tokens_unprocessed(text): value = value.replace('\n', '') if value != '': yield index, token, value tokens = { 'root': [ (r'[C*].*\n', Comment), (r'#.*\n', Comment.Preproc), (r' {0,4}!.*\n', Comment), (r'(.{5})', Name.Label, 'cont-char'), (r'.*\n', using(FortranLexer)), ], 'cont-char': [ (' ', Text, 'code'), ('0', Comment, 'code'), ('.', Generic.Strong, 'code'), ], 'code': [ (r'(.{66})(.*)(\n)', bygroups(_lex_fortran, Comment, Text), 'root'), (r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'), default('root'), ] }
class PhpLexer(RegexLexer): """ For `PHP <http://www.php.net/>`_ source code. For PHP embedded in HTML, use the `HtmlPhpLexer`. Additional options accepted: `startinline` If given and ``True`` the lexer starts highlighting with php code (i.e.: no starting ``<?php`` required). The default is ``False``. `funcnamehighlighting` If given and ``True``, highlight builtin function names (default: ``True``). `disabledmodules` If given, must be a list of module names whose function names should not be highlighted. By default all modules are highlighted except the special ``'unknown'`` module that includes functions that are known to php but are undocumented. To get a list of allowed modules have a look into the `_php_builtins` module: .. sourcecode:: pycon >>> from typecode._vendor.pygments.lexers._php_builtins import MODULES >>> MODULES.keys() ['PHP Options/Info', 'Zip', 'dba', ...] In fact the names of those modules match the module names from the php documentation. """ name = 'PHP' aliases = ['php', 'php3', 'php4', 'php5'] filenames = ['*.php', '*.php[345]', '*.inc'] mimetypes = ['text/x-php'] # Note that a backslash is included in the following two patterns # PHP uses a backslash as a namespace separator _ident_char = r'[\\\w]|[^\x00-\x7f]' _ident_begin = r'(?:[\\_a-z]|[^\x00-\x7f])' _ident_end = r'(?:' + _ident_char + ')*' _ident_inner = _ident_begin + _ident_end flags = re.IGNORECASE | re.DOTALL | re.MULTILINE tokens = { 'root': [ (r'<\?(php)?', Comment.Preproc, 'php'), (r'[^<]+', Other), (r'<', Other) ], 'php': [ (r'\?>', Comment.Preproc, '#pop'), (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)', bygroups(String, String, String.Delimiter, String, String.Delimiter, Punctuation, Text)), (r'\s+', Text), (r'#.*?\n', Comment.Single), (r'//.*?\n', Comment.Single), # put the empty comment here, it is otherwise seen as # the start of a docstring (r'/\*\*/', Comment.Multiline), (r'/\*\*.*?\*/', String.Doc), (r'/\*.*?\*/', Comment.Multiline), (r'(->|::)(\s*)(' + _ident_inner + ')', bygroups(Operator, Text, Name.Attribute)), (r'[~!%^&*+=|:.<>/@-]+', Operator), (r'\?', Operator), # don't add to the charclass above! (r'[\[\]{}();,]+', Punctuation), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(function)(\s*)(?=\()', bygroups(Keyword, Text)), (r'(function)(\s+)(&?)(\s*)', bygroups(Keyword, Text, Operator, Text), 'functionname'), (r'(const)(\s+)(' + _ident_inner + ')', bygroups(Keyword, Text, Name.Constant)), (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|' r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|' r'FALSE|print|for|require|continue|foreach|require_once|' r'declare|return|default|static|do|switch|die|stdClass|' r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|' r'virtual|endfor|include_once|while|endforeach|global|' r'endif|list|endswitch|new|endwhile|not|' r'array|E_ALL|NULL|final|php_user_filter|interface|' r'implements|public|private|protected|abstract|clone|try|' r'catch|throw|this|use|namespace|trait|yield|' r'finally)\b', Keyword), (r'(true|false|null)\b', Keyword.Constant), include('magicconstants'), (r'\$\{\$+' + _ident_inner + r'\}', Name.Variable), (r'\$+' + _ident_inner, Name.Variable), (_ident_inner, Name.Other), (r'(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?', Number.Float), (r'\d+e[+-]?[0-9]+', Number.Float), (r'0[0-7]+', Number.Oct), (r'0x[a-f0-9]+', Number.Hex), (r'\d+', Number.Integer), (r'0b[01]+', Number.Bin), (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single), (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick), (r'"', String.Double, 'string'), ], 'magicfuncs': [ # source: http://php.net/manual/en/language.oop5.magic.php (words(( '__construct', '__destruct', '__call', '__callStatic', '__get', '__set', '__isset', '__unset', '__sleep', '__wakeup', '__toString', '__invoke', '__set_state', '__clone', '__debugInfo',), suffix=r'\b'), Name.Function.Magic), ], 'magicconstants': [ # source: http://php.net/manual/en/language.constants.predefined.php (words(( '__LINE__', '__FILE__', '__DIR__', '__FUNCTION__', '__CLASS__', '__TRAIT__', '__METHOD__', '__NAMESPACE__',), suffix=r'\b'), Name.Constant), ], 'classname': [ (_ident_inner, Name.Class, '#pop') ], 'functionname': [ include('magicfuncs'), (_ident_inner, Name.Function, '#pop'), default('#pop') ], 'string': [ (r'"', String.Double, '#pop'), (r'[^{$"\\]+', String.Double), (r'\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})', String.Escape), (r'\$' + _ident_inner + r'(\[\S+?\]|->' + _ident_inner + ')?', String.Interpol), (r'(\{\$\{)(.*?)(\}\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\{)(\$.*?)(\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\$\{)(\S+)(\})', bygroups(String.Interpol, Name.Variable, String.Interpol)), (r'[${\\]', String.Double) ], } def __init__(self, **options): self.funcnamehighlighting = get_bool_opt( options, 'funcnamehighlighting', True) self.disabledmodules = get_list_opt( options, 'disabledmodules', ['unknown']) self.startinline = get_bool_opt(options, 'startinline', False) # private option argument for the lexer itself if '_startinline' in options: self.startinline = options.pop('_startinline') # collect activated functions in a set self._functions = set() if self.funcnamehighlighting: from typecode._vendor.pygments.lexers._php_builtins import MODULES for key, value in iteritems(MODULES): if key not in self.disabledmodules: self._functions.update(value) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): stack = ['root'] if self.startinline: stack.append('php') for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text, stack): if token is Name.Other: if value in self._functions: yield index, Name.Builtin, value continue yield index, token, value def analyse_text(text): if shebang_matches(text, r'php'): return True rv = 0.0 if re.search(r'<\?(?!xml)', text): rv += 0.3 return rv
class LlvmMirLexer(RegexLexer): """ Lexer for the overall LLVM MIR document format. MIR is a human readable serialization format that's used to represent LLVM's machine specific intermediate representation. It allows LLVM's developers to see the state of the compilation process at various points, as well as test individual pieces of the compiler. For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html. .. versionadded:: 2.6 """ name = 'LLVM-MIR' aliases = ['llvm-mir'] filenames = ['*.mir'] tokens = { 'root': [ # Comments are hashes at the YAML level (r'#.*', Comment), # Documents starting with | are LLVM-IR (r'--- \|$', Keyword, 'llvm_ir'), # Other documents are MIR (r'---', Keyword, 'llvm_mir'), # Consume everything else in one token for efficiency (r'[^-#]+|.', Text), ], 'llvm_ir': [ # Documents end with '...' or '---' (r'(\.\.\.|(?=---))', Keyword, '#pop'), # Delegate to the LlvmLexer (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))), ], 'llvm_mir': [ # Comments are hashes at the YAML level (r'#.*', Comment), # Documents end with '...' or '---' (r'(\.\.\.|(?=---))', Keyword, '#pop'), # Handle the simple attributes (r'name:', Keyword, 'name'), (words(('alignment', ), suffix=':'), Keyword, 'number'), (words(('legalized', 'regBankSelected', 'tracksRegLiveness', 'selected', 'exposesReturnsTwice'), suffix=':'), Keyword, 'boolean'), # Handle the attributes don't highlight inside (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo', 'machineFunctionInfo'), suffix=':'), Keyword), # Delegate the body block to the LlvmMirBodyLexer (r'body: *\|', Keyword, 'llvm_mir_body'), # Consume everything else (r'.+', Text), (r'\n', Text), ], 'name': [ (r'[^\n]+', Name), default('#pop'), ], 'boolean': [ (r' *(true|false)', Name.Builtin), default('#pop'), ], 'number': [ (r' *[0-9]+', Number), default('#pop'), ], 'llvm_mir_body': [ # Documents end with '...' or '---'. # We have to pop llvm_mir_body and llvm_mir (r'(\.\.\.|(?=---))', Keyword, '#pop:2'), # Delegate the body block to the LlvmMirBodyLexer (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))), # The '...' is optional. If we didn't already find it then it isn't # there. There might be a '---' instead though. (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))), ], }
class GeneratedObjectiveCVariant(baselexer): """ Implements Objective-C syntax on top of an existing C family lexer. """ tokens = { 'statements': [ (r'@"', String, 'string'), (r'@(YES|NO)', Number), (r"@'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), (r'@(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float), (r'@(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'@0x[0-9a-fA-F]+[Ll]?', Number.Hex), (r'@0[0-7]+[Ll]?', Number.Oct), (r'@\d+[Ll]?', Number.Integer), (r'@\(', Literal, 'literal_number'), (r'@\[', Literal, 'literal_array'), (r'@\{', Literal, 'literal_dictionary'), (words(('@selector', '@private', '@protected', '@public', '@encode', '@synchronized', '@try', '@throw', '@catch', '@finally', '@end', '@property', '@synthesize', '__bridge', '__bridge_transfer', '__autoreleasing', '__block', '__weak', '__strong', 'weak', 'strong', 'copy', 'retain', 'assign', 'unsafe_unretained', 'atomic', 'nonatomic', 'readonly', 'readwrite', 'setter', 'getter', 'typeof', 'in', 'out', 'inout', 'release', 'class', '@dynamic', '@optional', '@required', '@autoreleasepool', '@import'), suffix=r'\b'), Keyword), (words(('id', 'instancetype', 'Class', 'IMP', 'SEL', 'BOOL', 'IBOutlet', 'IBAction', 'unichar'), suffix=r'\b'), Keyword.Type), (r'@(true|false|YES|NO)\n', Name.Builtin), (r'(YES|NO|nil|self|super)\b', Name.Builtin), # Carbon types (r'(Boolean|UInt8|SInt8|UInt16|SInt16|UInt32|SInt32)\b', Keyword.Type), # Carbon built-ins (r'(TRUE|FALSE)\b', Name.Builtin), (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text), ('#pop', 'oc_classname')), (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text), ('#pop', 'oc_forward_classname')), # @ can also prefix other expressions like @{...} or @(...) (r'@', Punctuation), inherit, ], 'oc_classname': [ # interface definition that inherits (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?(\s*)(\{)', bygroups(Name.Class, Text, Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')), (r'([a-zA-Z$_][\w$]*)(\s*:\s*)([a-zA-Z$_][\w$]*)?', bygroups(Name.Class, Text, Name.Class), '#pop'), # interface definition for a category (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))(\s*)(\{)', bygroups(Name.Class, Text, Name.Label, Text, Punctuation), ('#pop', 'oc_ivars')), (r'([a-zA-Z$_][\w$]*)(\s*)(\([a-zA-Z$_][\w$]*\))', bygroups(Name.Class, Text, Name.Label), '#pop'), # simple interface / implementation (r'([a-zA-Z$_][\w$]*)(\s*)(\{)', bygroups(Name.Class, Text, Punctuation), ('#pop', 'oc_ivars')), (r'([a-zA-Z$_][\w$]*)', Name.Class, '#pop') ], 'oc_forward_classname': [(r'([a-zA-Z$_][\w$]*)(\s*,\s*)', bygroups(Name.Class, Text), 'oc_forward_classname'), (r'([a-zA-Z$_][\w$]*)(\s*;?)', bygroups(Name.Class, Text), '#pop')], 'oc_ivars': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'root': [ # methods ( r'^([-+])(\s*)' # method marker r'(\(.*?\))?(\s*)' # return type r'([a-zA-Z$_][\w$]*:?)', # begin of method name bygroups(Punctuation, Text, using(this), Text, Name.Function), 'method'), inherit, ], 'method': [ include('whitespace'), # TODO unsure if ellipses are allowed elsewhere, see # discussion in Issue 789 (r',', Punctuation), (r'\.\.\.', Punctuation), (r'(\(.*?\))(\s*)([a-zA-Z$_][\w$]*)', bygroups(using(this), Text, Name.Variable)), (r'[a-zA-Z$_][\w$]*:', Name.Function), (';', Punctuation, '#pop'), (r'\{', Punctuation, 'function'), default('#pop'), ], 'literal_number': [ (r'\(', Punctuation, 'literal_number_inner'), (r'\)', Literal, '#pop'), include('statement'), ], 'literal_number_inner': [ (r'\(', Punctuation, '#push'), (r'\)', Punctuation, '#pop'), include('statement'), ], 'literal_array': [ (r'\[', Punctuation, 'literal_array_inner'), (r'\]', Literal, '#pop'), include('statement'), ], 'literal_array_inner': [ (r'\[', Punctuation, '#push'), (r'\]', Punctuation, '#pop'), include('statement'), ], 'literal_dictionary': [ (r'\}', Literal, '#pop'), include('statement'), ], } def analyse_text(text): if _oc_keywords.search(text): return 1.0 elif '@"' in text: # strings return 0.8 elif re.search('@[0-9]+', text): return 0.7 elif _oc_message.search(text): return 0.8 return 0 def get_tokens_unprocessed(self, text): from typecode._vendor.pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \ COCOA_PROTOCOLS, COCOA_PRIMITIVES for index, token, value in \ baselexer.get_tokens_unprocessed(self, text): if token is Name or token is Name.Class: if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \ or value in COCOA_PRIMITIVES: token = Name.Builtin.Pseudo yield index, token, value
class ScamlLexer(ExtendedRegexLexer): """ For `Scaml markup <http://scalate.fusesource.org/>`_. Scaml is Haml for Scala. .. versionadded:: 1.4 """ name = 'Scaml' aliases = ['scaml'] filenames = ['*.scaml'] mimetypes = ['text/x-scaml'] flags = re.IGNORECASE # Scaml does not yet support the " |\n" notation to # wrap long lines. Once it does, use the custom faux # dot instead. # _dot = r'(?: \|\n(?=.* \|)|.)' _dot = r'.' tokens = { 'root': [ (r'[ \t]*\n', Text), (r'[ \t]*', _indentation), ], 'css': [ (r'\.[\w:-]+', Name.Class, 'tag'), (r'\#[\w:-]+', Name.Function, 'tag'), ], 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), (r'([&!]?[=~])(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), 'root'), default('plain'), ], 'content': [ include('css'), (r'%[\w:-]+', Name.Tag, 'tag'), (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'), (r'(-@\s*)(import)?(' + _dot + r'*\n)', bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'), (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), '#pop'), (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), include('eval-or-plain'), ], 'tag': [ include('css'), (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)), (r'\[' + _dot + r'*?\]', using(ScalaLexer)), (r'\(', Text, 'html-attributes'), (r'/[ \t]*\n', Punctuation, '#pop:2'), (r'[<>]{1,2}(?=[ \t=])', Punctuation), include('eval-or-plain'), ], 'plain': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], 'html-attributes': [ (r'\s+', Text), (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), (r'[\w:-]+', Name.Attribute), (r'\)', Text, '#pop'), ], 'html-attribute-value': [ (r'[ \t]+', Text), (r'\w+', Name.Variable, '#pop'), (r'@\w+', Name.Variable.Instance, '#pop'), (r'\$\w+', Name.Variable.Global, '#pop'), (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'), (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'), ], 'html-comment-block': [ (_dot + '+', Comment), (r'\n', Text, 'root'), ], 'scaml-comment-block': [ (_dot + '+', Comment.Preproc), (r'\n', Text, 'root'), ], 'filter-block': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], }
class HamlLexer(ExtendedRegexLexer): """ For Haml markup. .. versionadded:: 1.3 """ name = 'Haml' aliases = ['haml'] filenames = ['*.haml'] mimetypes = ['text/x-haml'] flags = re.IGNORECASE # Haml can include " |\n" anywhere, # which is ignored and used to wrap long lines. # To accomodate this, use this custom faux dot instead. _dot = r'(?: \|\n(?=.* \|)|.)' # In certain places, a comma at the end of the line # allows line wrapping as well. _comma_dot = r'(?:,\s*\n|' + _dot + ')' tokens = { 'root': [ (r'[ \t]*\n', Text), (r'[ \t]*', _indentation), ], 'css': [ (r'\.[\w:-]+', Name.Class, 'tag'), (r'\#[\w:-]+', Name.Function, 'tag'), ], 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), (r'([&!]?[=~])(' + _comma_dot + r'*\n)', bygroups(Punctuation, using(RubyLexer)), 'root'), default('plain'), ], 'content': [ include('css'), (r'%[\w:-]+', Name.Tag, 'tag'), (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'haml-comment-block'), '#pop'), (r'(-)(' + _comma_dot + r'*\n)', bygroups(Punctuation, using(RubyLexer)), '#pop'), (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), include('eval-or-plain'), ], 'tag': [ include('css'), (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)), (r'\[' + _dot + r'*?\]', using(RubyLexer)), (r'\(', Text, 'html-attributes'), (r'/[ \t]*\n', Punctuation, '#pop:2'), (r'[<>]{1,2}(?=[ \t=])', Punctuation), include('eval-or-plain'), ], 'plain': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(RubyLexer), String.Interpol)), (r'\n', Text, 'root'), ], 'html-attributes': [ (r'\s+', Text), (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), (r'[\w:-]+', Name.Attribute), (r'\)', Text, '#pop'), ], 'html-attribute-value': [ (r'[ \t]+', Text), (r'\w+', Name.Variable, '#pop'), (r'@\w+', Name.Variable.Instance, '#pop'), (r'\$\w+', Name.Variable.Global, '#pop'), (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'), (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'), ], 'html-comment-block': [ (_dot + '+', Comment), (r'\n', Text, 'root'), ], 'haml-comment-block': [ (_dot + '+', Comment.Preproc), (r'\n', Text, 'root'), ], 'filter-block': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(RubyLexer), String.Interpol)), (r'\n', Text, 'root'), ], }
class CypherLexer(RegexLexer): """ For `Cypher Query Language <https://neo4j.com/docs/developer-manual/3.3/cypher/>`_ For the Cypher version in Neo4j 3.3 .. versionadded:: 2.0 """ name = 'Cypher' aliases = ['cypher'] filenames = ['*.cyp', '*.cypher'] flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ include('comment'), include('keywords'), include('clauses'), include('relations'), include('strings'), include('whitespace'), include('barewords'), ], 'comment': [ (r'^.*//.*\n', Comment.Single), ], 'keywords': [ (r'(create|order|match|limit|set|skip|start|return|with|where|' r'delete|foreach|not|by|true|false)\b', Keyword), ], 'clauses': [ # based on https://neo4j.com/docs/cypher-refcard/3.3/ (r'(all|any|as|asc|ascending|assert|call|case|create|' r'create\s+index|create\s+unique|delete|desc|descending|' r'distinct|drop\s+constraint\s+on|drop\s+index\s+on|end|' r'ends\s+with|fieldterminator|foreach|in|is\s+node\s+key|' r'is\s+null|is\s+unique|limit|load\s+csv\s+from|match|merge|none|' r'not|null|on\s+match|on\s+create|optional\s+match|order\s+by|' r'remove|return|set|skip|single|start|starts\s+with|then|union|' r'union\s+all|unwind|using\s+periodic\s+commit|yield|where|when|' r'with)\b', Keyword), ], 'relations': [ (r'(-\[)(.*?)(\]->)', bygroups(Operator, using(this), Operator)), (r'(<-\[)(.*?)(\]-)', bygroups(Operator, using(this), Operator)), (r'(-\[)(.*?)(\]-)', bygroups(Operator, using(this), Operator)), (r'-->|<--|\[|\]', Operator), (r'<|>|<>|=|<=|=>|\(|\)|\||:|,|;', Punctuation), (r'[.*{}]', Punctuation), ], 'strings': [ (r'"(?:\\[tbnrf\'"\\]|[^\\"])*"', String), (r'`(?:``|[^`])+`', Name.Variable), ], 'whitespace': [ (r'\s+', Whitespace), ], 'barewords': [ (r'[a-z]\w*', Name), (r'\d+', Number), ], }
class VCLLexer(RegexLexer): """ For Varnish Configuration Language (VCL). .. versionadded:: 2.2 """ name = 'VCL' aliases = ['vcl'] filenames = ['*.vcl'] mimetypes = ['text/x-vclsrc'] def analyse_text(text): # If the very first line is 'vcl 4.0;' it's pretty much guaranteed # that this is VCL if text.startswith('vcl 4.0;'): return 1.0 # Skip over comments and blank lines # This is accurate enough that returning 0.9 is reasonable. # Almost no VCL files start without some comments. elif '\nvcl 4.0;' in text[:1000]: return 0.9 tokens = { 'probe': [ include('whitespace'), include('comments'), (r'(\.\w+)(\s*=\s*)([^;]*)(;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), (r'\}', Punctuation, '#pop'), ], 'acl': [ include('whitespace'), include('comments'), (r'[!/]+', Operator), (r';', Punctuation), (r'\d+', Number), (r'\}', Punctuation, '#pop'), ], 'backend': [ include('whitespace'), (r'(\.probe)(\s*=\s*)(\w+)(;)', bygroups(Name.Attribute, Operator, Name.Variable.Global, Punctuation)), (r'(\.probe)(\s*=\s*)(\{)', bygroups(Name.Attribute, Operator, Punctuation), 'probe'), (r'(\.\w+\b)(\s*=\s*)([^;\s]*)(\s*;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'statements': [ (r'(\d\.)?\d+[sdwhmy]', Literal.Date), (r'(\d\.)?\d+ms', Literal.Date), (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|' r'vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|' r'vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function), (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|' r'miss|fetch|restart)\b', Name.Constant), (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b', Name.Variable), (words( ('obj.status', 'req.hash_always_miss', 'beresp.backend', 'req.esi_level', 'req.can_gzip', 'beresp.ttl', 'obj.uncacheable', 'req.ttl', 'obj.hits', 'client.identity', 'req.hash_ignore_busy', 'obj.reason', 'req.xid', 'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age', 'local.ip', 'beresp.uncacheable', 'req.method', 'beresp.backend.ip', 'now', 'obj.grace', 'req.restarts', 'beresp.keep', 'req.proto', 'resp.proto', 'bereq.xid', 'bereq.between_bytes_timeout', 'req.esi', 'bereq.first_byte_timeout', 'bereq.method', 'bereq.connect_timeout', 'beresp.do_gzip', 'resp.status', 'beresp.do_gunzip', 'beresp.storage_hint', 'resp.is_streaming', 'beresp.do_stream', 'req_top.method', 'bereq.backend', 'beresp.backend.name', 'beresp.status', 'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason', 'bereq.retries', 'resp.reason', 'bereq.url', 'beresp.do_esi', 'beresp.proto', 'client.ip', 'bereq.proto', 'server.hostname', 'remote.ip', 'req.backend_hint', 'server.identity', 'req_top.url', 'beresp.grace', 'beresp.was_304', 'server.ip', 'bereq.uncacheable'), suffix=r'\b'), Name.Variable), (r'[!%&+*\-,/<.}{>=|~]+', Operator), (r'[();]', Punctuation), (r'[,]+', Punctuation), (words(('hash_data', 'regsub', 'regsuball', 'if', 'else', 'elsif', 'elif', 'synth', 'synthetic', 'ban', 'return', 'set', 'unset', 'import', 'include', 'new', 'rollback', 'call'), suffix=r'\b'), Keyword), (r'storage\.\w+\.\w+\b', Name.Variable), (words(('true', 'false')), Name.Builtin), (r'\d+\b', Number), (r'(backend)(\s+\w+)(\s*\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), (r'(probe\s)(\s*\w+\s)(\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'), (r'(acl\s)(\s*\w+\s)(\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'), (r'(vcl )(4.0)(;)$', bygroups(Keyword.Reserved, Name.Constant, Punctuation)), (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)', bygroups(Keyword, Name.Function, Punctuation)), (r'([a-zA-Z_]\w*)' r'(\.)' r'([a-zA-Z_]\w*)' r'(\s*\(.*\))', bygroups(Name.Function, Punctuation, Name.Function, using(this))), (r'[a-zA-Z_]\w*', Name), ], 'comment': [ (r'[^*/]+', Comment.Multiline), (r'/\*', Comment.Multiline, '#push'), (r'\*/', Comment.Multiline, '#pop'), (r'[*/]', Comment.Multiline), ], 'comments': [ (r'#.*$', Comment), (r'/\*', Comment.Multiline, 'comment'), (r'//.*$', Comment), ], 'string': [ (r'"', String, '#pop'), (r'[^"\n]+', String), # all other characters ], 'multistring': [ (r'[^"}]', String), (r'"\}', String, '#pop'), (r'["}]', String), ], 'whitespace': [ (r'L?"', String, 'string'), (r'\{"', String, 'multistring'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation ], 'root': [ include('whitespace'), include('comments'), include('statements'), (r'\s+', Text), ], }
class VhdlLexer(RegexLexer): """ For VHDL source code. .. versionadded:: 1.5 """ name = 'vhdl' aliases = ['vhdl'] filenames = ['*.vhdl', '*.vhd'] mimetypes = ['text/x-vhdl'] flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'--.*?$', Comment.Single), (r"'(U|X|0|1|Z|W|L|H|-)'", String.Char), (r'[~!%^&*+=|?:<>/-]', Operator), (r"'[a-z_]\w*", Name.Attribute), (r'[()\[\],.;\']', Punctuation), (r'"[^\n\\"]*"', String), (r'(library)(\s+)([a-z_]\w*)', bygroups(Keyword, Text, Name.Namespace)), (r'(use)(\s+)(entity)', bygroups(Keyword, Text, Keyword)), (r'(use)(\s+)([a-z_][\w.]*\.)(all)', bygroups(Keyword, Text, Name.Namespace, Keyword)), (r'(use)(\s+)([a-z_][\w.]*)', bygroups(Keyword, Text, Name.Namespace)), (r'(std|ieee)(\.[a-z_]\w*)', bygroups(Name.Namespace, Name.Namespace)), (words(('std', 'ieee', 'work'), suffix=r'\b'), Name.Namespace), (r'(entity|component)(\s+)([a-z_]\w*)', bygroups(Keyword, Text, Name.Class)), (r'(architecture|configuration)(\s+)([a-z_]\w*)(\s+)' r'(of)(\s+)([a-z_]\w*)(\s+)(is)', bygroups(Keyword, Text, Name.Class, Text, Keyword, Text, Name.Class, Text, Keyword)), (r'([a-z_]\w*)(:)(\s+)(process|for)', bygroups(Name.Class, Operator, Text, Keyword)), (r'(end)(\s+)', bygroups(using(this), Text), 'endblock'), include('types'), include('keywords'), include('numbers'), (r'[a-z_]\w*', Name), ], 'endblock': [ include('keywords'), (r'[a-z_]\w*', Name.Class), (r'(\s+)', Text), (r';', Punctuation, '#pop'), ], 'types': [ (words(('boolean', 'bit', 'character', 'severity_level', 'integer', 'time', 'delay_length', 'natural', 'positive', 'string', 'bit_vector', 'file_open_kind', 'file_open_status', 'std_ulogic', 'std_ulogic_vector', 'std_logic', 'std_logic_vector', 'signed', 'unsigned'), suffix=r'\b'), Keyword.Type), ], 'keywords': [ (words( ('abs', 'access', 'after', 'alias', 'all', 'and', 'architecture', 'array', 'assert', 'attribute', 'begin', 'block', 'body', 'buffer', 'bus', 'case', 'component', 'configuration', 'constant', 'disconnect', 'downto', 'else', 'elsif', 'end', 'entity', 'exit', 'file', 'for', 'function', 'generate', 'generic', 'group', 'guarded', 'if', 'impure', 'in', 'inertial', 'inout', 'is', 'label', 'library', 'linkage', 'literal', 'loop', 'map', 'mod', 'nand', 'new', 'next', 'nor', 'not', 'null', 'of', 'on', 'open', 'or', 'others', 'out', 'package', 'port', 'postponed', 'procedure', 'process', 'pure', 'range', 'record', 'register', 'reject', 'rem', 'return', 'rol', 'ror', 'select', 'severity', 'signal', 'shared', 'sla', 'sll', 'sra', 'srl', 'subtype', 'then', 'to', 'transport', 'type', 'units', 'until', 'use', 'variable', 'wait', 'when', 'while', 'with', 'xnor', 'xor'), suffix=r'\b'), Keyword), ], 'numbers': [ (r'\d{1,2}#[0-9a-f_]+#?', Number.Integer), (r'\d+', Number.Integer), (r'(\d+\.\d*|\.\d+|\d+)E[+-]?\d+', Number.Float), (r'X"[0-9a-f_]+"', Number.Hex), (r'O"[0-7_]+"', Number.Oct), (r'B"[01_]+"', Number.Bin), ], }
class RPMSpecLexer(RegexLexer): """ For RPM ``.spec`` files. .. versionadded:: 1.6 """ name = 'RPMSpec' aliases = ['spec'] filenames = ['*.spec'] mimetypes = ['text/x-rpm-spec'] _directives = ('(?:package|prep|build|install|clean|check|pre[a-z]*|' 'post[a-z]*|trigger[a-z]*|files)') tokens = { 'root': [ (r'#.*\n', Comment), include('basic'), ], 'description': [ (r'^(%' + _directives + ')(.*)$', bygroups(Name.Decorator, Text), '#pop'), (r'\n', Text), (r'.', Text), ], 'changelog': [ (r'\*.*\n', Generic.Subheading), (r'^(%' + _directives + ')(.*)$', bygroups(Name.Decorator, Text), '#pop'), (r'\n', Text), (r'.', Text), ], 'string': [ (r'"', String.Double, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape), include('interpol'), (r'.', String.Double), ], 'basic': [ include('macro'), (r'(?i)^(Name|Version|Release|Epoch|Summary|Group|License|Packager|' r'Vendor|Icon|URL|Distribution|Prefix|Patch[0-9]*|Source[0-9]*|' r'Requires\(?[a-z]*\)?|[a-z]+Req|Obsoletes|Suggests|Provides|Conflicts|' r'Build[a-z]+|[a-z]+Arch|Auto[a-z]+)(:)(.*)$', bygroups(Generic.Heading, Punctuation, using(this))), (r'^%description', Name.Decorator, 'description'), (r'^%changelog', Name.Decorator, 'changelog'), (r'^(%' + _directives + ')(.*)$', bygroups(Name.Decorator, Text)), (r'%(attr|defattr|dir|doc(?:dir)?|setup|config(?:ure)?|' r'make(?:install)|ghost|patch[0-9]+|find_lang|exclude|verify)', Keyword), include('interpol'), (r"'.*?'", String.Single), (r'"', String.Double, 'string'), (r'.', Text), ], 'macro': [ (r'%define.*\n', Comment.Preproc), (r'%\{\!\?.*%define.*\}', Comment.Preproc), (r'(%(?:if(?:n?arch)?|else(?:if)?|endif))(.*)$', bygroups(Comment.Preproc, Text)), ], 'interpol': [ (r'%\{?__[a-z_]+\}?', Name.Function), (r'%\{?_([a-z_]+dir|[a-z_]+path|prefix)\}?', Keyword.Pseudo), (r'%\{\?\w+\}', Name.Variable), (r'\$\{?RPM_[A-Z0-9_]+\}?', Name.Variable.Global), (r'%\{[a-zA-Z]\w+\}', Keyword.Constant), ] }
class VimLexer(RegexLexer): """ Lexer for VimL script files. .. versionadded:: 0.8 """ name = 'VimL' aliases = ['vim'] filenames = [ '*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc' ] mimetypes = ['text/x-vim'] flags = re.MULTILINE _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?' tokens = { 'root': [ (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)', bygroups(using(this), Keyword, Text, Operator, Text, Text, using(PythonLexer), Text)), (r'^([ \t:]*)(' + _python + r')([ \t])(.*)', bygroups(using(this), Keyword, Text, using(PythonLexer))), (r'^\s*".*', Comment), (r'[ \t]+', Text), # TODO: regexes can have other delims (r'/(\\\\|\\/|[^\n/])*/', String.Regex), (r'"(\\\\|\\"|[^\n"])*"', String.Double), (r"'(''|[^\n'])*'", String.Single), # Who decided that doublequote was a good comment character?? (r'(?<=\s)"[^\-:.%#=*].*', Comment), (r'-?\d+', Number), (r'#[0-9a-f]{6}', Number.Hex), (r'^:', Punctuation), (r'[()<>+=!|,~-]', Punctuation), # Inexact list. Looks decent. (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b', Keyword), (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin), (r'\b\w+\b', Name.Other), # These are postprocessed below (r'.', Text), ], } def __init__(self, **options): from typecode._vendor.pygments.lexers._vim_builtins import command, option, auto self._cmd = command self._opt = option self._aut = auto RegexLexer.__init__(self, **options) def is_in(self, w, mapping): r""" It's kind of difficult to decide if something might be a keyword in VimL because it allows you to abbreviate them. In fact, 'ab[breviate]' is a good example. :ab, :abbre, or :abbreviate are valid ways to call it so rather than making really awful regexps like:: \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b we match `\b\w+\b` and then call is_in() on those tokens. See `scripts/get_vimkw.py` for how the lists are extracted. """ p = bisect(mapping, (w, )) if p > 0: if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \ mapping[p-1][1][:len(w)] == w: return True if p < len(mapping): return mapping[p][0] == w[:len(mapping[p][0])] and \ mapping[p][1][:len(w)] == w return False def get_tokens_unprocessed(self, text): # TODO: builtins are only subsequent tokens on lines # and 'keywords' only happen at the beginning except # for :au ones for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): if token is Name.Other: if self.is_in(value, self._cmd): yield index, Keyword, value elif self.is_in(value, self._opt) or \ self.is_in(value, self._aut): yield index, Name.Builtin, value else: yield index, Text, value else: yield index, token, value
class PugLexer(ExtendedRegexLexer): """ For Pug markup. Pug is a variant of Scaml, see: http://scalate.fusesource.org/documentation/scaml-reference.html .. versionadded:: 1.4 """ name = 'Pug' aliases = ['pug', 'jade'] filenames = ['*.pug', '*.jade'] mimetypes = ['text/x-pug', 'text/x-jade'] flags = re.IGNORECASE _dot = r'.' tokens = { 'root': [ (r'[ \t]*\n', Text), (r'[ \t]*', _indentation), ], 'css': [ (r'\.[\w:-]+', Name.Class, 'tag'), (r'\#[\w:-]+', Name.Function, 'tag'), ], 'eval-or-plain': [ (r'[&!]?==', Punctuation, 'plain'), (r'([&!]?[=~])(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), 'root'), default('plain'), ], 'content': [ include('css'), (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)', bygroups(Comment, Comment.Special, Comment), '#pop'), (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), '#pop'), (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, 'scaml-comment-block'), '#pop'), (r'(-@\s*)(import)?(' + _dot + r'*\n)', bygroups(Punctuation, Keyword, using(ScalaLexer)), '#pop'), (r'(-)(' + _dot + r'*\n)', bygroups(Punctuation, using(ScalaLexer)), '#pop'), (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), '#pop'), (r'[\w:-]+', Name.Tag, 'tag'), (r'\|', Text, 'eval-or-plain'), ], 'tag': [ include('css'), (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)), (r'\[' + _dot + r'*?\]', using(ScalaLexer)), (r'\(', Text, 'html-attributes'), (r'/[ \t]*\n', Punctuation, '#pop:2'), (r'[<>]{1,2}(?=[ \t=])', Punctuation), include('eval-or-plain'), ], 'plain': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], 'html-attributes': [ (r'\s+', Text), (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), (r'[\w:-]+', Name.Attribute), (r'\)', Text, '#pop'), ], 'html-attribute-value': [ (r'[ \t]+', Text), (r'\w+', Name.Variable, '#pop'), (r'@\w+', Name.Variable.Instance, '#pop'), (r'\$\w+', Name.Variable.Global, '#pop'), (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'), (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'), ], 'html-comment-block': [ (_dot + '+', Comment), (r'\n', Text, 'root'), ], 'scaml-comment-block': [ (_dot + '+', Comment.Preproc), (r'\n', Text, 'root'), ], 'filter-block': [ (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), (r'(#\{)(' + _dot + r'*?)(\})', bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), (r'\n', Text, 'root'), ], }
class CFamilyLexer(RegexLexer): """ For C family source code. This is used as a base class to avoid repetitious definitions. """ #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' # The trailing ?, rather than *, avoids a geometric performance drop here. #: only one /* */ style comment _ws1 = r'\s*(?:/[*].*?[*]/\s*)?' tokens = { 'whitespace': [ # preprocessor directives: without whitespace (r'^#if\s+0', Comment.Preproc, 'if0'), ('^#', Comment.Preproc, 'macro'), # or with whitespace ('^(' + _ws1 + r')(#if\s+0)', bygroups(using(this), Comment.Preproc), 'if0'), ('^(' + _ws1 + ')(#)', bygroups(using(this), Comment.Preproc), 'macro'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single), (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline), # Open until EOF, so no ending delimeter (r'/(\\\n)?[*][\w\W]*', Comment.Multiline), ], 'statements': [ (r'(L?)(")', bygroups(String.Affix, String), 'string'), (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')", bygroups(String.Affix, String.Char, String.Char, String.Char)), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), (r'0[0-7]+[LlUu]*', Number.Oct), (r'\d+[LlUu]*', Number.Integer), (r'\*/', Error), (r'[~!%^&*+=|?:<>/-]', Operator), (r'[()\[\],.]', Punctuation), (words( ('asm', 'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else', 'enum', 'extern', 'for', 'goto', 'if', 'register', 'restricted', 'return', 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', 'volatile', 'while'), suffix=r'\b'), Keyword), (r'(bool|int|long|float|short|double|char|unsigned|signed|void)\b', Keyword.Type), (words(('inline', '_inline', '__inline', 'naked', 'restrict', 'thread', 'typename'), suffix=r'\b'), Keyword.Reserved), # Vector intrinsics (r'(__m(128i|128d|128|64))\b', Keyword.Reserved), # Microsoft-isms (words(('asm', 'int8', 'based', 'except', 'int16', 'stdcall', 'cdecl', 'fastcall', 'int32', 'declspec', 'finally', 'int64', 'try', 'leave', 'wchar_t', 'w64', 'unaligned', 'raise', 'noop', 'identifier', 'forceinline', 'assume'), prefix=r'__', suffix=r'\b'), Keyword.Reserved), (r'(true|false|NULL)\b', Name.Builtin), (r'([a-zA-Z_]\w*)(\s*)(:)(?!:)', bygroups(Name.Label, Text, Punctuation)), (r'[a-zA-Z_]\w*', Name), ], 'root': [ include('whitespace'), # functions ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'([^;{]*)(\{)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation), 'function'), # function declarations ( r'((?:[\w*\s])+?(?:\s|[*]))' # return arguments r'([a-zA-Z_]\w*)' # method name r'(\s*\([^;]*?\))' # signature r'([^;]*)(;)', bygroups(using(this), Name.Function, using(this), using(this), Punctuation)), default('statement'), ], 'statement': [ include('whitespace'), include('statements'), ('[{}]', Punctuation), (';', Punctuation, '#pop'), ], 'function': [ include('whitespace'), include('statements'), (';', Punctuation), (r'\{', Punctuation, '#push'), (r'\}', Punctuation, '#pop'), ], 'string': [ (r'"', String, '#pop'), (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|' r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape), (r'[^\\"\n]+', String), # all other characters (r'\\\n', String), # line continuation (r'\\', String), # stray backslash ], 'macro': [ (r'(include)(' + _ws1 + r')([^\n]+)', bygroups(Comment.Preproc, Text, Comment.PreprocFile)), (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'//.*?\n', Comment.Single, '#pop'), (r'/', Comment.Preproc), (r'(?<=\\)\n', Comment.Preproc), (r'\n', Comment.Preproc, '#pop'), ], 'if0': [ (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'), (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'), (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'), (r'.*?\n', Comment), ] } stdlib_types = set( ('size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t', 'fpos_t', 'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t', 'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t')) c99_types = set( ('_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t', 'int_least16_t', 'int_least32_t', 'int_least64_t', 'uint_least8_t', 'uint_least16_t', 'uint_least32_t', 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t', 'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t')) linux_types = set( ('clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t', 'ino_t', 'key_t', 'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t', 'sighandler_t', 'siginfo_t', 'sigset_t', 'sigval_t', 'socklen_t', 'timer_t', 'uid_t')) def __init__(self, **options): self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting', True) self.c99highlighting = get_bool_opt(options, 'c99highlighting', True) self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): if token is Name: if self.stdlibhighlighting and value in self.stdlib_types: token = Keyword.Type elif self.c99highlighting and value in self.c99_types: token = Keyword.Type elif self.platformhighlighting and value in self.linux_types: token = Keyword.Type yield index, token, value
class BatchLexer(RegexLexer): """ Lexer for the DOS/Windows Batch file format. .. versionadded:: 0.7 """ name = 'Batchfile' aliases = ['bat', 'batch', 'dosbatch', 'winbatch'] filenames = ['*.bat', '*.cmd'] mimetypes = ['application/x-dos-batch'] flags = re.MULTILINE | re.IGNORECASE _nl = r'\n\x1a' _punct = r'&<>|' _ws = r'\t\v\f\r ,;=\xa0' _space = r'(?:(?:(?:\^[%s])?[%s])+)' % (_nl, _ws) _keyword_terminator = (r'(?=(?:\^[%s]?)?[%s+./:[\\\]]|[%s%s(])' % (_nl, _ws, _nl, _punct)) _token_terminator = r'(?=\^?[%s]|[%s%s])' % (_ws, _punct, _nl) _start_label = r'((?:(?<=^[^:])|^[^:]?)[%s]*)(:)' % _ws _label = r'(?:(?:[^%s%s%s+:^]|\^[%s]?[\w\W])*)' % (_nl, _punct, _ws, _nl) _label_compound = (r'(?:(?:[^%s%s%s+:^)]|\^[%s]?[^)])*)' % (_nl, _punct, _ws, _nl)) _number = r'(?:-?(?:0[0-7]+|0x[\da-f]+|\d+)%s)' % _token_terminator _opword = r'(?:equ|geq|gtr|leq|lss|neq)' _string = r'(?:"[^%s"]*(?:"|(?=[%s])))' % (_nl, _nl) _variable = (r'(?:(?:%%(?:\*|(?:~[a-z]*(?:\$[^:]+:)?)?\d|' r'[^%%:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:[^%%%s^]|' r'\^[^%%%s])[^=%s]*=(?:[^%%%s^]|\^[^%%%s])*)?)?%%))|' r'(?:\^?![^!:%s]+(?::(?:~(?:-?\d+)?(?:,(?:-?\d+)?)?|(?:' r'[^!%s^]|\^[^!%s])[^=%s]*=(?:[^!%s^]|\^[^!%s])*)?)?\^?!))' % (_nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl, _nl)) _core_token = r'(?:(?:(?:\^[%s]?)?[^"%s%s%s])+)' % (_nl, _nl, _punct, _ws) _core_token_compound = r'(?:(?:(?:\^[%s]?)?[^"%s%s%s)])+)' % (_nl, _nl, _punct, _ws) _token = r'(?:[%s]+|%s)' % (_punct, _core_token) _token_compound = r'(?:[%s]+|%s)' % (_punct, _core_token_compound) _stoken = (r'(?:[%s]+|(?:%s|%s|%s)+)' % (_punct, _string, _variable, _core_token)) def _make_begin_state(compound, _core_token=_core_token, _core_token_compound=_core_token_compound, _keyword_terminator=_keyword_terminator, _nl=_nl, _punct=_punct, _string=_string, _space=_space, _start_label=_start_label, _stoken=_stoken, _token_terminator=_token_terminator, _variable=_variable, _ws=_ws): rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct, ')' if compound else '') rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl) rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl) set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl suffix = '' if compound: _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator _token_terminator = r'(?:(?=\))|%s)' % _token_terminator suffix = '/compound' return [ ((r'\)', Punctuation, '#pop') if compound else (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line), Comment.Single)), (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix), (_space, using(this, state='text')), include('redirect%s' % suffix), (r'[%s]+' % _nl, Text), (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation), (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|' r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' % (_nl, _token_terminator, _space, _core_token_compound if compound else _core_token, _nl, _nl), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' % (_keyword_terminator, rest, _nl, _nl, rest), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy', 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase', 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move', 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren', 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time', 'title', 'type', 'ver', 'verify', 'vol'), suffix=_keyword_terminator), Keyword, 'follow%s' % suffix), (r'(call)(%s?)(:)' % _space, bygroups(Keyword, using(this, state='text'), Punctuation), 'call%s' % suffix), (r'call%s' % _keyword_terminator, Keyword), (r'(for%s(?!\^))(%s)(/f%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/f', 'for')), (r'(for%s(?!\^))(%s)(/l%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/l', 'for')), (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')), (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space), bygroups(Keyword, using(this, state='text'), Punctuation), 'label%s' % suffix), (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' % (_token_terminator, _space, _token_terminator, _space, _token_terminator, _space), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), Keyword, using(this, state='text')), ('(?', 'if')), (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' % (_token_terminator, _space, _stoken, _keyword_terminator, rest_of_line_compound if compound else rest_of_line), Comment.Single, 'follow%s' % suffix), (r'(set%s)%s(/a)' % (_keyword_terminator, set_space), bygroups(Keyword, using(this, state='text'), Keyword), 'arithmetic%s' % suffix), (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|' r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' % (_keyword_terminator, set_space, set_space, _nl, _nl, _punct, ')' if compound else '', _nl, _nl), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), using(this, state='variable'), Punctuation), 'follow%s' % suffix), default('follow%s' % suffix) ] def _make_follow_state(compound, _label=_label, _label_compound=_label_compound, _nl=_nl, _space=_space, _start_label=_start_label, _token=_token, _token_compound=_token_compound, _ws=_ws): suffix = '/compound' if compound else '' state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state += [ (r'%s([%s]*)(%s)(.*)' % (_start_label, _ws, _label_compound if compound else _label), bygroups(Text, Punctuation, Text, Name.Label, Comment.Single)), include('redirect%s' % suffix), (r'(?=[%s])' % _nl, Text, '#pop'), (r'\|\|?|&&?', Punctuation, '#pop'), include('text') ] return state def _make_arithmetic_state(compound, _nl=_nl, _punct=_punct, _string=_string, _variable=_variable, _ws=_ws): op = r'=+\-*/!~' state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state += [ (r'0[0-7]+', Number.Oct), (r'0x[\da-f]+', Number.Hex), (r'\d+', Number.Integer), (r'[(),]+', Punctuation), (r'([%s]|%%|\^\^)+' % op, Operator), (r'(%s|%s|(\^[%s]?)?[^()%s%%^"%s%s%s]|\^[%s%s]?%s)+' % (_string, _variable, _nl, op, _nl, _punct, _ws, _nl, _ws, r'[^)]' if compound else r'[\w\W]'), using(this, state='variable')), (r'(?=[\x00|&])', Text, '#pop'), include('follow') ] return state def _make_call_state(compound, _label=_label, _label_compound=_label_compound): state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state.append((r'(:?)(%s)' % (_label_compound if compound else _label), bygroups(Punctuation, Name.Label), '#pop')) return state def _make_label_state(compound, _label=_label, _label_compound=_label_compound, _nl=_nl, _punct=_punct, _string=_string, _variable=_variable): state = [] if compound: state.append((r'(?=\))', Text, '#pop')) state.append((r'(%s?)((?:%s|%s|\^[%s]?%s|[^"%%^%s%s%s])*)' % (_label_compound if compound else _label, _string, _variable, _nl, r'[^)]' if compound else r'[\w\W]', _nl, _punct, r')' if compound else ''), bygroups(Name.Label, Comment.Single), '#pop')) return state def _make_redirect_state(compound, _core_token_compound=_core_token_compound, _nl=_nl, _punct=_punct, _stoken=_stoken, _string=_string, _space=_space, _variable=_variable, _ws=_ws): stoken_compound = (r'(?:[%s]+|(?:%s|%s|%s)+)' % (_punct, _string, _variable, _core_token_compound)) return [ (r'((?:(?<=[%s%s])\d)?)(>>?&|<&)([%s%s]*)(\d)' % (_nl, _ws, _nl, _ws), bygroups(Number.Integer, Punctuation, Text, Number.Integer)), (r'((?:(?<=[%s%s])(?<!\^[%s])\d)?)(>>?|<)(%s?%s)' % (_nl, _ws, _nl, _space, stoken_compound if compound else _stoken), bygroups(Number.Integer, Punctuation, using(this, state='text'))) ] tokens = { 'root': _make_begin_state(False), 'follow': _make_follow_state(False), 'arithmetic': _make_arithmetic_state(False), 'call': _make_call_state(False), 'label': _make_label_state(False), 'redirect': _make_redirect_state(False), 'root/compound': _make_begin_state(True), 'follow/compound': _make_follow_state(True), 'arithmetic/compound': _make_arithmetic_state(True), 'call/compound': _make_call_state(True), 'label/compound': _make_label_state(True), 'redirect/compound': _make_redirect_state(True), 'variable-or-escape': [ (_variable, Name.Variable), (r'%%%%|\^[%s]?(\^!|[\w\W])' % _nl, String.Escape) ], 'string': [ (r'"', String.Double, '#pop'), (_variable, Name.Variable), (r'\^!|%%', String.Escape), (r'[^"%%^%s]+|[%%^]' % _nl, String.Double), default('#pop') ], 'sqstring': [ include('variable-or-escape'), (r'[^%]+|%', String.Single) ], 'bqstring': [ include('variable-or-escape'), (r'[^%]+|%', String.Backtick) ], 'text': [ (r'"', String.Double, 'string'), include('variable-or-escape'), (r'[^"%%^%s%s%s\d)]+|.' % (_nl, _punct, _ws), Text) ], 'variable': [ (r'"', String.Double, 'string'), include('variable-or-escape'), (r'[^"%%^%s]+|.' % _nl, Name.Variable) ], 'for': [ (r'(%s)(in)(%s)(\()' % (_space, _space), bygroups(using(this, state='text'), Keyword, using(this, state='text'), Punctuation), '#pop'), include('follow') ], 'for2': [ (r'\)', Punctuation), (r'(%s)(do%s)' % (_space, _token_terminator), bygroups(using(this, state='text'), Keyword), '#pop'), (r'[%s]+' % _nl, Text), include('follow') ], 'for/f': [ (r'(")((?:%s|[^"])*?")([%s%s]*)(\))' % (_variable, _nl, _ws), bygroups(String.Double, using(this, state='string'), Text, Punctuation)), (r'"', String.Double, ('#pop', 'for2', 'string')), (r"('(?:%%%%|%s|[\w\W])*?')([%s%s]*)(\))" % (_variable, _nl, _ws), bygroups(using(this, state='sqstring'), Text, Punctuation)), (r'(`(?:%%%%|%s|[\w\W])*?`)([%s%s]*)(\))' % (_variable, _nl, _ws), bygroups(using(this, state='bqstring'), Text, Punctuation)), include('for2') ], 'for/l': [ (r'-?\d+', Number.Integer), include('for2') ], 'if': [ (r'((?:cmdextversion|errorlevel)%s)(%s)(\d+)' % (_token_terminator, _space), bygroups(Keyword, using(this, state='text'), Number.Integer), '#pop'), (r'(defined%s)(%s)(%s)' % (_token_terminator, _space, _stoken), bygroups(Keyword, using(this, state='text'), using(this, state='variable')), '#pop'), (r'(exist%s)(%s%s)' % (_token_terminator, _space, _stoken), bygroups(Keyword, using(this, state='text')), '#pop'), (r'(%s%s)(%s)(%s%s)' % (_number, _space, _opword, _space, _number), bygroups(using(this, state='arithmetic'), Operator.Word, using(this, state='arithmetic')), '#pop'), (_stoken, using(this, state='text'), ('#pop', 'if2')), ], 'if2': [ (r'(%s?)(==)(%s?%s)' % (_space, _space, _stoken), bygroups(using(this, state='text'), Operator, using(this, state='text')), '#pop'), (r'(%s)(%s)(%s%s)' % (_space, _opword, _space, _stoken), bygroups(using(this, state='text'), Operator.Word, using(this, state='text')), '#pop') ], '(?': [ (_space, using(this, state='text')), (r'\(', Punctuation, ('#pop', 'else?', 'root/compound')), default('#pop') ], 'else?': [ (_space, using(this, state='text')), (r'else%s' % _token_terminator, Keyword, '#pop'), default('#pop') ] }
class HtmlLexer(RegexLexer): """ For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted by the appropriate lexer. """ name = 'HTML' aliases = ['html'] filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt'] mimetypes = ['text/html', 'application/xhtml+xml'] flags = re.IGNORECASE | re.DOTALL tokens = { 'root': [ ('[^<&]+', Text), (r'&\S*?;', Name.Entity), (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc), ('<!--', Comment, 'comment'), (r'<\?.*?\?>', Comment.Preproc), ('<![^>]*>', Comment.Preproc), (r'(<)(\s*)(script)(\s*)', bygroups(Punctuation, Text, Name.Tag, Text), ('script-content', 'tag')), (r'(<)(\s*)(style)(\s*)', bygroups(Punctuation, Text, Name.Tag, Text), ('style-content', 'tag')), # note: this allows tag names not used in HTML like <x:with-dash>, # this is to support yet-unknown template engines and the like (r'(<)(\s*)([\w:.-]+)', bygroups(Punctuation, Text, Name.Tag), 'tag'), (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)', bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, Punctuation)), ], 'comment': [ ('[^-]+', Comment), ('-->', Comment, '#pop'), ('-', Comment), ], 'tag': [ (r'\s+', Text), (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text), 'attr'), (r'[\w:-]+', Name.Attribute), (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'), ], 'script-content': [ (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)', bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, Punctuation), '#pop'), (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)), # fallback cases for when there is no closing script tag # first look for newline and then go back into root state # if that fails just read the rest of the file # this is similar to the error handling logic in lexer.py (r'.+?\n', using(JavascriptLexer), '#pop'), (r'.+', using(JavascriptLexer), '#pop'), ], 'style-content': [ (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)', bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, Punctuation), '#pop'), (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)), # fallback cases for when there is no closing style tag # first look for newline and then go back into root state # if that fails just read the rest of the file # this is similar to the error handling logic in lexer.py (r'.+?\n', using(CssLexer), '#pop'), (r'.+', using(CssLexer), '#pop'), ], 'attr': [ ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], } def analyse_text(text): if html_doctype_matches(text): return 0.5
def _make_begin_state(compound, _core_token=_core_token, _core_token_compound=_core_token_compound, _keyword_terminator=_keyword_terminator, _nl=_nl, _punct=_punct, _string=_string, _space=_space, _start_label=_start_label, _stoken=_stoken, _token_terminator=_token_terminator, _variable=_variable, _ws=_ws): rest = '(?:%s|%s|[^"%%%s%s%s])*' % (_string, _variable, _nl, _punct, ')' if compound else '') rest_of_line = r'(?:(?:[^%s^]|\^[%s]?[\w\W])*)' % (_nl, _nl) rest_of_line_compound = r'(?:(?:[^%s^)]|\^[%s]?[^)])*)' % (_nl, _nl) set_space = r'((?:(?:\^[%s]?)?[^\S\n])*)' % _nl suffix = '' if compound: _keyword_terminator = r'(?:(?=\))|%s)' % _keyword_terminator _token_terminator = r'(?:(?=\))|%s)' % _token_terminator suffix = '/compound' return [ ((r'\)', Punctuation, '#pop') if compound else (r'\)((?=\()|%s)%s' % (_token_terminator, rest_of_line), Comment.Single)), (r'(?=%s)' % _start_label, Text, 'follow%s' % suffix), (_space, using(this, state='text')), include('redirect%s' % suffix), (r'[%s]+' % _nl, Text), (r'\(', Punctuation, 'root/compound'), (r'@+', Punctuation), (r'((?:for|if|rem)(?:(?=(?:\^[%s]?)?/)|(?:(?!\^)|' r'(?<=m))(?:(?=\()|%s)))(%s?%s?(?:\^[%s]?)?/(?:\^[%s]?)?\?)' % (_nl, _token_terminator, _space, _core_token_compound if compound else _core_token, _nl, _nl), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (r'(goto%s)(%s(?:\^[%s]?)?/(?:\^[%s]?)?\?%s)' % (_keyword_terminator, rest, _nl, _nl, rest), bygroups(Keyword, using(this, state='text')), 'follow%s' % suffix), (words(('assoc', 'break', 'cd', 'chdir', 'cls', 'color', 'copy', 'date', 'del', 'dir', 'dpath', 'echo', 'endlocal', 'erase', 'exit', 'ftype', 'keys', 'md', 'mkdir', 'mklink', 'move', 'path', 'pause', 'popd', 'prompt', 'pushd', 'rd', 'ren', 'rename', 'rmdir', 'setlocal', 'shift', 'start', 'time', 'title', 'type', 'ver', 'verify', 'vol'), suffix=_keyword_terminator), Keyword, 'follow%s' % suffix), (r'(call)(%s?)(:)' % _space, bygroups(Keyword, using(this, state='text'), Punctuation), 'call%s' % suffix), (r'call%s' % _keyword_terminator, Keyword), (r'(for%s(?!\^))(%s)(/f%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/f', 'for')), (r'(for%s(?!\^))(%s)(/l%s)' % (_token_terminator, _space, _token_terminator), bygroups(Keyword, using(this, state='text'), Keyword), ('for/l', 'for')), (r'for%s(?!\^)' % _token_terminator, Keyword, ('for2', 'for')), (r'(goto%s)(%s?)(:?)' % (_keyword_terminator, _space), bygroups(Keyword, using(this, state='text'), Punctuation), 'label%s' % suffix), (r'(if(?:(?=\()|%s)(?!\^))(%s?)((?:/i%s)?)(%s?)((?:not%s)?)(%s?)' % (_token_terminator, _space, _token_terminator, _space, _token_terminator, _space), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), Keyword, using(this, state='text')), ('(?', 'if')), (r'rem(((?=\()|%s)%s?%s?.*|%s%s)' % (_token_terminator, _space, _stoken, _keyword_terminator, rest_of_line_compound if compound else rest_of_line), Comment.Single, 'follow%s' % suffix), (r'(set%s)%s(/a)' % (_keyword_terminator, set_space), bygroups(Keyword, using(this, state='text'), Keyword), 'arithmetic%s' % suffix), (r'(set%s)%s((?:/p)?)%s((?:(?:(?:\^[%s]?)?[^"%s%s^=%s]|' r'\^[%s]?[^"=])+)?)((?:(?:\^[%s]?)?=)?)' % (_keyword_terminator, set_space, set_space, _nl, _nl, _punct, ')' if compound else '', _nl, _nl), bygroups(Keyword, using(this, state='text'), Keyword, using(this, state='text'), using(this, state='variable'), Punctuation), 'follow%s' % suffix), default('follow%s' % suffix) ]