def _highlight_syntax(self):
    """Colorize the file's lines with pygments when highlighting is enabled.

    ``self.syntax_format`` selects the lexer: any value other than ``True``
    is looked up as a lexer name, while ``True`` means "guess" (using the
    file name when one is set, otherwise the content alone).  Nothing
    happens when ``self.syntax_format`` is falsy.
    """
    if not self.syntax_format:
        return

    source = ''.join(str(raw.decode("utf-8")) for raw in self.read_lines())

    if self.syntax_format is not True:
        lexer = get_lexer_by_name(self.syntax_format)
    elif not self.file_name:
        lexer = guess_lexer(source)
    else:
        try:
            lexer = guess_lexer_for_filename(self.file_name, source)
        except TypeError:
            # XXX pygments py3 incompatibility workaround; fixed in tip
            lexer = guess_lexer(source)

    colored_lines = highlight(
        source, lexer, TerminalFormatter(bg="dark")).split('\n')

    try:
        for line_num, colored in enumerate(colored_lines, 1):
            index = self._find_line_index(line_num)
            self._lines[index].update_highlight(colored)
    except KeyError:
        # The conversion to a string sometimes adds a trailing line that
        # has no counterpart among the stored lines.
        pass
def parse(self):
    """Render the concatenated raw content of the child nodes with pygments.

    Returns plain ``<pre>`` markup when ``highlight`` is ``None``.  The
    lexer named by the ``lang`` argument is preferred; if it is missing or
    unknown the lexer is guessed from the content, with ``TextLexer`` as the
    last resort.  A non-numeric ``hl_line`` argument is reported through
    ``soft_raise``.
    """
    inner = ''.join(node.raw_content for node in self.nodes)
    if highlight is None:
        return '<pre>%s</pre>' % inner

    lexer = None
    lang = self.arguments['lang']
    if lang:
        try:
            lexer = get_lexer_by_name(lang)
        except ClassNotFound:
            pass
    if lexer is None:
        try:
            lexer = guess_lexer(inner)
        except ClassNotFound:
            lexer = TextLexer()

    hl_line = self.arguments['hl_line']
    if not hl_line.isdigit():
        return self.soft_raise("Code argument hl_line must be digit")
    line_number = int(hl_line)
    # Line 0 means "highlight nothing".
    hl_lines = [line_number] if line_number else []

    linenos = 'inline' if self.arguments['linenos'] == '1' else False
    formatter = HtmlFormatter(cssclass='code', noclasses=True,
                              linenos=linenos, hl_lines=hl_lines)
    return highlight(inner, lexer, formatter)
def highlight_code(self, string, pattern, lang_pattern):
    """Replace every code block found in *string* with highlighted HTML.

    string -- String containing the code to highlight.
    pattern -- Compiled regex whose matches unpack into three groups:
               the whole code block, the (possibly empty) language, and
               the code to highlight.
    lang_pattern -- Compiled regex stripped from the language group to
                    obtain the bare language name.

    Returns the string with each matched block substituted.
    """
    for match, lang, code in re.findall(pattern, string):
        lexer = None
        if lang:
            lang = re.sub(lang_pattern, '', lang)
            try:
                lexer = get_lexer_by_name(lang.lower(),
                                          **self.config.PYGMENTS_LEXER)
            except ClassNotFound:
                # Unsupported or mistyped language: guess below instead.
                pass
        if lexer is None:
            lexer = guess_lexer(code, **self.config.PYGMENTS_LEXER)

        formatter = HtmlFormatter(**self.config.PYGMENTS_HTML)
        # Replace only once per match so identical blocks that appear
        # several times are each handled by their own iteration.
        string = string.replace(match, highlight(code, lexer, formatter), 1)
    return string
def __init__(self, file_content, file_name=None, language=None,
             private=False):
    """Store the paste and determine its language when none is given.

    file_content -- the pasted text (stored through ``set_file_content``).
    file_name -- optional file name used as a hint for language guessing.
    language -- lexer alias; guessed with pygments when ``None``.
    private -- stored as-is on the instance.
    """
    self.set_file_content(file_content)
    self.file_name = file_name
    self.language = language
    self.private = private
    self.pub_date = datetime.utcnow()

    if self.language is not None:
        return

    # guess language, if needed
    if self.file_name is None:
        lexer = guess_lexer(self.file_content)
    else:
        try:
            lexer = guess_lexer_for_filename(self.file_name,
                                             self.file_content)
        except Exception:
            lexer = guess_lexer(self.file_content)
            # verify if lexer is ok for filename
            # NOTE(review): the flattened source does not show which
            # statement the original `else: found = True` belonged to;
            # this applies the check only to the content-based fallback,
            # the one reading that never calls fnmatch with a None file
            # name -- confirm against the original layout.
            if not any(fnmatch(self.file_name, pattern)
                       for pattern in lexer.filenames):
                lexer = TextLexer
    self.language = lexer.aliases[0]
def colorize(paste_key):
    """Color the paste in a deferred task.

    Loads the paste stored under *paste_key*, highlights its text and
    saves the result back.  The lexer named by ``paste.syntax`` is used
    unless it is ``'auto'`` or unknown, in which case the lexer is guessed
    from the text; the plain ``text`` lexer is the final fallback.
    """
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import get_lexer_by_name
    from pygments.lexers import guess_lexer
    from pygments.util import ClassNotFound

    paste = db.get(paste_key)

    lexer = None
    if paste.syntax != 'auto':
        try:
            lexer = get_lexer_by_name(paste.syntax, stripall=True)
        except ClassNotFound:
            # pygments signals an unknown name by raising, not by
            # returning None.
            lexer = None
    if lexer is None:
        try:
            lexer = guess_lexer(paste.text)
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

    formatter = HtmlFormatter(linenos=True, cssclass="highlight")
    paste.syntax = lexer.name
    paste.highlighted = highlight(paste.text, lexer, formatter)
    paste.colorized = True
    paste.put()
def highlight_content(content, lexer_name=None):
    """Return *content* (UTF-8 encoded) highlighted as HTML.

    The lexer called *lexer_name* is used when given and known; otherwise
    the lexer is guessed from the content.
    """
    lexer = None
    if lexer_name:
        try:
            lexer = get_lexer_by_name(lexer_name)
        except ClassNotFound:
            pass
    if lexer is None:
        lexer = guess_lexer(content)

    formatter = HtmlFormatter()
    return highlight(content.decode('utf8'), lexer, formatter)
def get_lexer(self, code, language):
    """Return a lexer for *language*, guessing from *code* as a fallback.

    The name is tried verbatim first and lower-cased second; when neither
    is known, or no language is given, the lexer is guessed from the code.
    """
    if not language:
        return guess_lexer(code)

    try:
        return get_lexer_by_name(language)
    except pygments.util.ClassNotFound:
        pass

    try:
        return get_lexer_by_name(language.lower())
    except pygments.util.ClassNotFound:
        return guess_lexer(code)
def highlight(text, filename=None):
    """Guess language in *text* and return its html highlighted version.

    text -- the source to highlight.
    filename -- optional file name used as a hint for the lexer.

    Returns a ``(style_defs, html)`` tuple.
    """
    lexer = None
    if filename is not None:
        # Pygments does not know dpr, so present such files as ``.pas``.
        # Only a trailing extension is rewritten; a "dpr" elsewhere in the
        # name is left alone.
        if filename.lower().endswith('.dpr'):
            filename = filename[:-len('.dpr')] + '.pas'
        try:
            lexer = guess_lexer_for_filename(filename, text)
        except pygments.util.ClassNotFound:
            pass
    if lexer is None:
        lexer = guess_lexer(text)

    formatter = HtmlFormatter(style='manni', linenos=True,
                              classprefix="highlight-")
    return formatter.get_style_defs(), p_highlight(text, lexer, formatter)
def apply_pygments(str):
    """Highlight a fenced code block and return it as HTML.

    str -- the whole block: a first line made of backticks optionally
           followed by a language name, the code, and a closing line.
           (The parameter name shadows the builtin; it is kept so existing
           keyword callers continue to work.)

    The named lexer is used when it can be loaded; otherwise the lexer is
    guessed from the code between the first and last line.
    """
    lines = str.splitlines()
    code = '\n'.join(lines[1:-1])
    # An empty input has no fence line at all.
    language = lines[0].strip().lstrip('`') if lines else ''

    lexer = None
    if language:
        try:
            lexer = get_lexer_by_name(language)
        except Exception:
            # Unknown language name: fall back to guessing.
            lexer = None
    if lexer is None:
        lexer = guess_lexer(code)

    formatter = HtmlFormatter(style='colorful')
    return highlight(code, lexer, formatter)
def get_lexer(filename, data, lexer='auto'):
    """
    Return a particular lexer instance.

    This method wraps pygments' methods guess_lexer_for_filename() and
    guess_lexer(). First guess_lexer_for_filename() is invoked; if there is
    no result the presence of a shebang is checked. If the subsequent call
    to guess_lexer() does not bring any results the fallback value
    TextLexer() is returned.

    Args:
        filename    The name of the file to be displayed; a falsy value is
                    treated as '-' (stdin)
        data        The content of the file (utf-8 encoded)
        lexer       Specifying another value than 'auto' skips any
                    guess_lexer() calls. The pygments method
                    get_lexer_by_name() is used to find a particular lexer
                    class. If nothing has been found, an error is printed
                    to stderr and the process exits with status 1.
    """
    filename = filename if filename else '-'

    if lexer != 'auto':
        try:
            debug("Trying to find lexer: '{}'".format(lexer))
            lexer_cls = get_lexer_by_name(lexer)
        except ClassNotFound:
            print("[Error] No lexer found: '{}'".format(lexer),
                  file=sys.stderr)
            sys.exit(1)
    else:
        debug("Trying to guess lexer for filename: '{}'".format(filename))
        try:
            lexer_cls = guess_lexer_for_filename(filename, data)
        except ClassNotFound:
            if data[0:2] == b'#!':
                debug("Shebang '{}' present".format(data.splitlines()[0]))
                try:
                    lexer_cls = guess_lexer(data.decode())
                except ClassNotFound:
                    # Honour the documented fallback here too instead of
                    # letting the exception escape.
                    debug('Guessing failed, using fallback lexer')
                    lexer_cls = TextLexer()
            elif filename == '-':
                try:
                    debug("Have read from 'stdin'; guessing lexer for content")
                    lexer_cls = guess_lexer(data)
                except ClassNotFound:
                    debug('Guessing failed, using fallback lexer')
                    lexer_cls = TextLexer()
            else:
                debug('No shebang present, using fallback lexer')
                lexer_cls = TextLexer()
        except TypeError:
            debug('Guessing failed, using fallback lexer')
            lexer_cls = TextLexer()

    debug('Using lexer: {}'.format(lexer_cls))
    return lexer_cls
def get_lexer(text, lang):
    """
    Return the Pygments lexer for parsing this sourcecode.

    :Parameters:
        text
            The sourcecode to be lexed for highlighting. This is analysed
            if the language is 'guess'.
        lang
            An abbreviation for the programming language of the code. Can
            be any 'name' accepted by Pygments, including 'none' (plain
            text) or 'guess' (analyse the passed code for clues).

    :Returns:
        A Pygments lexer instance, or ``None`` when guessing fails.

    """
    # TODO: what if source has errors?

    ## Preconditions & preparation:
    from pygments.lexers import (get_lexer_by_name, TextLexer, guess_lexer)

    ## Main:
    if lang == 'guess':
        try:
            return guess_lexer(text)
        except Exception:
            return None
    if lang == 'none':
        # Return an instance, like every other branch does; pygments'
        # highlight() does not accept a lexer class.
        return TextLexer()
    return get_lexer_by_name(lang)
def get_language(source, code, language=None):
    """Return the language entry to use for documenting *source*.

    A forced *language* is matched against the ``name`` of each known
    language.  Otherwise the file extension of *source* is looked up, and
    as a last resort the language is guessed from *code*.

    Raises ValueError when the forced language is unknown or when no known
    language can be determined.
    """
    if language is not None:
        forced = next(
            (l for l in languages.values() if l["name"] == language), None)
        if forced is None:
            raise ValueError("Unknown forced language: " + language)
        return forced

    m = re.match(r'.*(\..+)', os.path.basename(source)) if source else None
    if m and m.group(1) in languages:
        return languages[m.group(1)]

    try:
        lang = lexers.guess_lexer(code).name.lower()
        guessed = next(
            (l for l in languages.values() if l["name"] == lang), None)
        if guessed is None:
            raise ValueError()
        return guessed
    except ValueError:
        # If pygments can't find any lexers, it will raise its own
        # subclass of ValueError. We will catch it and raise ours
        # for consistency.
        raise ValueError("Can't figure out the language!")
def get_highlighted(self, filename, hl_lines=None):
    """Get the highlighted version of a file.

    The rendered HTML is cached as a gzip file under ``self.cache_dir``.
    The cache name is derived from the file name, its modification time,
    ``CACHE_SERIAL`` and the sorted *hl_lines*.  On a cache miss the file
    is highlighted, written to the cache and returned.
    """
    hl_lines = sorted(hl_lines or [])
    mtime = int(os.stat(filename).st_mtime)
    cache_id = '%s-%d-%s-%s' % (filename, mtime, CACHE_SERIAL, hl_lines)
    cache_path = os.path.join(
        self.cache_dir, hashlib.sha1(cache_id).hexdigest() + '.html.gz')

    try:
        with gzip.open(cache_path) as cached:
            return cached.read()
    except IOError:
        # No usable cache entry: render the file below.
        pass

    with open(filename) as infile:
        file_data = infile.read()

    try:
        lexer = lexers.guess_lexer_for_filename(filename, file_data)
    except pygments.util.ClassNotFound:
        try:
            lexer = lexers.guess_lexer(file_data)
        except pygments.util.ClassNotFound:
            lexer = lexers.TextLexer()

    formatter = formatters.HtmlFormatter(
        hl_lines=hl_lines, linenos='table', lineanchors='line',
        anchorlinenos=True)
    rendered = pygments.highlight(file_data, lexer, formatter)

    with gzip.open(cache_path, 'w') as cached:
        cached.write(rendered.encode('utf-8'))
    return rendered
def _blob_detail(request, project, branch_name, git_object, path_list,
                 branches):
    """Render a blob. Pretty prints using Pygments.

    The lexer is chosen from the blob's file name (the ``name`` of the last
    *path_list* entry) and content; when that fails it is guessed from the
    content alone.
    """
    breadcrumbs = generate_breadcrumbs(path_list)
    file_name = path_list[-1]['name']
    # Fetch the raw content once and reuse it for guessing and rendering.
    raw_content = git_object.as_raw_string()

    try:
        lexer = guess_lexer_for_filename(file_name, raw_content)
    except Exception:
        lexer = guess_lexer(raw_content)

    formatter = HtmlFormatter(linenos=True)
    pretty_printed_file = highlight(raw_content, lexer, formatter)

    return render_to_response('project/blob.html', {
        'project': project,
        'branch_name': branch_name,
        'object': git_object,
        'path': path_list,
        'breadcrumbs': breadcrumbs,
        'pretty_print': pretty_printed_file,
        'branches': branches,
    }, context_instance=RequestContext(request))
def __guessLexer(self, text):
    """
    Private method to guess a pygments lexer.

    A configured pygments name takes precedence and disables guessing.
    Otherwise the lexer is guessed from the editor's file name together
    with the text, then from the text alone.

    @param text text to base guessing on (string)
    @return reference to the guessed lexer (pygments.lexer) or None
    """
    if self.__pygmentsName:
        lexerClass = find_lexer_class(self.__pygmentsName)
        return lexerClass() if lexerClass is not None else None

    if not text:
        return None

    lexer = None

    # step 1: guess based on filename and text
    fn = self.editor.getFileName() if self.editor is not None else None
    if fn:
        try:
            lexer = guess_lexer_for_filename(fn, text)
        except ClassNotFound:
            pass

    # step 2: guess on text only
    if lexer is None:
        try:
            lexer = guess_lexer(text)
        except ClassNotFound:
            pass

    return lexer
def _render(self):
    """Render the children as a pygments-highlighted block.

    The lexer name is whatever follows the first space in ``self.haml``;
    without one the lexer is guessed from the content.  ``TextLexer`` is
    used when the name is unknown or guessing fails.  Without children
    only the trailing newlines are rendered.

    Raises NotAvailableError when there are children but pygments is not
    available.
    """
    if not self.children:
        self.after = self.render_newlines()
        return

    if not _pygments_available:
        raise NotAvailableError("Pygments is not available")

    _, separator, remainder = self.haml.partition(" ")
    lex_name = remainder if separator else False

    self.before = self.render_newlines()
    # Strip the first child's indentation from every child line.
    indent_offset = len(self.children[0].spaces)
    text = ''.join(
        child.spaces[indent_offset:] + child.haml + child.render_newlines()
        for child in self.children)

    try:
        lexer = get_lexer_by_name(lex_name) if lex_name else guess_lexer(text)
    except ClassNotFound:
        # Invalid lexer name, or no lexer claims the content.
        lexer = TextLexer()

    self.before += highlight(text, lexer, HtmlFormatter())
def _detect_lang_name(self, subject, paste_content):
    """Return the first alias of the lexer best matching the paste.

    When *subject* looks like a file name with a recognized extension
    (any trailing parenthesised part is ignored) the lexer is guessed from
    that name and the content.  Failing that, content of at least 20
    characters is guessed on its own, and finally the ``text`` lexer is
    used.  Returns an empty string if even that cannot be loaded.
    """
    lexer = None

    if '.' in subject:
        name = subject
        if subject[-1] == ')':
            for opener in (' (', '('):
                if opener in subject:
                    name = subject.split(opener)[0]
                    break
        if name.rsplit('.', 1)[-1] in self.recognized_extenstions:
            try:
                lexer = guess_lexer_for_filename(name, paste_content,
                                                 encoding='utf-8')
            except (ClassNotFound, ImportError):
                pass

    if lexer is None and len(paste_content) >= 20:
        try:
            lexer = guess_lexer(paste_content, encoding='utf-8')
        except (ClassNotFound, ImportError):
            pass

    if lexer is None:
        try:
            lexer = get_lexer_by_name('text', encoding='utf-8')
        except (ClassNotFound, ImportError) as e:
            self.log(self.logger.WARNING, '%s: %s' % (subject, e))
            return ''

    return lexer.aliases[0]
def hilite(self):
    """
    Pass code to the [Pygments](http://pygments.pocoo.org/) highlighter
    with optional line numbers. The output should then be styled with css
    to your liking. No styles are applied by default - only styling hooks
    (i.e.: <span class="k">).

    Without pygments the source is HTML-escaped and wrapped in markup
    usable by JavaScript highlighting libraries.

    returns : A string of html.

    """
    self.src = self.src.strip('\n')
    self.colons = self.shebang = False

    if self.lang is None:
        self._getLang()

    # Line numbers default to on (also with a shebang, which preserves old
    # Codehilite behavior for backwards-compatibility); a colon header
    # defers to force_linenos.
    use_linenos = True
    if not self.shebang and self.colons:
        use_linenos = self.force_linenos

    # allow global linenos override
    if self.linenos is not None:
        use_linenos = self.linenos

    if pygments:
        try:
            lexer = get_lexer_by_name(self.lang)
        except ValueError:
            try:
                if self.guess_lang:
                    lexer = guess_lexer(self.src)
                else:
                    lexer = TextLexer()
            except ValueError:
                lexer = TextLexer()
        formatter = HtmlFormatter(linenos=use_linenos,
                                  cssclass=self.css_class,
                                  style=self.style,
                                  noclasses=self.noclasses)
        return highlight(self.src, lexer, formatter)

    # just escape and build markup usable by JS highlighting libs.
    # '&' must be escaped first so the entities added afterwards are not
    # escaped a second time.
    txt = self.src.replace('&', '&amp;')
    txt = txt.replace('<', '&lt;')
    txt = txt.replace('>', '&gt;')
    txt = txt.replace('"', '&quot;')

    classes = []
    if self.lang:
        classes.append('language-%s' % self.lang)
    if self.linenos:
        classes.append('linenums')
    class_str = ''
    if classes:
        class_str = ' class="%s"' % ' '.join(classes)
    return '<pre class="%s"><code%s>%s</code></pre>\n' % (
        self.css_class, class_str, txt)
def index():
    ''' Main page : please enter your paste '''
    if request.method != 'POST':
        return render_template('index.html')

    if not request.form['input']:
        flash('Please type a content to paste')
        return render_template('index.html')

    delta = datetime.timedelta(seconds=int(request.form['expiration']))
    if request.form['expiration'] == '0':
        # An expiration of "0" is stored as the fixed date 0001-01-01.
        expiration = datetime.datetime(1, 1, 1)
    else:
        expiration = datetime.datetime.now() + delta

    # Short identifier derived from the content and the current time.
    digest = hashlib.sha1(request.form['input'] + time.ctime()).hexdigest()
    identifier = digest[:8]

    paste = highlight(
        request.form['input'],
        guess_lexer(request.form['input']),
        HtmlFormatter(linenos='table')
    )

    row = (identifier, request.form['title'], expiration, paste)
    g.db.execute(
        'INSERT INTO PASTE (id, title, expiration, content) VALUES (?, ?, ?, ?)',
        row
    )
    g.db.commit()

    return render_template('index.html', identifier=identifier,
                           url=request.url)
def pygmentize(self, tmpl, fctx, field):
    """Make the template *field* emit pygments-colorized lines.

    Adds the syntax CSS to the cached header if missing, highlights the
    file content in one pass and installs a ``colorize`` filter that hands
    out the highlighted lines one by one.  Binary content is left alone.
    """
    # append a <link ...> to the syntax highlighting css
    header = ''.join(tmpl('header'))
    if SYNTAX_CSS not in header:
        tmpl.cache['header'] = header + SYNTAX_CSS

    text = fctx.data()
    if util.binary(text):
        return

    style = self.config("web", "pygments_style", "colorful")

    # To get multi-line strings right, we can't format line-by-line
    try:
        lexer = guess_lexer_for_filename(fctx.path(), text,
                                         encoding=util._encoding)
    except (ClassNotFound, ValueError):
        try:
            lexer = guess_lexer(text, encoding=util._encoding)
        except (ClassNotFound, ValueError):
            lexer = TextLexer(encoding=util._encoding)

    formatter = HtmlFormatter(style=style, encoding=util._encoding)
    colorized = highlight(text, lexer, formatter)

    # strip wrapping div
    body = colorized[:colorized.find('\n</pre>')]
    body = body[body.find('<pre>') + 5:]

    coloriter = iter(body.splitlines())
    filters['colorize'] = lambda x: coloriter.next()

    tmpl.cache[field] = tmpl.cache[field].replace('line|escape',
                                                  'line|colorize')
def get_file_lexer(filename, content):
    """ Try to get a lexer by file extension, guess by content if that fails.

    ``.txt`` files always get the ``text`` lexer.  When neither the file
    name nor the content yields a lexer, ``DEFAULT_LEXER`` is used.
    """
    try:
        if filename.lower().endswith('.txt'):
            # Pygments sometimes returns a weird lexer for .txt files.
            lexer = lexers.get_lexer_by_name('text')
            message = 'Lexer forced by extension: {:>20} -> {}'
        else:
            lexer = lexers.get_lexer_for_filename(filename)
            message = 'Lexer chosen by file name: {:>20} -> {}'
    except ClassNotFound:
        try:
            # Guess by content.
            lexer = lexers.guess_lexer(content)
            message = 'Lexer guessed by content: {:>20} -> {}'
        except ClassNotFound:
            # Fall back to default lexer.
            lexer = lexers.get_lexer_by_name(DEFAULT_LEXER)
            message = 'Lexer set to default: {:>20} -> {}'

    debug(message.format(lexer.name, filename))
    return lexer
def get_line_types(repo, repo_uri, rev, path):
    """Return a list with one entry per line of the file at *rev*.

    Each entry is labeled 'code', 'comment' or 'empty'.  Returns None when
    no file content is found.
    """
    # Full path of the file: repository URI joined with the file path.
    uri = os.path.join(repo_uri, path)
    file_content = _get_file_content(repo, uri, rev)

    if file_content is None or file_content == '':
        printerr("[get_line_types] Error: No file content for " + str(rev)
                 + ":" + str(path) + " found! Skipping.")
        return None

    try:
        lexer = get_lexer_for_filename(path)
    except ClassNotFound:
        try:
            printdbg("[get_line_types] Guessing lexer for" + str(rev)
                     + ":" + str(path) + ".")
            lexer = guess_lexer(file_content)
        except ClassNotFound:
            printdbg("[get_line_types] No guess or lexer found for "
                     + str(rev) + ":" + str(path)
                     + ". Using TextLexer instead.")
            lexer = TextLexer()

    if isinstance(lexer, NemerleLexer):
        # this lexer is broken and yields an unstoppable process, see
        # https://bitbucket.org/birkenfeld/pygments-main/issue/706/nemerle-lexer-ends-in-an-infinite-loop
        lexer = TextLexer()

    # Not sure if stripping should be skipped when the language uses
    # off-side rules (e.g. python, see
    # http://en.wikipedia.org/wiki/Off-side_rule for a list).
    stripped_code = _strip_lines(file_content)
    lexer_output = _iterate_lexer_output(lexer.get_tokens(stripped_code))
    return _comment_empty_or_code(lexer_output).split("\n")
def highlight_file(self, linenos=True, style='default'):
    """ Highlight the input file, and return HTML as a string.

    linenos -- passed to the HTML formatter's ``linenos`` option.
    style -- pygments style name.

    The lexer is chosen from the file name, then guessed from the content,
    with the ``text`` lexer as the last resort.  An invalid style name
    logs an error and exits with status 1; an unreadable file logs an
    error and exits with status 2.
    """
    try:
        lexer = lexers.get_lexer_for_filename(self.input_file)
    except pygments.util.ClassNotFound:
        # Try guessing the lexer (file type) later.
        lexer = None

    try:
        formatter = formatters.HtmlFormatter(
            linenos=linenos, style=style, full=True)
    except pygments.util.ClassNotFound:
        # The indentation of the style list is written out explicitly so
        # the message does not depend on how the source is laid out.
        # NOTE(review): the original indent width is not recoverable from
        # the source as seen; four spaces is assumed.
        indent = "\n    "
        logging.error(
            "\nInvalid style name: {}\nExpecting one of:{}{}".format(
                style, indent, indent.join(sorted(styles.STYLE_MAP))))
        sys.exit(1)

    try:
        with open(self.input_file, "r") as f:
            content = f.read()
    except EnvironmentError as exread:
        fmt = "\nUnable to read file: {}\n{}"
        logging.error(fmt.format(self.input_file, exread))
        sys.exit(2)

    if not lexer:
        try:
            lexer = lexers.guess_lexer(content)
        except pygments.util.ClassNotFound:
            # No lexer could be guessed.
            lexer = lexers.get_lexer_by_name("text")

    return pygments.highlight(content, lexer, formatter)
def pygmentize():
    """Highlight the posted ``code`` and return it as HTML.

    Form parameters:
        language -- lexer name; the lexer is guessed from the code when
                    the name is unknown.
        code     -- the source to highlight.
        nowrap   -- optional; ``1`` disables the formatter's wrapping
                    markup.  A missing or non-integer value counts as off.

    Responds with status 400 when ``language`` or ``code`` is missing.
    """
    try:
        language = request.form['language']
        code = request.form['code']
    except KeyError:
        return 'What the hell is wrong with you? Give me the parameters "language" and "code" at once!', 400

    try:
        nowrap = int(request.form['nowrap']) == 1
    except (KeyError, ValueError):
        # Absent or malformed flag: keep the default instead of failing
        # the whole request.
        nowrap = False

    params = {'stripall': True}
    if language == 'php':
        params['startinline'] = True

    try:
        lexer = get_lexer_by_name(language, **params)
    except ClassNotFound:
        lexer = guess_lexer(code, **params)

    return highlight(code, lexer, HtmlFormatter(nowrap=nowrap))
def syntax(env, value, lexer=None, filename=None):
    """
    Processes the contained block using `pygments`.

    env -- template environment; ``env.config.syntax`` (when present)
           supplies formatter ``options`` and the ``use_figure`` switch.
    value -- the source text to highlight.
    lexer -- lexer name; guessed from *value* when not given.
    filename -- caption for the figure; defaults to the lexer's name.

    Returns the highlighted markup, wrapped in a captioned figure unless
    ``use_figure`` is configured off.
    Raises TemplateError when pygments cannot be imported.
    """
    try:
        import pygments
        from pygments import lexers
        from pygments import formatters
    except ImportError:
        logger.error(u"pygments library is required to"
                     " use syntax highlighting tags.")
        raise TemplateError("Cannot load pygments")

    if lexer:
        pyg = lexers.get_lexer_by_name(lexer)
    else:
        pyg = lexers.guess_lexer(value)

    has_syntax_config = hasattr(env.config, 'syntax')
    settings = {}
    if has_syntax_config:
        settings = getattr(env.config.syntax,
                           'options',
                           Expando({})).to_dict()

    formatter = formatters.HtmlFormatter(**settings)
    code = pygments.highlight(value, pyg, formatter)
    # Give blank lines some content before newlines become <br /> tags.
    # NOTE(review): the filler appears in the source as a bare whitespace
    # character, presumably an entity-decoded &nbsp; -- confirm.
    code = code.replace('\n\n', '\n&nbsp;\n').replace('\n', '<br />')

    caption = filename if filename else pyg.name
    if has_syntax_config and not getattr(env.config.syntax,
                                         'use_figure', True):
        return Markup(code)
    return Markup(
        '<div class="codebox"><figure class="code">%s<figcaption>%s</figcaption></figure></div>\n\n'
        % (code, caption))
def get_language_for_code(code, mimetype=None, default='text'):
    '''Get the language alias for a piece of code.

    The lexer is guessed from *code*; *mimetype* is accepted but not used.
    Returns *default* when no lexer can be guessed, otherwise the result
    of ``get_known_alias`` for the guessed lexer.
    '''
    try:
        guessed = guess_lexer(code)
    except ClassNotFound:
        return default
    else:
        return get_known_alias(guessed, default)
def entry_view(context, request):
    """Build the template values for displaying a single paste.

    The paste is highlighted with the lexer named by ``context.language``,
    or with a guessed lexer when no language is stored; ``TextLexer`` is
    used when pygments finds no lexer.
    """
    paste = context.paste or u''

    try:
        if context.language:
            lexer = lexers.get_lexer_by_name(context.language)
        else:
            # Guess on the defaulted text so a missing paste does not
            # reach pygments as None.
            lexer = lexers.guess_lexer(paste)
    except util.ClassNotFound:
        # couldn't guess lexer
        lexer = lexers.TextLexer()

    formatted_paste = pygments.highlight(paste, lexer, formatter)
    pastes = get_pastes(context, request, 10)

    return dict(
        author=context.author_name,
        date=context.date.strftime('%x at %X'),
        style_defs=style_defs,
        lexer_name=lexer.name,
        paste=formatted_paste,
        pastes=pastes,
        version=app_version,
        message=None,
        application_url=request.application_url,
    )
def get_lexer_name_from_code(code):
    """Return the first alias of the lexer guessed for *code*.

    Returns an empty string when no lexer can be guessed.
    """
    try:
        guessed = guess_lexer(code, encoding=None)
    except ClassNotFound:
        return ''
    return guessed.aliases[0]
def get_context(self, value, parent_context=None):
    """Extend the template context with the highlighted code.

    Reads ``code``, ``language`` and ``line_numbers`` from *value*.  The
    lexer is looked up by language name, or guessed from the code when no
    language is set.  Adds ``filename``, ``display_filename``,
    ``language`` and the highlighted ``code`` to the context.
    """
    context = super().get_context(value, parent_context=parent_context)

    src = (value.get('code') or '').strip('\n')
    lang = value.get('language') or ''
    line_numbers = value['line_numbers']

    if lang:
        lexer = get_lexer_by_name(lang)
    else:
        lexer = guess_lexer(src)

    formatter = get_formatter_by_name(
        'html',
        linenos=line_numbers,
        cssclass='codehilite',
        style='default',
        noclasses=False,
    )

    context.update({
        'filename': value.get('filename'),
        'display_filename': value.get('display_filename'),
        # NOTE(review): the language is read from the 'language' key above
        # but was published from 'lang'; fall back to the value actually
        # used for highlighting -- confirm whether a separate 'lang' key
        # exists.
        'language': value.get('lang') or lang,
        'code': mark_safe(highlight(src, lexer, formatter))
    })
    return context
def hilite(self):
    """
    Pass code to the [Pygments](http://pygments.pocoo.org/) highlighter
    with optional line numbers. The output should then be styled with css
    to your liking. No styles are applied by default - only styling hooks
    (i.e.: <span class="k">).

    The lexer named by ``self.lang`` is used; if it cannot be found the
    lexer is guessed when ``self.guess_lang`` is set, with ``TextLexer``
    as the fallback.

    returns : A string of html.

    """
    self.src = self.src.strip('\n')

    if self.lang is None:
        self._getLang()

    try:
        lexer = get_lexer_by_name(self.lang)
    except ValueError:
        lexer = None

    if lexer is None:
        try:
            lexer = guess_lexer(self.src) if self.guess_lang else TextLexer()
        except ValueError:
            lexer = TextLexer()

    formatter = HtmlFormatter(linenos=self.linenos,
                              cssclass='codehilite',
                              style=[],
                              noclasses=self.noclasses)
    return highlight(self.src, lexer, formatter)
def highlight_block(self, source, lang, linenos=False):
    """Return *source* highlighted for the configured output format.

    ``py``/``python`` sources starting with ``>>>`` use the interactive
    session lexer, other Python sources are highlighted only if they
    parse.  ``python3``/``py3`` sessions use the py3 console lexer,
    ``guess`` lets pygments guess, and any other name is looked up in (and
    added to) the ``lexers`` cache.  The source is returned unhighlighted
    when pygments is missing, guessing fails, Python code does not parse,
    or lexing produces an error token.
    """
    if isinstance(source, str):
        source = source.decode()
    if not pygments:
        return self.unhighlighted(source)

    if lang in ('py', 'python'):
        if source.startswith('>>>'):
            # interactive session
            lexer = lexers['pycon']
        elif self.try_parse(source):
            # it parses, so treat it as Python
            lexer = lexers['python']
        else:
            return self.unhighlighted(source)
    elif lang in ('python3', 'py3') and source.startswith('>>>'):
        # for py3, recognize interactive sessions, but do not try parsing...
        lexer = lexers['pycon3']
    elif lang == 'guess':
        try:
            lexer = guess_lexer(source)
        except Exception:
            return self.unhighlighted(source)
    elif lang in lexers:
        lexer = lexers[lang]
    else:
        lexer = lexers[lang] = get_lexer_by_name(lang)
        lexer.add_filter('raiseonerror')

    try:
        is_html = self.dest == 'html'
        rendered = highlight(source, lexer, self.fmter[bool(linenos)])
        if is_html:
            return rendered
        return rendered.translate(tex_hl_escape_map)
    except ErrorToken:
        # this is most probably not the selected language,
        # so let it pass unhighlighted
        return self.unhighlighted(source)
def highlightcallback(code):
    """Replace the content of the *code* element with highlighted markup.

    The lexer comes from the element's ``lang`` attribute; when that fails
    for any reason it is guessed from the serialized element.
    """
    try:
        lexer = get_lexer_by_name(code.attrib['lang'])
    except Exception:
        lexer = guess_lexer(etree.tostring(code))

    # text_content() is the same as `etree.tostring(code, method='text')`
    # afaict
    highlighted = highlight(code.text_content(), lexer, HtmlFormatter())

    # NOTE: emitting the styles like this doesn't feel right: with
    # multiple entries containing source code the style tags are
    # redundant, and the style info doesn't really belong in the html.
    style_defs = HtmlFormatter().get_style_defs('.highlight')
    markup = '<style>' + style_defs + '</style>' + highlighted

    # lxml insists on wrapping the parsed content in <html>..</html>, so
    # the page source would look like <code><html><style...  Renaming the
    # wrapper to a div gets rid of the mid-document html tag, and a
    # wrapping div is harmless.
    wrapper = fromstring(markup)
    wrapper.tag = 'div'

    code.clear()
    code.append(wrapper)
def codehilite(self, lang, src):
    """Syntax highlight the inline code block.

    lang -- language name, may be empty.
    src -- the inline code.

    With pygments enabled the code is highlighted (named lexer, then a
    guess when allowed, then plain text).  Otherwise it is HTML-escaped
    and wrapped in a ``<code>`` element usable by JavaScript highlighting
    libraries.  Returns the placeholder under which the markup was stashed.
    """
    process_text = self.style_plain_text or lang or self.guess_lang

    if not lang and self.style_plain_text and not self.guess_lang:
        lang = 'text'

    if pygments and self.use_pygments and process_text:
        try:
            lexer = get_lexer_by_name(lang)
        except ValueError:
            try:
                if self.guess_lang:
                    lexer = guess_lexer(src)
                else:
                    lexer = get_lexer_by_name('text')
            except ValueError:  # pragma: no cover
                lexer = get_lexer_by_name('text')

        formatter = InlineCodeHtmlFormatter(
            style=self.style,
            cssclass=self.css_class,
            noclasses=self.noclasses
        )
        code = highlight(src, lexer, formatter)
    else:
        # Just escape and build markup usable by JS highlighting libs.
        # '&' must be escaped first so the entities added afterwards are
        # not escaped a second time.
        txt = src.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        txt = txt.replace('"', '&quot;')

        classes = [self.css_class] if self.css_class and process_text else []
        if lang and process_text:
            classes.append('language-%s' % lang)
        class_str = ''
        if classes:
            class_str = ' class="%s"' % ' '.join(classes)
        code = '<code%s>%s</code>' % (class_str, txt)

    placeholder = self.markdown.htmlStash.store(code, safe=True)
    return placeholder
def _format_output(code, args):
    """Return *code* colorized for a dark terminal when color is requested.

    *args* is the argument mapping; it supplies ``color``, ``query`` and
    ``tags``.  The code is returned unchanged when color is off or no
    lexer can be determined.
    """
    if not args['color']:
        return code

    lexer = None

    # Try to find a lexer using the StackOverflow tags or the words of the
    # query.
    for keyword in args['query'].split() + args['tags']:
        try:
            lexer = get_lexer_by_name(keyword)
        except ClassNotFound:
            continue
        break

    # No lexer found above: fall back to the guesser.
    if not lexer:
        try:
            lexer = guess_lexer(code)
        except ClassNotFound:
            return code

    return highlight(code, lexer, TerminalFormatter(bg='dark'))
def highlight(path, text, encoding, style=DEFAULT_PYGMENT_STYLE):
    """
    Returns a list of highlighted (i.e. HTML formatted) strings.

    Text longer than ``MAX_HIGHLIGHT_SIZE`` is only escaped line by line.
    The lexer is guessed from the first 1024 characters, using *path* as a
    hint where possible, with ``TextLexer`` as the fallback.
    """
    if len(text) > MAX_HIGHLIGHT_SIZE:
        return map(escape, breezy.osutils.split_lines(text))

    formatter = HtmlFormatter(style=style, nowrap=True, classprefix='pyg-')

    sample = text[:1024]
    try:
        lexer = guess_lexer_for_filename(path, sample, encoding=encoding)
    except (ClassNotFound, ValueError):
        try:
            lexer = guess_lexer(sample, encoding=encoding)
        except (ClassNotFound, ValueError):
            lexer = TextLexer(encoding=encoding)

    rendered = _highlight_func(text, lexer, formatter)
    return breezy.osutils.split_lines(rendered)
def save(self):
    """Save the file, then store its highlighted HTML as a Document.

    ``.txt`` and ``.log`` files are rendered as plain text.  For other
    files the lexer is looked up by file name, then by mimetype, then
    guessed from the content, with plain text as the fallback.

    Returns ``self.base``.
    """
    super(TextFile, self).save()
    content = self.f.value.decode('utf-8')

    plain_ext = ('.txt', '.log')
    if self.ext in plain_ext:
        # highlight() needs a lexer instance, not the class.
        lexer = TextLexer()
    else:
        lookups = (
            (get_lexer_for_filename, self.filename),
            (get_lexer_for_mimetype, self.mimetype),
            (guess_lexer, content),
        )
        for find_lexer, argument in lookups:
            try:
                lexer = find_lexer(argument)
                break
            except ClassNotFound:
                continue
        else:
            lexer = TextLexer()

    html = highlight(
        content,
        lexer,
        HtmlFormatter(linenos=True, lineanchors='line', anchorlinenos=True))

    txt = Document()
    txt.file_id = self.id
    txt.html = html
    txt.content = content
    txt.save()
    return self.base
def pygment(src, lang=None, num=True):
    '''
    Pass code to the [Pygments](http://pygments.pocoo.org/) highlighter
    with optional line numbers. The output should then be styled with css
    to your liking. No styles are applied by default - only styling hooks
    (i.e.: <span class="k">).

    Usage:
        pygment(src[, lang[, num]])

    @param src: Can be a string or any object with a .readline attribute.
    @param lang: The language of code. Pygments will try to guess the
                 language if it cannot be looked up.
    @param num: (Boolean) Turns line numbering 'on' or 'off' (on by
                default).

    @returns : A string of html.  Without pygments the source is only
               escaped (and numbered when *num* is set).
    '''
    try:
        from pygments import highlight
        from pygments.lexers import get_lexer_by_name, guess_lexer, TextLexer
        from pygments.formatters import HtmlFormatter
    except ImportError:
        # just escape and pass through
        escaped = escape(src)
        if num:
            return number(escaped)
        return '<div class="codehilite"><pre>%s</pre></div>\n' % escaped

    try:
        lexer = get_lexer_by_name(lang)
    except ValueError:
        try:
            lexer = guess_lexer(src)
        except ValueError:
            lexer = TextLexer()

    formatter = HtmlFormatter(linenos=num, cssclass="codehilite")
    return highlight(src, lexer, formatter)
def pygmentize(field, fctx, style, tmpl):
    """Make the template *field* emit pygments-colorized lines.

    Adds the syntax CSS to the cached header if missing, decodes the file
    content, highlights it in one pass and installs a ``colorize`` filter
    that hands out the re-encoded highlighted lines one by one.  Binary
    content is left alone.
    """
    # append a <link ...> to the syntax highlighting css
    header = tmpl.load('header')
    if SYNTAX_CSS not in header:
        tmpl.cache['header'] = header + SYNTAX_CSS

    text = fctx.data()
    if util.binary(text):
        return

    # Pygments is best used with Unicode strings:
    # <http://pygments.org/docs/unicode/>
    text = text.decode(encoding.encoding, 'replace')

    # To get multi-line strings right, we can't format line-by-line.
    # Only the first 1024 characters are used for guessing.
    sample = text[:1024]
    try:
        lexer = guess_lexer_for_filename(fctx.path(), sample)
    except (ClassNotFound, ValueError):
        try:
            lexer = guess_lexer(sample)
        except (ClassNotFound, ValueError):
            lexer = TextLexer()

    colorized = highlight(text, lexer, HtmlFormatter(style=style))

    # strip wrapping div
    body = colorized[:colorized.find('\n</pre>')]
    body = body[body.find('<pre>') + 5:]

    coloriter = (s.encode(encoding.encoding, 'replace')
                 for s in body.splitlines())
    tmpl.filters['colorize'] = lambda x: coloriter.next()

    tmpl.cache[field] = tmpl.cache[field].replace('line|escape',
                                                  'line|colorize')
def pygmentize(field, fctx, style, tmpl):
    """Make the template *field* emit pygments-colorized lines.

    Adds the syntax CSS to the cached header if missing, converts the file
    content to the local encoding, highlights it in one pass and installs
    a ``colorize`` filter that hands out the highlighted lines one by one.
    Binary content is left alone.
    """
    # append a <link ...> to the syntax highlighting css
    header = ''.join(tmpl('header'))
    if SYNTAX_CSS not in header:
        tmpl.cache['header'] = header + SYNTAX_CSS

    text = fctx.data()
    if util.binary(text):
        return

    # avoid UnicodeDecodeError in pygments
    text = encoding.tolocal(text)

    # To get multi-line strings right, we can't format line-by-line.
    # Only the first 1024 characters are used for guessing.
    sample = text[:1024]
    try:
        lexer = guess_lexer_for_filename(fctx.path(), sample,
                                         encoding=encoding.encoding)
    except (ClassNotFound, ValueError):
        try:
            lexer = guess_lexer(sample, encoding=encoding.encoding)
        except (ClassNotFound, ValueError):
            lexer = TextLexer(encoding=encoding.encoding)

    formatter = HtmlFormatter(style=style, encoding=encoding.encoding)
    colorized = highlight(text, lexer, formatter)

    # strip wrapping div
    body = colorized[:colorized.find('\n</pre>')]
    body = body[body.find('<pre>') + 5:]

    coloriter = iter(body.splitlines())
    tmpl.filters['colorize'] = lambda x: coloriter.next()

    tmpl.cache[field] = tmpl.cache[field].replace('line|escape',
                                                  'line|colorize')
def tokenize_file(source_file, language=None, literal_option=3):
    """Read *source_file* and tokenize it with a pygments lexer.

    source_file -- path of the file; read as UTF-8, ignoring undecodable
                   bytes.  The path is printed before reading.
    language -- lexer name; when ``None`` the lexer is chosen from the
                file name, or guessed from the content if that fails.
    literal_option -- passed through to ``tokenize_code``.

    Returns the result of ``tokenize_code``, or ``('', [])`` if the file
    cannot be decoded.
    """
    print(source_file)
    try:
        with codecs.open(source_file, "r", encoding='utf-8',
                         errors='ignore') as f:
            code = f.read()
    except UnicodeDecodeError:
        return '', []

    if language is not None:
        lexer = get_lexer_by_name(language)
    else:
        try:
            lexer = get_lexer_for_filename(source_file)
            language = languageForLexer(lexer)
        except (KeyError, ValueError):
            # Not a valid extension.  pygments reports an unknown file
            # name with ClassNotFound, which is a ValueError and not a
            # KeyError.
            lexer = guess_lexer(code)
            language = languageForLexer(lexer)

    return tokenize_code(code, lexer, language, literal_option)
def _format_output(code, args):
    """Return *code* colorized for a dark terminal when color is requested.

    *args* supplies ``color``, ``query`` and ``tags``.  The code is
    returned unchanged when color is off or no lexer can be determined.
    """
    if not args['color']:
        return code

    # try to find a lexer using the StackOverflow tags
    # or the query arguments
    lexer = None
    candidates = args['query'].split() + args['tags']
    for keyword in candidates:
        try:
            lexer = get_lexer_by_name(keyword)
        except ClassNotFound:
            continue
        else:
            break

    # no lexer found above, use the guesser
    if not lexer:
        try:
            lexer = guess_lexer(code)
        except ClassNotFound:
            return code

    formatter = TerminalFormatter(bg='dark')
    return highlight(code, lexer, formatter)
def highlight_or_render(code, filename, render_markup=True, ctags=None, ctags_baseurl=None):
    """Turn ``code`` into markup-rendered or Pygments-highlighted output.

    If ``render_markup`` is set and ``markup.can_render(filename)`` agrees,
    the result of ``markup.render`` is returned. Otherwise the code is
    highlighted; the lexer is picked from the file name, then guessed from
    the content, then defaults to plain text.

    :param code: the program code to highlight, str
    :param filename: name of the source file the code is taken from, str
    :param render_markup: whether to render markup if possible, bool
    :param ctags: tagsfile obj used for source code hyperlinks, ``ctags.CTags``
    :param ctags_baseurl: base url used for source code hyperlinks, str
    """
    if render_markup and markup.can_render(filename):
        return markup.render(filename, code)

    try:
        lexer = get_lexer_for_filename(filename, code)
    except ClassNotFound:
        try:
            lexer = guess_lexer(code)
        except ClassNotFound:
            lexer = TextLexer()

    # Python gets its own formatter, everything else the default one.
    if lexer.name == "Python":
        formatter_cls = KlausPythonFormatter
    else:
        formatter_cls = KlausDefaultFormatter

    # Tag links are only built when a tags file is supplied.
    ctags_urlscheme = (
        ctags_baseurl + "%(path)s%(fname)s%(fext)s" if ctags else None
    )

    formatter = formatter_cls(
        language=PYGMENTS_CTAGS_LANGUAGE_MAP.get(lexer.name),
        ctags=ctags,
        tagurlformat=ctags_urlscheme,
    )
    return highlight(code, lexer, formatter)
def insert_it(self, text, lang, style):
    """Insert ``text`` into the current editor, highlighted as HTML.

    text  -- the source code to insert.
    lang  -- lexer name, or ``self.automatic`` to guess from the content.
    style -- Pygments style passed to ``HtmlFormatter``.

    If Pygments knows no matching lexer, ``text`` is inserted as given.
    """
    editor = self.window.get_viewer().get_editor()
    textview = editor._textview
    try:
        if lang == self.automatic:
            lexer = guess_lexer(text)
        else:
            lexer = get_lexer_by_name(lang, stripall=True)
    except ClassNotFound:
        textview.insert_html(text)
    else:
        formatter = HtmlFormatter(noclasses=True,
                                  lineseparator="<br>",
                                  style=style)
        result = highlight(text, lexer, formatter)
        # fix encoding: turn the quote entities back into plain quotes.
        # These literals had been entity-decoded into the source, which left
        # an unterminated string (""") and a no-op replacement.
        result = result.replace("&quot;", '"')
        result = result.replace("&#39;", "'")
        # fix leading spaces
        # NOTE(review): the two replacement targets below were also found
        # entity-decoded (tab -> tab, space -> space, i.e. no-ops). They are
        # restored here as the HTML entities they presumably were -- confirm
        # against the upstream plugin.
        result = result.replace("\x09", '&#09;')
        result = result.replace(" ", "&nbsp;")
        textview.insert_html(result)
def run(self):
    """Highlight the directive content and return it as a raw HTML node.

    The first directive argument, if any, names the lexer; an unknown name
    falls back to ``TextLexer``. Without arguments the lexer is guessed from
    the content. The formatter is looked up in ``VARIANTS`` by one of the
    given options, or ``DEFAULT`` when no option is set.
    """
    self.assert_has_content()
    content = self.wrapped_content()
    if self.arguments:
        try:
            lexer = get_lexer_by_name(self.arguments[0])
        except (ValueError, IndexError):
            # lexer not found, use default.
            lexer = TextLexer()
    else:
        lexer = guess_lexer(content)

    # take an arbitrary option if more than one is given;
    # next(iter(...)) picks the same first key as keys()[0] did on Python 2
    # and also works on Python 3, where dict views cannot be indexed.
    if self.options:
        formatter = VARIANTS[next(iter(self.options))]
    else:
        formatter = DEFAULT

    parsed = highlight(content, lexer, formatter)
    return [nodes.raw('', parsed, format='html')]
def view(id, extension=None):
    """Build the template context for showing one paste.

    ``id`` is either ``'about'`` (the built-in about text) or a paste id
    looked up with ``paste.get_paste``; a missing paste aborts with 404.
    ``extension`` selects the lexer via a dummy file name, with a guess from
    the text as fallback. The ``h`` query argument, when present, is split
    on spaces into the list of highlighted entries.
    """
    is_about = id == 'about'
    doc = {'text': about_text} if is_about else paste.get_paste(id)
    if doc is None:
        abort(404)

    highlighted = request.args.get('h')
    if highlighted:
        highlighted = highlighted.split(' ')

    try:
        lexer = get_lexer_for_filename('foo.{}'.format(extension))
    except ClassNotFound:
        lexer = guess_lexer(doc['text'])

    days_left = None
    if is_about:
        title = '/about.md'
    else:
        expires = doc['delete_at']
        title = f'{request.path}{urlencode(request.args)}'
        if expires is not None:
            days_left = (expires - doc['created_at']).days

    body = doc['text']
    return {
        'id': id,
        'text': highlight(body, lexer, HtmlFormatter()),
        'text_raw': body,
        'days_left': days_left,
        'extension': extension,
        'lines': body.count('\n') + 1,
        'highlighted': highlighted,
        'title': title,
        'title_link': request.url,
    }
def view_paste(paste):
    """Render the paste identified by ``paste`` with syntax highlighting.

    The language comes from the ``l`` query argument when present, otherwise
    from the stored paste. With no language at all the lexer is guessed from
    the code, falling back to the plain ``text`` lexer. An explicitly
    requested language that Pygments does not know aborts with HTTP 500.
    """
    r = get_paste(paste)
    lang = request.args.get('l') if 'l' in request.args else r.language
    code = r.code

    if lang is None:
        # Nothing requested: guess, and fall back to plain text. Previously
        # a failed guess fell through to get_lexer_by_name(None), which
        # aborted with 500 and made the text fallback unreachable.
        try:
            lexer = guess_lexer(code)
        except ClassNotFound:
            lexer = get_lexer_by_name('text')
    else:
        try:
            lexer = get_lexer_by_name(lang)
        except ClassNotFound:
            abort(500, 'Invalid lexer: %s' % lang)

    formatter = HtmlFormatter(linenos=True)
    h = pygments.highlight(code, lexer, formatter)
    pasteid = r.private_id if r.private_id else r.paste_id
    return render_template('viewpaste.html',
                           data=h,
                           theme=get_theme(),
                           langs=LANGS,
                           pasteid=pasteid,
                           currlang=lexer.aliases[0])
def get_lexer(self, source: str, lang: str, opts: Optional[Dict] = None,
              force: bool = False, location: Any = None) -> Lexer:
    """Return the lexer to use for ``source`` written in ``lang``.

    Python aliases are normalised first, switching to the console variant
    when the source starts with ``>>>``. Entries of ``lexers`` are returned
    as they are; any other lexer is instantiated with ``opts`` and, unless
    ``force`` is set, gets the ``raiseonerror`` filter. Names unknown to
    Pygments are reported as a warning at ``location`` and replaced by the
    ``'none'`` lexer class.
    """
    opts = opts or {}

    # find out which lexer to use
    if lang in ('py', 'python'):
        # '>>>' marks an interactive session
        lang = 'pycon' if source.startswith('>>>') else 'python'
    elif lang in ('py3', 'python3', 'default'):
        lang = 'pycon3' if source.startswith('>>>') else 'python3'

    if lang in lexers:
        # just return custom lexers here (without installing raiseonerror filter)
        return lexers[lang]

    if lang in lexer_classes:
        lexer = lexer_classes[lang](**opts)
    else:
        try:
            lexer = (guess_lexer(source, **opts) if lang == 'guess'
                     else get_lexer_by_name(lang, **opts))
        except ClassNotFound:
            logger.warning(__('Pygments lexer name %r is not known'), lang,
                           location=location)
            lexer = lexer_classes['none'](**opts)

    if not force:
        lexer.add_filter('raiseonerror')

    return lexer
def write_content(content_type, content, formatter, fh=sys.stdout):
    """Highlight ``content`` according to ``content_type`` and write it.

    content_type -- block kind. The script-level kinds (COMMENT, SECTION,
                    DIRECTIVE, ASSIGNMENT, STATEMENT, ERROR) use
                    ``SoS_Lexer``; REPORT/report use ``TextLexer``; anything
                    else is treated as a Pygments lexer name (``run`` maps
                    to bash, ``node`` to JavaScript), then guessed from the
                    text, then rendered as plain text.
    content      -- sequence of lines; joined and dedented before use.
    formatter    -- Pygments formatter instance.
    fh           -- writable file object receiving the result.
    """
    nlines = len(content)
    content = dedent(''.join(content))
    # ' ' to keep pygments from removing empty lines
    # split, merge by \n can introduce one additional line
    content = [' \n' if x == '' else x + '\n'
               for x in content.split('\n')][:nlines]
    text = ''.join(content)

    sos_types = ('COMMENT', 'SECTION', 'DIRECTIVE', 'ASSIGNMENT',
                 'STATEMENT', 'ERROR')
    if content_type in sos_types:
        lexer = SoS_Lexer()
    elif content_type in ('REPORT', 'report'):
        lexer = TextLexer()
    else:
        if content_type == 'run':
            content_type = 'bash'
        elif content_type == 'node':
            content_type = 'JavaScript'
        # Best-effort lookup; "except Exception" (rather than a bare except)
        # keeps KeyboardInterrupt/SystemExit from being swallowed.
        try:
            lexer = get_lexer_by_name(content_type)
        except Exception:
            try:
                lexer = guess_lexer(text)
            except Exception:
                lexer = TextLexer()
    fh.write(highlight(text, lexer, formatter))
def _highlight(self, lang, linenos, caller=None):
    """Highlight the tag body returned by ``caller()`` with Pygments.

    lang    -- lexer name, or None to guess the lexer from the body.
    linenos -- forwarded to ``HtmlFormatter``; per Pygments, 'inline' gives
               inline numbers, any other true value a table, and None no
               line numbers.
    caller  -- callable producing the body of the tag.

    An unknown lexer prints the error and exits the process with status 1.
    """
    body = caller()

    # An optional cssclass for HtmlFormatter may be set on the environment.
    try:
        cssclass = self.environment.jinja2_highlight_cssclass
    except AttributeError:
        cssclass = None

    try:
        lexer = (guess_lexer(body) if lang is None
                 else get_lexer_by_name(lang, stripall=False))
    except ClassNotFound as e:
        print(e)
        sys.exit(1)

    # Only pass cssclass along when one was configured.
    formatter_options = {'linenos': linenos}
    if cssclass is not None:
        formatter_options['cssclass'] = cssclass
    formatter = HtmlFormatter(**formatter_options)

    # When the closing tag sits on the line below the code, the body ends
    # with an extra blank line, which would also get a line number; rstrip
    # removes that trailing white space.
    unescaped = Markup(body.rstrip()).unescape()
    return highlight(unescaped, lexer, formatter)
def pygmentize(value):
    """Replace every match of ``regex`` in ``value`` with highlighted HTML.

    For each match, group 1 holds the tag attributes and group 2 the code.
    If the attributes contain a ``class``, its quoted value names the lexer;
    otherwise the lexer is guessed from the code, defaulting to Python.
    Text outside the matches is copied through unchanged.
    """
    last_end = 0
    pieces = []
    for match_obj in regex.finditer(value):
        code_class = match_obj.group(1)
        code_string = match_obj.group(2)
        # str.find() returns -1 (truthy) when absent and 0 (falsy) when the
        # match is at the start, so it cannot serve as a presence test.
        if 'class' in code_class:
            language = re.split(r'"|\'', code_class)[1]
            lexer = lexers.get_lexer_by_name(language)
        else:
            try:
                # was str(code): an undefined name, i.e. a NameError
                lexer = lexers.guess_lexer(str(code_string))
            except ValueError:
                lexer = lexers.PythonLexer()
        pygmented_string = pygments.highlight(code_string, lexer,
                                              formatters.HtmlFormatter())
        pieces.append(value[last_end:match_obj.start(0)])
        pieces.append(pygmented_string)
        # NOTE(review): resumes after group 2, not after the whole match, so
        # whatever the pattern matches past the code is kept -- confirm
        # against the definition of ``regex``.
        last_end = match_obj.end(2)
    pieces.append(value[last_end:])
    return ''.join(pieces)
def guess_language(raw: str, filename: Optional[str] = None) -> str:
    """Return the primary Pygments alias guessed for ``raw``.

    A guess based on ``filename`` (when given) is returned directly. If that
    fails, the guess is made from the text alone; results listed in
    ``GUESS_LANG_IGNORES`` or a failed guess give ``'text'``, and
    ``GUESS_LANG_OVERRIDES`` may remap the final answer.
    """
    lexer_kwargs = dict(stripnl=True)

    # Guess a lexer based on filename and raw text first
    if filename:
        try:
            by_name = guess_lexer_for_filename(filename, raw, **lexer_kwargs)
            return by_name.aliases[0]
        except (ValueError, IndexError):
            pass

    # If that didn't work guess lexer just by looking at the raw text
    try:
        guessed = guess_lexer(raw, **lexer_kwargs).aliases[0]
    except (ValueError, IndexError):
        # No lexer was detected, fall back to plain text.
        guessed = None

    # Some odd lexers match far too often, so they are excluded.
    if guessed is None or guessed in GUESS_LANG_IGNORES:
        return 'text'

    # Finally check for language overrides and return
    return GUESS_LANG_OVERRIDES.get(guessed, guessed)
def filter(self, source, **kwargs):
    """Filter ``source`` and wrap the result in highlighted HTML plus CSS.

    The parent filter runs first. If pygments cannot be imported a hint is
    printed and the parent's output is returned as is. The lexer is guessed
    from ``source`` (Python when no lexer matches) and applied to the
    parent's output; the formatter's style definitions are prepended in a
    ``<style>`` element.
    """
    encoded = super(CodeHighlighter, self).filter(source, **kwargs)

    try:
        from pygments import formatters, highlight, lexers
    except ImportError as ex:
        print('<%s> - Failed to import pygments! (%s)' % (self.__class__.__name__, ex))
        print('-- You may need to install it from: http://pygments.org')
        return encoded

    try:
        lexer = lexers.guess_lexer(source)
    except lexers.ClassNotFound:
        lexer = lexers.PythonLexer()

    formatter = formatters.HtmlFormatter(cssclass='code_highlighter')
    highlighted = highlight(encoded, lexer, formatter)
    style_defs = formatter.get_style_defs('.code_highlighter')

    template = '<style type="text/css"><!-- %(css)s --></style>%(source)s'
    return template % {'css': style_defs, 'source': highlighted}
def colorize_blob(fname, s: str) -> str:
    """Highlight the blob ``s`` named ``fname`` as an HTML table with anchors.

    The lexer is chosen from the file name and content. When that fails the
    blob is shown as plain text, unless it starts with a shebang, in which
    case a guess from its first 80 characters is attempted.
    """
    utf8 = {"encoding": "utf-8"}

    try:
        lexer = lexers.guess_lexer_for_filename(fname, s, **utf8)
    except lexers.ClassNotFound:
        # Only try to guess lexers if the file starts with a shebang,
        # otherwise it's likely a text file and guess_lexer() is prone to
        # make mistakes with those.
        lexer = lexers.TextLexer(**utf8)
        has_shebang = s.startswith("#!")
        if has_shebang:
            try:
                lexer = lexers.guess_lexer(s[:80], **utf8)
            except lexers.ClassNotFound:
                pass

    formatter_options = {
        "encoding": "utf-8",
        "cssclass": "source_code",
        "linenos": "table",
        "anchorlinenos": True,
        "lineanchors": "line",
    }
    return highlight(s, lexer, HtmlFormatter(**formatter_options))
def get_lexer(self, src, language):
    """Get the Pygments lexer.

    The lexer is looked up by ``language`` (after ``get_extended_language``
    split off any lexer options). If that fails and ``self.guess_lang`` is
    set, it is guessed from ``src``. The plain ``text`` lexer is the final
    fallback.
    """
    lexer_options = {}
    if language:
        language, lexer_options = self.get_extended_language(language)

    # Try and get lexer by the name given.
    lexer = None
    try:
        lexer = get_lexer_by_name(language, **lexer_options)
    except Exception:
        pass

    if lexer is None and self.guess_lang:
        try:
            lexer = guess_lexer(src)
        except Exception:  # pragma: no cover
            pass

    return lexer if lexer is not None else get_lexer_by_name('text')
def syntax_hl(src, lang=None, guess_lang=False, inline=False):
    """Highlight ``src`` and return the formatted result.

    ``lang`` names the lexer. If it is unknown, the lexer is guessed from
    ``src`` when ``guess_lang`` is set, and the ``text`` lexer is used
    otherwise or when guessing fails. ``inline`` switches both the CSS class
    and the formatter between the inline and the block variant.
    """
    css_class = 'inline-highlight' if inline else 'highlight'
    src = src.strip('\n')

    try:
        lexer = get_lexer_by_name(lang)
    except ValueError:
        lexer = None
        if guess_lang:
            try:
                lexer = guess_lexer(src)
            except ValueError:
                lexer = None
        if lexer is None:
            lexer = get_lexer_by_name('text')

    if inline:
        formatter = SublimeInlineHtmlFormatter(
            cssclass=css_class,
            classprefix=css_class + ' ',
        )
    else:
        formatter = SublimeBlockFormatter(cssclass=css_class)
    return highlight(src, lexer, formatter)
def lang_for_block(source, lang):
    """Resolve the lexer name to use for ``source`` declared as ``lang``.

    * ``py``/``python``: ``pycon`` for an interactive session (source starts
      with ``>>>``), ``python`` if ``try_parse`` accepts the source,
      otherwise the language is guessed.
    * ``python3``/``py3``: ``pycon3`` for an interactive session; otherwise
      the name is returned unchanged (no parsing attempt).
    * ``guess``: the first alias of the guessed lexer, or None on any error.
    * anything else is returned unchanged.
    """
    if lang in ('py', 'python'):
        if source.startswith('>>>'):
            # interactive session
            return 'pycon'
        if try_parse(source):
            # it parses, so treat it as Python
            return 'python'
        # not parseable: let the guesser decide below
        lang = 'guess'
    elif lang in ('python3', 'py3') and source.startswith('>>>'):
        # for py3, recognize interactive sessions, but do not try parsing...
        return 'pycon3'

    if lang != 'guess':
        return lang

    try:
        return guess_lexer(source).aliases[0]
    except Exception:
        return None
def guess_filetype(filepath: pathlib.Path) -> FileType:
    """Work out the filetype to use for ``filepath``.

    Tried in order: ``guess_filetype_from_path``, then
    ``guess_filetype_from_shebang`` on the first line if it is a shebang,
    and finally a Pygments lexer lookup from which a new filetype is built.
    ``filetypes["Plain Text"]`` is returned when nothing better is found.
    """
    known = guess_filetype_from_path(filepath)
    if known is not None:
        return known

    shebang_line: Optional[str] = None
    try:
        # the shebang is read as utf-8 because the filetype config file
        # is utf-8
        with filepath.open("r", encoding="utf-8") as file:
            # don't read the entire file if it's huge and all on one line
            first_line = file.readline(1000)
    except (UnicodeError, OSError):
        pass
    else:
        # don't guess from first line of file when it's not a shebang
        if first_line.startswith("#!"):
            shebang_line = first_line

    if shebang_line is not None:
        known = guess_filetype_from_shebang(shebang_line)
        if known is not None:
            return known

    # if nothing else works, create a new filetype automagically based on pygments
    try:
        lexer = lexers.get_lexer_for_filename(filepath)
    except ClassNotFound:
        if shebang_line is None:
            return filetypes["Plain Text"]  # give up
        lexer = lexers.guess_lexer(shebang_line)

    if isinstance(lexer, lexers.TextLexer):
        return filetypes["Plain Text"]  # give up

    lexer_cls = type(lexer)
    return {
        "pygments_lexer": lexer_cls.__module__ + "." + lexer_cls.__name__,
        "langserver": None,
    }
async def highlight(context):
    """ Generates syntax highlighted images.

    The text comes from the replied-to message: the content of its attached
    file when that file is ``text/plain``, otherwise the message text.
    Without a reply, ``context.arguments`` is used. The text is rendered to
    a JPEG with a guessed lexer, sent to the chat, and the triggering
    message is deleted.
    """
    if context.fwd_from:
        return
    reply = await context.get_reply_message()
    reply_id = None
    await context.edit("Rendering image, please wait . . .")
    if reply:
        reply_id = reply.id
        # reuse the reply fetched above instead of requesting it a second time
        target_file_path = await context.client.download_media(reply)
        if target_file_path is None:
            message = reply.text
        else:
            try:
                if Magic(mime=True).from_file(target_file_path) != 'text/plain':
                    message = reply.text
                else:
                    with open(target_file_path, 'r') as file:
                        message = file.read()
            finally:
                # always clean up the download, also when it is not a text
                # file or reading it fails
                remove(target_file_path)
    else:
        if context.arguments:
            message = context.arguments
        else:
            await context.edit("`Unable to retrieve target message.`")
            return
    lexer = guess_lexer(message)
    formatter = img.JpgImageFormatter(style="colorful")
    result = syntax_highlight(message, lexer, formatter, outfile=None)
    await context.edit("Uploading image . . .")
    await context.client.send_file(
        context.chat_id,
        result,
        reply_to=reply_id
    )
    await context.delete()
def pygmentize(value):
    '''
    Finds all <code class="python"></code> blocks in a text block and replaces
    it with pygments-highlighted html semantics. It relies that you provide
    the format of the input as class attribute. Blocks without a class
    attribute have their lexer guessed from the code, defaulting to Python.

    Inspiration: http://www.djangosnippets.org/snippets/25/
    Updated by: Samualy Clay

    Example
    -------

    {% post.body|pygmentize %}

    '''
    last_end = 0
    pieces = []
    for match_obj in regex.finditer(value):
        code_class = match_obj.group(1)
        code_string = match_obj.group(2)
        # str.find() returns -1 (truthy) when absent and 0 (falsy) when the
        # match is at the start, so it cannot serve as a presence test.
        if 'class' in code_class:
            language = re.split(r'"|\'', code_class)[1]
            lexer = lexers.get_lexer_by_name(language)
        else:
            try:
                # was str(code): an undefined name, i.e. a NameError
                lexer = lexers.guess_lexer(str(code_string))
            except ValueError:
                lexer = lexers.PythonLexer()
        pygmented_string = pygments.highlight(code_string, lexer,
                                              formatters.HtmlFormatter())
        pieces.append(value[last_end:match_obj.start(0)])
        pieces.append(pygmented_string)
        # NOTE(review): resumes after group 2, not after the whole match, so
        # whatever the pattern matches past the code is kept -- confirm
        # against the definition of ``regex``.
        last_end = match_obj.end(2)
    pieces.append(value[last_end:])
    return ''.join(pieces)
def expand(self, parameter, remark):
    """Highlight the lines in ``parameter`` and return them as an HTML region.

    The lexer is taken from the ``Code.type`` variable of the current scope.
    If that is empty or not recognized by Pygments, the type is guessed from
    the content, and plain text is used when guessing fails. A warning is
    reported through ``remark.reporter`` for each fallback.
    """
    scope = remark.scopeStack.top()

    codeType = scope.getString('Code.type', '')
    lexer = None
    if codeType != '':
        # The code-type was given explicitly.
        # See if we can find a corresponding pygments lexer.
        try:
            lexer = get_lexer_by_name(codeType)
        except Exception:
            # Best-effort, but no longer swallows KeyboardInterrupt or
            # SystemExit the way a bare except did.
            remark.reporter.reportWarning(
                'The code-type ' + codeType +
                ' is not recognized by Pygments. ' +
                'Guessing code-type from content.',
                'invalid-input')

    # Prepare for Pygments input.
    inputText = '\n'.join(parameter)

    if lexer is None:
        # Try to guess the type of the code.
        try:
            lexer = guess_lexer(inputText)
        except Exception:
            remark.reporter.reportWarning(
                'The code-type cannot be guessed from the content by Pygments. ' +
                'Setting code-type to text.',
                'invalid-input')
            lexer = get_lexer_by_name('text')

    # Highlight the code.
    hilightedText = highlight(inputText, lexer, HtmlFormatter())

    # Prepare for Remark output.
    return htmlRegion(hilightedText.split('\n'))