def _format_lines(self, tokensource):
    buf = []
    for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]):
        if ttype in Token.Sql:
            for t, v in HtmlFormatter._format_lines(self, iter(buf)):
                yield t, v
            buf = []

            if ttype is Token.Sql:
                yield 1, "<div class='show_sql'>%s</div>" % \
                    re.sub(
                        r'(?:{stop}|\n+)$',
                        '',
                        filters.html_escape(value))
            elif ttype is Token.Sql.Link:
                yield 1, "<a href='#' class='sql_link'>sql</a>"
            elif ttype is Token.Sql.Popup:
                yield 1, "<div class='popup_sql'>%s</div>" % \
                    re.sub(
                        r'(?:{stop}|\n+)$',
                        '',
                        filters.html_escape(value))
        else:
            buf.append((ttype, value))

    for t, v in _strip_trailing_whitespace(
            HtmlFormatter._format_lines(self, iter(buf))):
        yield t, v
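# Standalone sketch of the override pattern the method above relies on:
# HtmlFormatter._format_lines yields (1, html_line) pairs, and a subclass can
# intercept the token stream before delegating to the base class.  The class
# name and the comment-rewriting logic below are illustrative assumptions,
# not the original formatter; only the Pygments calls are stock API.
import re

import pygments
from pygments.formatters import HtmlFormatter
from pygments.lexers import PythonLexer
from pygments.token import Comment


class CommentCalloutFormatter(HtmlFormatter):  # hypothetical example class
    def _format_lines(self, tokensource):
        def intercept(source):
            for ttype, value in source:
                if ttype in Comment:
                    # rewrite comment text before normal HTML formatting
                    value = re.sub(r"^#\s*", "# NOTE: ", value)
                yield ttype, value

        for t, v in HtmlFormatter._format_lines(self, intercept(tokensource)):
            yield t, v


print(pygments.highlight("x = 1  # set x\n", PythonLexer(), CommentCalloutFormatter()))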
def get_tokens(self, text, unfiltered=False):
    """
    Return an iterable of (tokentype, value) pairs generated from
    `text`. If `unfiltered` is set to `True`, the filtering mechanism
    is bypassed even if filters are defined.

    Also preprocess the text, i.e. expand tabs and strip it if
    wanted and applies registered filters.
    """
    if not isinstance(text, str):
        if self.encoding == 'guess':
            text, _ = guess_decode(text)
        elif self.encoding == 'chardet':
            try:
                import chardet
            except ImportError as e:
                raise ImportError(
                    'To enable chardet encoding guessing, '
                    'please install the chardet library '
                    'from http://chardet.feedparser.org/') from e
            # check for BOM first
            decoded = None
            for bom, encoding in _encoding_map:
                if text.startswith(bom):
                    decoded = text[len(bom):].decode(encoding, 'replace')
                    break
            # no BOM found, so use chardet
            if decoded is None:
                enc = chardet.detect(text[:1024])  # Guess using first 1KB
                decoded = text.decode(
                    enc.get('encoding') or 'utf-8', 'replace')
            text = decoded
        else:
            text = text.decode(self.encoding)
            if text.startswith('\ufeff'):
                text = text[len('\ufeff'):]
    else:
        if text.startswith('\ufeff'):
            text = text[len('\ufeff'):]

    # text now *is* a unicode string
    text = text.replace('\r\n', '\n')
    text = text.replace('\r', '\n')
    if self.stripall:
        text = text.strip()
    elif self.stripnl:
        text = text.strip('\n')
    if self.tabsize > 0:
        text = text.expandtabs(self.tabsize)
    if self.ensurenl and not text.endswith('\n'):
        text += '\n'

    def streamer():
        for _, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    stream = streamer()
    if not unfiltered:
        stream = apply_filters(stream, self.filters, self)
    return stream
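# Typical call pattern for get_tokens(): a lexer yields (tokentype, value)
# pairs, while get_tokens_unprocessed() additionally yields a character
# offset, which the streamer() helper above discards.  Standard Pygments
# usage; PythonLexer is just an arbitrary example lexer.
from pygments.lexers import PythonLexer

lexer = PythonLexer()
for ttype, value in lexer.get_tokens("print('hi')\n"):
    print(ttype, repr(value))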
def get_tokens(self, text, unfiltered=False):
    """
    Return an iterable of (tokentype, value) pairs generated from
    `text`. If `unfiltered` is set to `True`, the filtering mechanism
    is bypassed even if filters are defined.

    Also preprocess the text, i.e. expand tabs and strip it if
    wanted and applies registered filters.
    """
    if not isinstance(text, unicode):
        if self.encoding == "guess":
            try:
                text = text.decode("utf-8")
                if text.startswith(u"\ufeff"):
                    text = text[len(u"\ufeff") :]
            except UnicodeDecodeError:
                text = text.decode("latin1")
        elif self.encoding == "chardet":
            try:
                import chardet
            except ImportError:
                raise ImportError(
                    "To enable chardet encoding guessing, "
                    "please install the chardet library "
                    "from http://chardet.feedparser.org/"
                )
            # check for BOM first
            decoded = None
            for bom, encoding in _encoding_map:
                if text.startswith(bom):
                    decoded = unicode(text[len(bom) :], encoding, errors="replace")
                    break
            # no BOM found, so use chardet
            if decoded is None:
                enc = chardet.detect(text[:1024])  # Guess using first 1KB
                decoded = unicode(
                    text, enc.get("encoding") or "utf-8", errors="replace"
                )
            text = decoded
        else:
            text = text.decode(self.encoding)
    # text now *is* a unicode string
    text = text.replace("\r\n", "\n")
    text = text.replace("\r", "\n")
    if self.stripall:
        text = text.strip()
    elif self.stripnl:
        text = text.strip("\n")
    if self.tabsize > 0:
        text = text.expandtabs(self.tabsize)
    if self.ensurenl and not text.endswith("\n"):
        text += "\n"

    def streamer():
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    stream = streamer()
    if not unfiltered:
        stream = apply_filters(stream, self.filters, self)
    return stream
def get_tokens(self, text, unfiltered=False):
    """
    Return an iterable of (tokentype, value) pairs generated from
    `text`. If `unfiltered` is set to `True`, the filtering mechanism
    is bypassed even if filters are defined.

    Also preprocess the text, i.e. expand tabs and strip it if
    wanted and applies registered filters.
    """
    if not isinstance(text, text_type):
        if self.encoding == 'guess':
            text, _ = guess_decode(text)
        elif self.encoding == 'chardet':
            try:
                import chardet
            except ImportError:
                raise ImportError('To enable chardet encoding guessing, '
                                  'please install the chardet library '
                                  'from http://chardet.feedparser.org/')
            # check for BOM first
            decoded = None
            for bom, encoding in _encoding_map:
                if text.startswith(bom):
                    decoded = text[len(bom):].decode(encoding, 'replace')
                    break
            # no BOM found, so use chardet
            if decoded is None:
                enc = chardet.detect(text[:1024])  # Guess using first 1KB
                decoded = text.decode(enc.get('encoding') or 'utf-8',
                                      'replace')
            text = decoded
        else:
            text = text.decode(self.encoding)
            if text.startswith(u'\ufeff'):
                text = text[len(u'\ufeff'):]
    else:
        if text.startswith(u'\ufeff'):
            text = text[len(u'\ufeff'):]

    # text now *is* a unicode string
    text = text.replace('\r\n', '\n')
    text = text.replace('\r', '\n')
    if self.stripall:
        text = text.strip()
    elif self.stripnl:
        text = text.strip('\n')
    if self.tabsize > 0:
        text = text.expandtabs(self.tabsize)
    if self.ensurenl and not text.endswith('\n'):
        text += '\n'

    def streamer():
        for _, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    stream = streamer()
    if not unfiltered:
        stream = apply_filters(stream, self.filters, self)
    return stream
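# The bytes-handling branches above are driven by the lexer's `encoding`
# option.  Standard Pygments behaviour: pass bytes plus encoding='guess'
# (UTF-8 first, with fallbacks handled by guess_decode) or a concrete codec
# name, and get_tokens() still yields text values.  A small usage sketch:
from pygments.lexers import PythonLexer

lexer = PythonLexer(encoding="guess")
tokens = list(lexer.get_tokens(b"x = 'caf\xc3\xa9'\n"))  # UTF-8 bytes for "café"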
def _filter_tokens(self, tokensource):
    for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]):
        if ttype in Token.Sql:
            if ttype is not Token.Sql.Link and ttype is not Token.Sql.Open:
                yield Token.Literal, re.sub(r"{stop}", "", value)
            else:
                continue
        else:
            yield ttype, value
def _filter_tokens(self, tokensource):
    for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]):
        if ttype in Token.Sql:
            if ttype is not Token.Sql.Link and ttype is not Token.Sql.Open:
                yield Token.Literal, re.sub(r'{stop}', '', value)
            else:
                continue
        else:
            yield ttype, value
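# StripDocTestFilter is used throughout these snippets but not defined here.
# The class below is an assumed, minimal stand-in only meant to show the
# Pygments Filter protocol that apply_filters() expects: a filter() generator
# taking (lexer, stream) and yielding (tokentype, value) pairs.  This one
# simply drops "# doctest:" directive comments.
import re

from pygments.filter import Filter, apply_filters
from pygments.lexers import PythonLexer
from pygments.token import Comment


class StripDoctestDirectivesFilter(Filter):  # hypothetical example filter
    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype in Comment and re.search(r"#\s*doctest:", value):
                continue
            yield ttype, value


filtered = apply_filters(
    PythonLexer().get_tokens("x = 1  # doctest: +SKIP\n"),
    [StripDoctestDirectivesFilter()],
)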
def _pygments_get_tokens_postprocess(self, text, unfiltered=False):
    def streamer():
        for _, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    stream = streamer()
    if not unfiltered:
        stream = apply_filters(stream, self.filters, self)
    return stream
def _format_lines(self, tokensource):
    detect_annotations = DetectAnnotationsFilter()

    for ttype, value in super()._format_lines(
        apply_filters(tokensource, [detect_annotations])
    ):
        yield ttype, value

    self.annotated = detect_annotations.annotated
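# DetectAnnotationsFilter's implementation is not part of these snippets.
# The hypothetical filter below only mirrors the pattern the override above
# depends on: a stateful Pygments filter that passes tokens through unchanged
# while recording, in an `annotated` attribute, whether anything of interest
# went by (here an assumed heuristic: "# type:" comments).  The flag is read
# after formatting has consumed the stream.
from pygments.filter import Filter
from pygments.token import Comment


class DetectTypeCommentsFilter(Filter):  # hypothetical stand-in
    def __init__(self, **options):
        super().__init__(**options)
        self.annotated = False

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype in Comment and "# type:" in value:
                self.annotated = True
            yield ttype, value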
def get_tokens(self, text, unfiltered=False):
    """
    Return an iterable of (tokentype, value) pairs generated from
    `text`. If `unfiltered` is set to `True`, the filtering mechanism
    is bypassed even if filters are defined.

    Also preprocess the text, i.e. expand tabs and strip it if
    wanted and applies registered filters.
    """
    if not isinstance(text, unicode):
        if self.encoding == 'guess':
            try:
                text = text.decode('utf-8')
                if text.startswith(u'\ufeff'):
                    text = text[len(u'\ufeff'):]
            except UnicodeDecodeError:
                text = text.decode('latin1')
        elif self.encoding == 'chardet':
            try:
                import chardet
            except ImportError:
                raise ImportError('To enable chardet encoding guessing, '
                                  'please install the chardet library '
                                  'from http://chardet.feedparser.org/')
            enc = chardet.detect(text)
            text = text.decode(enc['encoding'])
        else:
            text = text.decode(self.encoding)
    # text now *is* a unicode string
    text = text.replace('\r\n', '\n')
    text = text.replace('\r', '\n')
    if self.stripall:
        text = text.strip()
    elif self.stripnl:
        text = text.strip('\n')
    if self.tabsize > 0:
        text = text.expandtabs(self.tabsize)
    if self.ensurenl and not text.endswith('\n'):
        text += '\n'

    def streamer():
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    stream = streamer()
    if not unfiltered:
        stream = apply_filters(stream, self.filters, self)
    return stream
def get_tokens(self, text, unfiltered=False):
    """
    Return an iterable of (tokentype, value) pairs generated from
    `text`. If `unfiltered` is set to `True`, the filtering mechanism
    is bypassed even if filters are defined.

    Also preprocess the text, i.e. expand tabs and strip it if
    wanted and applies registered filters.
    """
    if not isinstance(text, unicode):
        if self.encoding == 'guess':
            try:
                text = text.decode('utf-8')
                if text.startswith(u'\ufeff'):
                    text = text[len(u'\ufeff'):]
            except UnicodeDecodeError:
                # utf-8 already failed; fall back to latin1, which accepts
                # any byte sequence
                text = text.decode('latin1')
        elif self.encoding == 'chardet':
            try:
                import chardet
            except ImportError:
                raise ImportError('To enable chardet encoding guessing, '
                                  'please install the chardet library '
                                  'from http://chardet.feedparser.org/')
            enc = chardet.detect(text)
            text = text.decode(enc['encoding'])
        else:
            text = text.decode(self.encoding)
    # text now *is* a unicode string
    text = text.replace('\r\n', '\n')
    text = text.replace('\r', '\n')
    if self.stripall:
        text = text.strip()
    elif self.stripnl:
        text = text.strip('\n')
    if self.tabsize > 0:
        text = text.expandtabs(self.tabsize)
    if self.ensurenl and not text.endswith('\n'):
        text += '\n'

    def streamer():
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    stream = streamer()
    if not unfiltered:
        stream = apply_filters(stream, self.filters, self)
    return stream
def _format_lines(self, tokensource):
    buf = []
    for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]):
        if ttype in Token.Sql:
            for t, v in HtmlFormatter._format_lines(self, iter(buf)):
                yield t, v
            buf = []

            if ttype is Token.Sql:
                # strip a trailing {stop} marker and trailing newlines
                yield 1, "<div class='show_sql'>%s</div>" % re.sub(
                    r"(?:{stop}|\n+)$", "", value)
            elif ttype is Token.Sql.Link:
                yield 1, "<a href='#' class='sql_link'>sql</a>"
            elif ttype is Token.Sql.Popup:
                yield 1, "<div class='popup_sql'>%s</div>" % re.sub(
                    r"(?:{stop}|\n+)$", "", value)
        else:
            buf.append((ttype, value))

    for t, v in _strip_trailing_whitespace(
            HtmlFormatter._format_lines(self, iter(buf))):
        yield t, v
def _format_lines(self, tokensource):
    sql_lexer = SqlLexer()
    formatter = HtmlFormatter(nowrap=True)
    buf = []

    for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]):
        if ttype in Token.Sql:
            for t, v in HtmlFormatter._format_lines(self, iter(buf)):
                yield t, v
            buf = []

            if ttype is Token.Sql:
                yield (
                    1,
                    "<div class='show_sql'>%s</div>"
                    % pygments.highlight(
                        re.sub(r"(?:{stop}|\n+)\s*$", "", value),
                        sql_lexer,
                        formatter,
                    ),
                )
            elif ttype is Token.Sql.Link:
                yield 1, "<a href='#' class='sql_link'>sql</a>"
            elif ttype is Token.Sql.Popup:
                yield (
                    1,
                    "<div class='popup_sql'>%s</div>"
                    % pygments.highlight(
                        re.sub(r"(?:{stop}|\n+)$", "", value),
                        sql_lexer,
                        formatter,
                    ),
                )
        else:
            buf.append((ttype, value))

    for t, v in _strip_trailing_whitespace(
        HtmlFormatter._format_lines(self, iter(buf))
    ):
        yield t, v
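# The variant above highlights the extracted SQL itself instead of just
# HTML-escaping it.  nowrap=True makes HtmlFormatter return bare <span>
# markup without the usual <div class="highlight"><pre> wrapper, which is
# what lets the result be dropped into the custom show_sql/popup_sql divs.
# Stock Pygments usage:
import pygments
from pygments.formatters import HtmlFormatter
from pygments.lexers import SqlLexer

print(pygments.highlight("SELECT * FROM users", SqlLexer(), HtmlFormatter(nowrap=True)))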