def test_guess_decode(self): # UTF-8 should be decoded as UTF-8 s = util.guess_decode(u'\xff'.encode('utf-8')) self.assertEqual(s, (u'\xff', 'utf-8')) # otherwise, it could be latin1 or the locale encoding... import locale s = util.guess_decode(b'\xff') self.assertTrue(s[1] in ('latin1', locale.getpreferredencoding()))
def test_guess_decode(): # UTF-8 should be decoded as UTF-8 s = util.guess_decode('\xff'.encode()) assert s == ('\xff', 'utf-8') # otherwise, it could be latin1 or the locale encoding... import locale s = util.guess_decode(b'\xff') assert s[1] in ('latin1', locale.getpreferredencoding())
def guess_lexer(_text, **options): """Guess a lexer by strong distinctions in the text (eg, shebang).""" if not isinstance(_text, text_type): inencoding = options.get('inencoding', options.get('encoding')) if inencoding: _text = _text.decode(inencoding or 'utf8') else: _text, _ = guess_decode(_text) # try to get a vim modeline first ft = get_filetype_from_buffer(_text) if ft is not None: try: return get_lexer_by_name(ft, **options) except ClassNotFound: pass best_lexer = [0.0, None] for lexer in _iter_lexerclasses(): rv = lexer.analyse_text(_text) if rv == 1.0: return lexer(**options) if rv > best_lexer[0]: best_lexer[:] = (rv, lexer) if not best_lexer[0] or best_lexer[1] is None: raise ClassNotFound('no lexer matching the text found') return best_lexer[1](**options)
def get_tokens(self, text, unfiltered=False): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism is bypassed even if filters are defined. Also preprocess the text, i.e. expand tabs and strip it if wanted and applies registered filters. """ if not isinstance(text, str): if self.encoding == 'guess': text, _ = guess_decode(text) elif self.encoding == 'chardet': try: import chardet except ImportError as e: raise ImportError( 'To enable chardet encoding guessing, ' 'please install the chardet library ' 'from http://chardet.feedparser.org/') from e # check for BOM first decoded = None for bom, encoding in _encoding_map: if text.startswith(bom): decoded = text[len(bom):].decode(encoding, 'replace') break # no BOM found, so use chardet if decoded is None: enc = chardet.detect(text[:1024]) # Guess using first 1KB decoded = text.decode( enc.get('encoding') or 'utf-8', 'replace') text = decoded else: text = text.decode(self.encoding) if text.startswith('\ufeff'): text = text[len('\ufeff'):] else: if text.startswith('\ufeff'): text = text[len('\ufeff'):] # text now *is* a unicode string text = text.replace('\r\n', '\n') text = text.replace('\r', '\n') if self.stripall: text = text.strip() elif self.stripnl: text = text.strip('\n') if self.tabsize > 0: text = text.expandtabs(self.tabsize) if self.ensurenl and not text.endswith('\n'): text += '\n' def streamer(): for _, t, v in self.get_tokens_unprocessed(text): yield t, v stream = streamer() if not unfiltered: stream = apply_filters(stream, self.filters, self) return stream
def get_tokens(self, text, unfiltered=False): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism is bypassed even if filters are defined. Also preprocess the text, i.e. expand tabs and strip it if wanted and applies registered filters. """ if not isinstance(text, text_type): if self.encoding == 'guess': text, _ = guess_decode(text) elif self.encoding == 'chardet': try: import chardet except ImportError: raise ImportError('To enable chardet encoding guessing, ' 'please install the chardet library ' 'from http://chardet.feedparser.org/') # check for BOM first decoded = None for bom, encoding in _encoding_map: if text.startswith(bom): decoded = text[len(bom):].decode(encoding, 'replace') break # no BOM found, so use chardet if decoded is None: enc = chardet.detect(text[:1024]) # Guess using first 1KB decoded = text.decode(enc.get('encoding') or 'utf-8', 'replace') text = decoded else: text = text.decode(self.encoding) if text.startswith(u'\ufeff'): text = text[len(u'\ufeff'):] else: if text.startswith(u'\ufeff'): text = text[len(u'\ufeff'):] # text now *is* a unicode string text = text.replace('\r\n', '\n') text = text.replace('\r', '\n') if self.stripall: text = text.strip() elif self.stripnl: text = text.strip('\n') if self.tabsize > 0: text = text.expandtabs(self.tabsize) if self.ensurenl and not text.endswith('\n'): text += '\n' def streamer(): for _, t, v in self.get_tokens_unprocessed(text): yield t, v stream = streamer() if not unfiltered: stream = apply_filters(stream, self.filters, self) return stream
def read_input(filename, encoding): with open(filename, 'rb') as infp: code = infp.read() if not encoding or encoding == 'guess': code, encoding = guess_decode(code) else: code = code.decode(encoding) return code, encoding
def _pygments_get_tokens_preprocess(self, text, unfiltered=False): """ Return an iterable of (tokentype, value) pairs generated from ``text``. If ``unfiltered`` is set to ``True``, the filtering mechanism is bypassed even if filters are defined. Also preprocess the text, i.e. expand tabs and strip it if wanted and applies registered filters. """ if not isinstance(text, str): if self.encoding == "guess": text, _ = guess_decode(text) elif self.encoding == "chardet": try: import chardet except ImportError: raise ImportError("To enable chardet encoding guessing, " "please install the chardet library " "from http://chardet.feedparser.org/") # check for BOM first decoded = None for bom, encoding in _encoding_map: if text.startswith(bom): decoded = text[len(bom):].decode(encoding, "replace") break # no BOM found, so use chardet if decoded is None: enc = chardet.detect(text[:1024]) # Guess using first 1KB decoded = text.decode( enc.get("encoding") or "utf-8", "replace") text = decoded else: text = text.decode(self.encoding) if text.startswith(u"\ufeff"): text = text[len(u"\ufeff"):] else: if text.startswith(u"\ufeff"): text = text[len(u"\ufeff"):] # text now *is* a unicode string text = text.replace("\r\n", "\n") text = text.replace("\r", "\n") if self.stripall: text = text.strip() elif self.stripnl: text = text.strip("\n") if self.tabsize > 0: text = text.expandtabs(self.tabsize) if self.ensurenl and not text.endswith("\n"): text += "\n" # EDIT: This is not from the original Pygments code. It was added to return # the preprocessed text. return text
def find_lexer_class_for_filename(_fn, code=None): """Get a lexer for a filename. If multiple lexers match the filename pattern, use ``analyse_text()`` to figure out which one is more appropriate. Returns None if not found. """ matches = [] fn = basename(_fn) for modname, name, _, filenames, _ in itervalues(LEXERS): for filename in filenames: if _fn_matches(fn, filename): if name not in _lexer_cache: _load_lexers(modname) matches.append((_lexer_cache[name], filename)) for cls in find_plugin_lexers(): for filename in cls.filenames: if _fn_matches(fn, filename): matches.append((cls, filename)) if sys.version_info > (3, ) and isinstance(code, bytes): # decode it, since all analyse_text functions expect unicode code = guess_decode(code) def get_rating(info): cls, filename = info # explicit patterns get a bonus bonus = '*' not in filename and 0.5 or 0 # The class _always_ defines analyse_text because it's included in # the Lexer class. The default implementation returns None which # gets turned into 0.0. Run scripts/detect_missing_analyse_text.py # to find lexers which need it overridden. if code: return cls.analyse_text(code) + bonus, cls.__name__ return cls.priority + bonus, cls.__name__ if matches: matches.sort(key=get_rating) # print "Possible lexers, after sort:", matches return matches[-1][0]
def find_lexer_class_for_filename(_fn, code=None): """Get a lexer for a filename. If multiple lexers match the filename pattern, use ``analyse_text()`` to figure out which one is more appropriate. Returns None if not found. """ matches = [] fn = basename(_fn) for modname, name, _, filenames, _ in itervalues(LEXERS): for filename in filenames: if _fn_matches(fn, filename): if name not in _lexer_cache: _load_lexers(modname) matches.append((_lexer_cache[name], filename)) for cls in find_plugin_lexers(): for filename in cls.filenames: if _fn_matches(fn, filename): matches.append((cls, filename)) if sys.version_info > (3,) and isinstance(code, bytes): # decode it, since all analyse_text functions expect unicode code = guess_decode(code) def get_rating(info): cls, filename = info # explicit patterns get a bonus bonus = '*' not in filename and 0.5 or 0 # The class _always_ defines analyse_text because it's included in # the Lexer class. The default implementation returns None which # gets turned into 0.0. Run scripts/detect_missing_analyse_text.py # to find lexers which need it overridden. if code: return cls.analyse_text(code) + bonus return cls.priority + bonus if matches: matches.sort(key=get_rating) # print "Possible lexers, after sort:", matches return matches[-1][0]
def main(args=sys.argv): """ Main command line entry point. """ # pylint: disable-msg=R0911,R0912,R0915 usage = USAGE % ((args[0],) * 6) try: popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:hVHgs") except getopt.GetoptError: print(usage, file=sys.stderr) return 2 opts = {} O_opts = [] P_opts = [] F_opts = [] for opt, arg in popts: if opt == "-O": O_opts.append(arg) elif opt == "-P": P_opts.append(arg) elif opt == "-F": F_opts.append(arg) opts[opt] = arg if opts.pop("-h", None) is not None: print(usage) return 0 if opts.pop("-V", None) is not None: print("Pygments version %s, (c) 2006-2014 by Georg Brandl." % __version__) return 0 # handle ``pygmentize -L`` L_opt = opts.pop("-L", None) if L_opt is not None: if opts: print(usage, file=sys.stderr) return 2 # print version main(["", "-V"]) if not args: args = ["lexer", "formatter", "filter", "style"] for arg in args: _print_list(arg.rstrip("s")) return 0 # handle ``pygmentize -H`` H_opt = opts.pop("-H", None) if H_opt is not None: if opts or len(args) != 2: print(usage, file=sys.stderr) return 2 what, name = args if what not in ("lexer", "formatter", "filter"): print(usage, file=sys.stderr) return 2 _print_help(what, name) return 0 # parse -O options parsed_opts = _parse_options(O_opts) opts.pop("-O", None) # parse -P options for p_opt in P_opts: try: name, value = p_opt.split("=", 1) except ValueError: parsed_opts[p_opt] = True else: parsed_opts[name] = value opts.pop("-P", None) # encodings inencoding = parsed_opts.get("inencoding", parsed_opts.get("encoding")) outencoding = parsed_opts.get("outencoding", parsed_opts.get("encoding")) # handle ``pygmentize -N`` infn = opts.pop("-N", None) if infn is not None: try: lexer = get_lexer_for_filename(infn, **parsed_opts) except ClassNotFound as err: lexer = TextLexer() except OptionError as err: print("Error:", err, file=sys.stderr) return 1 print(lexer.aliases[0]) return 0 # handle ``pygmentize -S`` S_opt = opts.pop("-S", None) a_opt = opts.pop("-a", None) if S_opt is not None: f_opt = opts.pop("-f", None) if not f_opt: print(usage, file=sys.stderr) return 2 if opts or args: print(usage, file=sys.stderr) return 2 try: parsed_opts["style"] = S_opt fmter = get_formatter_by_name(f_opt, **parsed_opts) except ClassNotFound as err: print(err, file=sys.stderr) return 1 arg = a_opt or "" try: print(fmter.get_style_defs(arg)) except Exception as err: print("Error:", err, file=sys.stderr) return 1 return 0 # if no -S is given, -a is not allowed if a_opt is not None: print(usage, file=sys.stderr) return 2 # parse -F options F_opts = _parse_filters(F_opts) opts.pop("-F", None) # select lexer lexer = opts.pop("-l", None) if lexer: try: lexer = get_lexer_by_name(lexer, **parsed_opts) except (OptionError, ClassNotFound) as err: print("Error:", err, file=sys.stderr) return 1 # read input code code = None if args: if len(args) > 1: print(usage, file=sys.stderr) return 2 if "-s" in opts: print("Error: -s option not usable when input file specified", file=sys.stderr) return 1 infn = args[0] try: with open(infn, "rb") as infp: code = infp.read() except Exception as err: print("Error: cannot read infile:", err, file=sys.stderr) return 1 if not inencoding: code, inencoding = guess_decode(code) # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) except ClassNotFound as err: if "-g" in opts: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: print("Error:", err, file=sys.stderr) return 1 except OptionError as err: print("Error:", err, file=sys.stderr) return 1 elif "-s" not in opts: # treat stdin as full file (-s support is later) # read code from terminal, always in binary mode since we want to # decode ourselves and be tolerant with it if sys.version_info > (3,): # Python 3: we have to use .buffer to get a binary stream code = sys.stdin.buffer.read() else: code = sys.stdin.read() if not inencoding: code, inencoding = guess_decode_from_terminal(code, sys.stdin) # else the lexer will do the decoding if not lexer: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) # select formatter outfn = opts.pop("-o", None) fmter = opts.pop("-f", None) if fmter: try: fmter = get_formatter_by_name(fmter, **parsed_opts) except (OptionError, ClassNotFound) as err: print("Error:", err, file=sys.stderr) return 1 if outfn: if not fmter: try: fmter = get_formatter_for_filename(outfn, **parsed_opts) except (OptionError, ClassNotFound) as err: print("Error:", err, file=sys.stderr) return 1 try: outfile = open(outfn, "wb") except Exception as err: print("Error: cannot open outfile:", err, file=sys.stderr) return 1 else: if not fmter: fmter = TerminalFormatter(**parsed_opts) if sys.version_info > (3,): # Python 3: we have to use .buffer to get a binary stream outfile = sys.stdout.buffer else: outfile = sys.stdout # determine output encoding if not explicitly selected if not outencoding: if outfn: # output file? use lexer encoding for now (can still be None) fmter.encoding = inencoding else: # else use terminal encoding fmter.encoding = terminal_encoding(sys.stdout) # provide coloring under Windows, if possible if not outfn and sys.platform in ("win32", "cygwin") and fmter.name in ("Terminal", "Terminal256"): # unfortunately colorama doesn't support binary streams on Py3 if sys.version_info > (3,): import io outfile = io.TextIOWrapper(outfile, encoding=fmter.encoding) fmter.encoding = None try: import colorama.initialise except ImportError: pass else: outfile = colorama.initialise.wrap_stream(outfile, convert=None, strip=None, autoreset=False, wrap=True) # When using the LaTeX formatter and the option `escapeinside` is # specified, we need a special lexer which collects escaped text # before running the chosen language lexer. escapeinside = parsed_opts.get("escapeinside", "") if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter): left = escapeinside[0] right = escapeinside[1] lexer = LatexEmbeddedLexer(left, right, lexer) # ... and do it! try: # process filters for fname, fopts in F_opts: lexer.add_filter(fname, **fopts) if "-s" not in opts: # process whole input as per normal... highlight(code, lexer, fmter, outfile) else: if not lexer: print("Error: when using -s a lexer has to be selected with -l", file=sys.stderr) return 1 # line by line processing of stdin (eg: for 'tail -f')... try: while 1: if sys.version_info > (3,): # Python 3: we have to use .buffer to get a binary stream line = sys.stdin.buffer.readline() else: line = sys.stdin.readline() if not line: break if not inencoding: line = guess_decode_from_terminal(line, sys.stdin)[0] highlight(line, lexer, fmter, outfile) if hasattr(outfile, "flush"): outfile.flush() except KeyboardInterrupt: return 0 except Exception: raise import traceback info = traceback.format_exception(*sys.exc_info()) msg = info[-1].strip() if len(info) >= 3: # extract relevant file and position info msg += "\n (f%s)" % info[-2].split("\n")[0].strip()[1:] print(file=sys.stderr) print("*** Error while highlighting:", file=sys.stderr) print(msg, file=sys.stderr) return 1 return 0
def main_inner(popts, args, usage): opts = {} O_opts = [] P_opts = [] F_opts = [] for opt, arg in popts: if opt == '-O': O_opts.append(arg) elif opt == '-P': P_opts.append(arg) elif opt == '-F': F_opts.append(arg) opts[opt] = arg if opts.pop('-h', None) is not None: print(usage) return 0 if opts.pop('-V', None) is not None: print('Pygments version %s, (c) 2006-2017 by Georg Brandl.' % __version__) return 0 # handle ``pygmentize -L`` L_opt = opts.pop('-L', None) if L_opt is not None: if opts: print(usage, file=sys.stderr) return 2 # print version main(['', '-V']) if not args: args = ['lexer', 'formatter', 'filter', 'style'] for arg in args: _print_list(arg.rstrip('s')) return 0 # handle ``pygmentize -H`` H_opt = opts.pop('-H', None) if H_opt is not None: if opts or len(args) != 2: print(usage, file=sys.stderr) return 2 what, name = args # pylint: disable=unbalanced-tuple-unpacking if what not in ('lexer', 'formatter', 'filter'): print(usage, file=sys.stderr) return 2 return _print_help(what, name) # parse -O options parsed_opts = _parse_options(O_opts) opts.pop('-O', None) # parse -P options for p_opt in P_opts: try: name, value = p_opt.split('=', 1) except ValueError: parsed_opts[p_opt] = True else: parsed_opts[name] = value opts.pop('-P', None) # encodings inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding')) outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding')) # handle ``pygmentize -N`` infn = opts.pop('-N', None) if infn is not None: lexer = find_lexer_class_for_filename(infn) if lexer is None: lexer = TextLexer print(lexer.aliases[0]) return 0 # handle ``pygmentize -S`` S_opt = opts.pop('-S', None) a_opt = opts.pop('-a', None) if S_opt is not None: f_opt = opts.pop('-f', None) if not f_opt: print(usage, file=sys.stderr) return 2 if opts or args: print(usage, file=sys.stderr) return 2 try: parsed_opts['style'] = S_opt fmter = get_formatter_by_name(f_opt, **parsed_opts) except ClassNotFound as err: print(err, file=sys.stderr) return 1 print(fmter.get_style_defs(a_opt or '')) return 0 # if no -S is given, -a is not allowed if a_opt is not None: print(usage, file=sys.stderr) return 2 # parse -F options F_opts = _parse_filters(F_opts) opts.pop('-F', None) allow_custom_lexer_formatter = False # -x: allow custom (eXternal) lexers and formatters if opts.pop('-x', None) is not None: allow_custom_lexer_formatter = True # select lexer lexer = None # given by name? lexername = opts.pop('-l', None) if lexername: # custom lexer, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in lexername: try: if ':' in lexername: filename, name = lexername.rsplit(':', 1) lexer = load_lexer_from_file(filename, name, **parsed_opts) else: lexer = load_lexer_from_file(lexername, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: lexer = get_lexer_by_name(lexername, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 # read input code code = None if args: if len(args) > 1: print(usage, file=sys.stderr) return 2 if '-s' in opts: print('Error: -s option not usable when input file specified', file=sys.stderr) return 2 infn = args[0] try: with open(infn, 'rb') as infp: code = infp.read() except Exception as err: print('Error: cannot read infile:', err, file=sys.stderr) return 1 if not inencoding: code, inencoding = guess_decode(code) # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) except ClassNotFound as err: if '-g' in opts: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: print('Error:', err, file=sys.stderr) return 1 except OptionError as err: print('Error:', err, file=sys.stderr) return 1 elif '-s' not in opts: # treat stdin as full file (-s support is later) # read code from terminal, always in binary mode since we want to # decode ourselves and be tolerant with it if sys.version_info > (3,): # Python 3: we have to use .buffer to get a binary stream code = sys.stdin.buffer.read() else: code = sys.stdin.read() if not inencoding: code, inencoding = guess_decode_from_terminal(code, sys.stdin) # else the lexer will do the decoding if not lexer: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: # -s option needs a lexer with -l if not lexer: print('Error: when using -s a lexer has to be selected with -l', file=sys.stderr) return 2 # process filters for fname, fopts in F_opts: try: lexer.add_filter(fname, **fopts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 # select formatter outfn = opts.pop('-o', None) fmter = opts.pop('-f', None) if fmter: # custom formatter, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in fmter: try: if ':' in fmter: file, fmtername = fmter.rsplit(':', 1) fmter = load_formatter_from_file(file, fmtername, **parsed_opts) else: fmter = load_formatter_from_file(fmter, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: fmter = get_formatter_by_name(fmter, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 if outfn: if not fmter: try: fmter = get_formatter_for_filename(outfn, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 try: outfile = open(outfn, 'wb') except Exception as err: print('Error: cannot open outfile:', err, file=sys.stderr) return 1 else: if not fmter: fmter = TerminalFormatter(**parsed_opts) if sys.version_info > (3,): # Python 3: we have to use .buffer to get a binary stream outfile = sys.stdout.buffer else: outfile = sys.stdout # determine output encoding if not explicitly selected if not outencoding: if outfn: # output file? use lexer encoding for now (can still be None) fmter.encoding = inencoding else: # else use terminal encoding fmter.encoding = terminal_encoding(sys.stdout) # provide coloring under Windows, if possible if not outfn and sys.platform in ('win32', 'cygwin') and \ fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover # unfortunately colorama doesn't support binary streams on Py3 if sys.version_info > (3,): from pygments.util import UnclosingTextIOWrapper outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding) fmter.encoding = None try: import colorama.initialise except ImportError: pass else: outfile = colorama.initialise.wrap_stream( outfile, convert=None, strip=None, autoreset=False, wrap=True) # When using the LaTeX formatter and the option `escapeinside` is # specified, we need a special lexer which collects escaped text # before running the chosen language lexer. escapeinside = parsed_opts.get('escapeinside', '') if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter): left = escapeinside[0] right = escapeinside[1] lexer = LatexEmbeddedLexer(left, right, lexer) # ... and do it! if '-s' not in opts: # process whole input as per normal... highlight(code, lexer, fmter, outfile) return 0 else: # line by line processing of stdin (eg: for 'tail -f')... try: while 1: if sys.version_info > (3,): # Python 3: we have to use .buffer to get a binary stream line = sys.stdin.buffer.readline() else: line = sys.stdin.readline() if not line: break if not inencoding: line = guess_decode_from_terminal(line, sys.stdin)[0] highlight(line, lexer, fmter, outfile) if hasattr(outfile, 'flush'): outfile.flush() return 0 except KeyboardInterrupt: # pragma: no cover return 0
def main_inner(parser, argns): if argns.help: parser.print_help() return 0 if argns.V: print('Pygments version %s, (c) 2006-2021 by Georg Brandl, Matthäus ' 'Chajdas and contributors.' % __version__) return 0 def is_only_option(opt): return not any(v for (k, v) in vars(argns).items() if k != opt) # handle ``pygmentize -L`` if argns.L is not None: if not is_only_option('L'): parser.print_help(sys.stderr) return 2 # print version main(['', '-V']) allowed_types = {'lexer', 'formatter', 'filter', 'style'} largs = [arg.rstrip('s') for arg in argns.L] if any(arg not in allowed_types for arg in largs): parser.print_help(sys.stderr) return 0 if not largs: largs = allowed_types for arg in largs: _print_list(arg) return 0 # handle ``pygmentize -H`` if argns.H: if not is_only_option('H'): parser.print_help(sys.stderr) return 2 what, name = argns.H if what not in ('lexer', 'formatter', 'filter'): parser.print_help(sys.stderr) return 2 return _print_help(what, name) # parse -O options parsed_opts = _parse_options(argns.O or []) # parse -P options for p_opt in argns.P or []: try: name, value = p_opt.split('=', 1) except ValueError: parsed_opts[p_opt] = True else: parsed_opts[name] = value # encodings inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding')) outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding')) # handle ``pygmentize -N`` if argns.N: lexer = find_lexer_class_for_filename(argns.N) if lexer is None: lexer = TextLexer print(lexer.aliases[0]) return 0 # handle ``pygmentize -C`` if argns.C: inp = sys.stdin.buffer.read() try: lexer = guess_lexer(inp, inencoding=inencoding) except ClassNotFound: lexer = TextLexer print(lexer.aliases[0]) return 0 # handle ``pygmentize -S`` S_opt = argns.S a_opt = argns.a if S_opt is not None: f_opt = argns.f if not f_opt: parser.print_help(sys.stderr) return 2 if argns.l or argns.INPUTFILE: parser.print_help(sys.stderr) return 2 try: parsed_opts['style'] = S_opt fmter = get_formatter_by_name(f_opt, **parsed_opts) except ClassNotFound as err: print(err, file=sys.stderr) return 1 print(fmter.get_style_defs(a_opt or '')) return 0 # if no -S is given, -a is not allowed if argns.a is not None: parser.print_help(sys.stderr) return 2 # parse -F options F_opts = _parse_filters(argns.F or []) # -x: allow custom (eXternal) lexers and formatters allow_custom_lexer_formatter = bool(argns.x) # select lexer lexer = None # given by name? lexername = argns.l if lexername: # custom lexer, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in lexername: try: filename = None name = None if ':' in lexername: filename, name = lexername.rsplit(':', 1) if '.py' in name: # This can happen on Windows: If the lexername is # C:\lexer.py -- return to normal load path in that case name = None if filename and name: lexer = load_lexer_from_file(filename, name, **parsed_opts) else: lexer = load_lexer_from_file(lexername, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: lexer = get_lexer_by_name(lexername, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 # read input code code = None if argns.INPUTFILE: if argns.s: print('Error: -s option not usable when input file specified', file=sys.stderr) return 2 infn = argns.INPUTFILE try: with open(infn, 'rb') as infp: code = infp.read() except Exception as err: print('Error: cannot read infile:', err, file=sys.stderr) return 1 if not inencoding: code, inencoding = guess_decode(code) # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) except ClassNotFound as err: if argns.g: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: print('Error:', err, file=sys.stderr) return 1 except OptionError as err: print('Error:', err, file=sys.stderr) return 1 elif not argns.s: # treat stdin as full file (-s support is later) # read code from terminal, always in binary mode since we want to # decode ourselves and be tolerant with it code = sys.stdin.buffer.read() # use .buffer to get a binary stream if not inencoding: code, inencoding = guess_decode_from_terminal(code, sys.stdin) # else the lexer will do the decoding if not lexer: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: # -s option needs a lexer with -l if not lexer: print('Error: when using -s a lexer has to be selected with -l', file=sys.stderr) return 2 # process filters for fname, fopts in F_opts: try: lexer.add_filter(fname, **fopts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 # select formatter outfn = argns.o fmter = argns.f if fmter: # custom formatter, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in fmter: try: filename = None name = None if ':' in fmter: # Same logic as above for custom lexer filename, name = fmter.rsplit(':', 1) if '.py' in name: name = None if filename and name: fmter = load_formatter_from_file(filename, name, **parsed_opts) else: fmter = load_formatter_from_file(fmter, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: fmter = get_formatter_by_name(fmter, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 if outfn: if not fmter: try: fmter = get_formatter_for_filename(outfn, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 try: outfile = open(outfn, 'wb') except Exception as err: print('Error: cannot open outfile:', err, file=sys.stderr) return 1 else: if not fmter: if '256' in os.environ.get('TERM', ''): fmter = Terminal256Formatter(**parsed_opts) else: fmter = TerminalFormatter(**parsed_opts) outfile = sys.stdout.buffer # determine output encoding if not explicitly selected if not outencoding: if outfn: # output file? use lexer encoding for now (can still be None) fmter.encoding = inencoding else: # else use terminal encoding fmter.encoding = terminal_encoding(sys.stdout) # provide coloring under Windows, if possible if not outfn and sys.platform in ('win32', 'cygwin') and \ fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover # unfortunately colorama doesn't support binary streams on Py3 outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding) fmter.encoding = None try: import colorama.initialise except ImportError: pass else: outfile = colorama.initialise.wrap_stream(outfile, convert=None, strip=None, autoreset=False, wrap=True) # When using the LaTeX formatter and the option `escapeinside` is # specified, we need a special lexer which collects escaped text # before running the chosen language lexer. escapeinside = parsed_opts.get('escapeinside', '') if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter): left = escapeinside[0] right = escapeinside[1] lexer = LatexEmbeddedLexer(left, right, lexer) # ... and do it! if not argns.s: # process whole input as per normal... try: highlight(code, lexer, fmter, outfile) finally: if outfn: outfile.close() return 0 else: # line by line processing of stdin (eg: for 'tail -f')... try: while 1: line = sys.stdin.buffer.readline() if not line: break if not inencoding: line = guess_decode_from_terminal(line, sys.stdin)[0] highlight(line, lexer, fmter, outfile) if hasattr(outfile, 'flush'): outfile.flush() return 0 except KeyboardInterrupt: # pragma: no cover return 0 finally: if outfn: outfile.close()
def main(args=sys.argv): """ Main command line entry point. """ # pylint: disable-msg=R0911,R0912,R0915 usage = USAGE % ((args[0], ) * 6) try: popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:hVHgs") except getopt.GetoptError: print(usage, file=sys.stderr) return 2 opts = {} O_opts = [] P_opts = [] F_opts = [] for opt, arg in popts: if opt == '-O': O_opts.append(arg) elif opt == '-P': P_opts.append(arg) elif opt == '-F': F_opts.append(arg) opts[opt] = arg if opts.pop('-h', None) is not None: print(usage) return 0 if opts.pop('-V', None) is not None: print('Pygments version %s, (c) 2006-2014 by Georg Brandl.' % __version__) return 0 # handle ``pygmentize -L`` L_opt = opts.pop('-L', None) if L_opt is not None: if opts: print(usage, file=sys.stderr) return 2 # print version main(['', '-V']) if not args: args = ['lexer', 'formatter', 'filter', 'style'] for arg in args: _print_list(arg.rstrip('s')) return 0 # handle ``pygmentize -H`` H_opt = opts.pop('-H', None) if H_opt is not None: if opts or len(args) != 2: print(usage, file=sys.stderr) return 2 what, name = args if what not in ('lexer', 'formatter', 'filter'): print(usage, file=sys.stderr) return 2 _print_help(what, name) return 0 # parse -O options parsed_opts = _parse_options(O_opts) opts.pop('-O', None) # parse -P options for p_opt in P_opts: try: name, value = p_opt.split('=', 1) except ValueError: parsed_opts[p_opt] = True else: parsed_opts[name] = value opts.pop('-P', None) # encodings inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding')) outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding')) # handle ``pygmentize -N`` infn = opts.pop('-N', None) if infn is not None: try: lexer = get_lexer_for_filename(infn, **parsed_opts) except ClassNotFound as err: lexer = TextLexer() except OptionError as err: print('Error:', err, file=sys.stderr) return 1 print(lexer.aliases[0]) return 0 # handle ``pygmentize -S`` S_opt = opts.pop('-S', None) a_opt = opts.pop('-a', None) if S_opt is not None: f_opt = opts.pop('-f', None) if not f_opt: print(usage, file=sys.stderr) return 2 if opts or args: print(usage, file=sys.stderr) return 2 try: parsed_opts['style'] = S_opt fmter = get_formatter_by_name(f_opt, **parsed_opts) except ClassNotFound as err: print(err, file=sys.stderr) return 1 arg = a_opt or '' try: print(fmter.get_style_defs(arg)) except Exception as err: print('Error:', err, file=sys.stderr) return 1 return 0 # if no -S is given, -a is not allowed if a_opt is not None: print(usage, file=sys.stderr) return 2 # parse -F options F_opts = _parse_filters(F_opts) opts.pop('-F', None) # select formatter outfn = opts.pop('-o', None) fmter = opts.pop('-f', None) if fmter: try: fmter = get_formatter_by_name(fmter, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 if outfn: if not fmter: try: fmter = get_formatter_for_filename(outfn, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 try: outfile = open(outfn, 'wb') except Exception as err: print('Error: cannot open outfile:', err, file=sys.stderr) return 1 else: if not fmter: fmter = TerminalFormatter(**parsed_opts) if sys.version_info > (3, ): # Python 3: we have to use .buffer to get a binary stream outfile = sys.stdout.buffer else: outfile = sys.stdout # determine output encoding if not explicitly selected if not outencoding: if outfn: # output file? -> encoding pass-through fmter.encoding = inencoding else: # else use terminal encoding fmter.encoding = terminal_encoding(sys.stdout) # provide coloring under Windows, if possible if not outfn and sys.platform in ('win32', 'cygwin') and \ fmter.name in ('Terminal', 'Terminal256'): # unfortunately colorama doesn't support binary streams on Py3 if sys.version_info > (3, ): import io outfile = io.TextIOWrapper(outfile, encoding=fmter.encoding) fmter.encoding = None try: import colorama.initialise except ImportError: pass else: outfile = colorama.initialise.wrap_stream(outfile, convert=None, strip=None, autoreset=False, wrap=True) # select lexer lexer = opts.pop('-l', None) if lexer: try: lexer = get_lexer_by_name(lexer, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 # read input code code = None if args: if len(args) > 1: print(usage, file=sys.stderr) return 2 if '-s' in opts: print('Error: -s option not usable when input file specified', file=sys.stderr) return 1 infn = args[0] try: with open(infn, 'rb') as infp: code = infp.read() except Exception as err: print('Error: cannot read infile:', err, file=sys.stderr) return 1 if not inencoding: code, inencoding = guess_decode(code) # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) except ClassNotFound as err: if '-g' in opts: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: print('Error:', err, file=sys.stderr) return 1 except OptionError as err: print('Error:', err, file=sys.stderr) return 1 elif '-s' not in opts: # treat stdin as full file (-s support is later) # read code from terminal, always in binary mode since we want to # decode ourselves and be tolerant with it if sys.version_info > (3, ): # Python 3: we have to use .buffer to get a binary stream code = sys.stdin.buffer.read() else: code = sys.stdin.read() if not inencoding: code, inencoding = guess_decode_from_terminal(code, sys.stdin) # else the lexer will do the decoding if not lexer: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) # When using the LaTeX formatter and the option `escapeinside` is # specified, we need a special lexer which collects escaped text # before running the chosen language lexer. escapeinside = parsed_opts.get('escapeinside', '') if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter): left = escapeinside[0] right = escapeinside[1] lexer = LatexEmbeddedLexer(left, right, lexer) # ... and do it! try: # process filters for fname, fopts in F_opts: lexer.add_filter(fname, **fopts) if '-s' not in opts: # process whole input as per normal... highlight(code, lexer, fmter, outfile) else: if not lexer: print( 'Error: when using -s a lexer has to be selected with -l', file=sys.stderr) return 1 # line by line processing of stdin (eg: for 'tail -f')... try: while 1: if sys.version_info > (3, ): # Python 3: we have to use .buffer to get a binary stream line = sys.stdin.buffer.readline() else: line = sys.stdin.readline() if not line: break if not inencoding: line = guess_decode_from_terminal(line, sys.stdin)[0] highlight(line, lexer, fmter, outfile) if hasattr(outfile, 'flush'): outfile.flush() except KeyboardInterrupt: return 0 except Exception: import traceback info = traceback.format_exception(*sys.exc_info()) msg = info[-1].strip() if len(info) >= 3: # extract relevant file and position info msg += '\n (f%s)' % info[-2].split('\n')[0].strip()[1:] print(file=sys.stderr) print('*** Error while highlighting:', file=sys.stderr) print(msg, file=sys.stderr) return 1 return 0
def main_inner(popts, args, usage): opts = {} O_opts = [] P_opts = [] F_opts = [] for opt, arg in popts: if opt == '-O': O_opts.append(arg) elif opt == '-P': P_opts.append(arg) elif opt == '-F': F_opts.append(arg) opts[opt] = arg if opts.pop('-h', None) is not None: print(usage) return 0 if opts.pop('-V', None) is not None: print('Pygments version %s, (c) 2006-2017 by Georg Brandl.' % __version__) return 0 # handle ``pygmentize -L`` L_opt = opts.pop('-L', None) if L_opt is not None: if opts: print(usage, file=sys.stderr) return 2 # print version main(['', '-V']) if not args: args = ['lexer', 'formatter', 'filter', 'style'] for arg in args: _print_list(arg.rstrip('s')) return 0 # handle ``pygmentize -H`` H_opt = opts.pop('-H', None) if H_opt is not None: if opts or len(args) != 2: print(usage, file=sys.stderr) return 2 what, name = args # pylint: disable=unbalanced-tuple-unpacking if what not in ('lexer', 'formatter', 'filter'): print(usage, file=sys.stderr) return 2 return _print_help(what, name) # parse -O options parsed_opts = _parse_options(O_opts) opts.pop('-O', None) # parse -P options for p_opt in P_opts: try: name, value = p_opt.split('=', 1) except ValueError: parsed_opts[p_opt] = True else: parsed_opts[name] = value opts.pop('-P', None) # encodings inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding')) outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding')) # handle ``pygmentize -N`` infn = opts.pop('-N', None) if infn is not None: lexer = find_lexer_class_for_filename(infn) if lexer is None: lexer = TextLexer print(lexer.aliases[0]) return 0 # handle ``pygmentize -S`` S_opt = opts.pop('-S', None) a_opt = opts.pop('-a', None) if S_opt is not None: f_opt = opts.pop('-f', None) if not f_opt: print(usage, file=sys.stderr) return 2 if opts or args: print(usage, file=sys.stderr) return 2 try: parsed_opts['style'] = S_opt fmter = get_formatter_by_name(f_opt, **parsed_opts) except ClassNotFound as err: print(err, file=sys.stderr) return 1 print(fmter.get_style_defs(a_opt or '')) return 0 # if no -S is given, -a is not allowed if a_opt is not None: print(usage, file=sys.stderr) return 2 # parse -F options F_opts = _parse_filters(F_opts) opts.pop('-F', None) allow_custom_lexer_formatter = False # -x: allow custom (eXternal) lexers and formatters if opts.pop('-x', None) is not None: allow_custom_lexer_formatter = True # select lexer lexer = None # given by name? lexername = opts.pop('-l', None) if lexername: # custom lexer, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in lexername: try: if ':' in lexername: filename, name = lexername.rsplit(':', 1) lexer = load_lexer_from_file(filename, name, **parsed_opts) else: lexer = load_lexer_from_file(lexername, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: lexer = get_lexer_by_name(lexername, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 # read input code code = None if args: if len(args) > 1: print(usage, file=sys.stderr) return 2 if '-s' in opts: print('Error: -s option not usable when input file specified', file=sys.stderr) return 2 infn = args[0] try: with open(infn, 'rb') as infp: code = infp.read() except Exception as err: print('Error: cannot read infile:', err, file=sys.stderr) return 1 if not inencoding: code, inencoding = guess_decode(code) # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) except ClassNotFound as err: if '-g' in opts: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: print('Error:', err, file=sys.stderr) return 1 except OptionError as err: print('Error:', err, file=sys.stderr) return 1 elif '-s' not in opts: # treat stdin as full file (-s support is later) # read code from terminal, always in binary mode since we want to # decode ourselves and be tolerant with it if sys.version_info > (3, ): # Python 3: we have to use .buffer to get a binary stream code = sys.stdin.buffer.read() else: code = sys.stdin.read() if not inencoding: code, inencoding = guess_decode_from_terminal(code, sys.stdin) # else the lexer will do the decoding if not lexer: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: # -s option needs a lexer with -l if not lexer: print('Error: when using -s a lexer has to be selected with -l', file=sys.stderr) return 2 # process filters for fname, fopts in F_opts: try: lexer.add_filter(fname, **fopts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 # select formatter outfn = opts.pop('-o', None) fmter = opts.pop('-f', None) if fmter: # custom formatter, located relative to user's cwd if allow_custom_lexer_formatter and '.py' in fmter: try: if ':' in fmter: file, fmtername = fmter.rsplit(':', 1) fmter = load_formatter_from_file(file, fmtername, **parsed_opts) else: fmter = load_formatter_from_file(fmter, **parsed_opts) except ClassNotFound as err: print('Error:', err, file=sys.stderr) return 1 else: try: fmter = get_formatter_by_name(fmter, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 if outfn: if not fmter: try: fmter = get_formatter_for_filename(outfn, **parsed_opts) except (OptionError, ClassNotFound) as err: print('Error:', err, file=sys.stderr) return 1 try: outfile = open(outfn, 'wb') except Exception as err: print('Error: cannot open outfile:', err, file=sys.stderr) return 1 else: if not fmter: if '256' in os.environ.get('TERM', ''): fmter = Terminal256Formatter(**parsed_opts) else: fmter = TerminalFormatter(**parsed_opts) if sys.version_info > (3, ): # Python 3: we have to use .buffer to get a binary stream outfile = sys.stdout.buffer else: outfile = sys.stdout # determine output encoding if not explicitly selected if not outencoding: if outfn: # output file? use lexer encoding for now (can still be None) fmter.encoding = inencoding else: # else use terminal encoding fmter.encoding = terminal_encoding(sys.stdout) # provide coloring under Windows, if possible if not outfn and sys.platform in ('win32', 'cygwin') and \ fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover # unfortunately colorama doesn't support binary streams on Py3 if sys.version_info > (3, ): from pygments.util import UnclosingTextIOWrapper outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding) fmter.encoding = None try: import colorama.initialise except ImportError: pass else: outfile = colorama.initialise.wrap_stream(outfile, convert=None, strip=None, autoreset=False, wrap=True) # When using the LaTeX formatter and the option `escapeinside` is # specified, we need a special lexer which collects escaped text # before running the chosen language lexer. escapeinside = parsed_opts.get('escapeinside', '') if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter): left = escapeinside[0] right = escapeinside[1] lexer = LatexEmbeddedLexer(left, right, lexer) # ... and do it! if '-s' not in opts: # process whole input as per normal... highlight(code, lexer, fmter, outfile) return 0 else: # line by line processing of stdin (eg: for 'tail -f')... try: while 1: if sys.version_info > (3, ): # Python 3: we have to use .buffer to get a binary stream line = sys.stdin.buffer.readline() else: line = sys.stdin.readline() if not line: break if not inencoding: line = guess_decode_from_terminal(line, sys.stdin)[0] highlight(line, lexer, fmter, outfile) if hasattr(outfile, 'flush'): outfile.flush() return 0 except KeyboardInterrupt: # pragma: no cover return 0
print >> sys.stderr, usage return 2 if '-s' in opts: print >> sys.stderr, 'Error: -s option not usable when input file specified' return 2 infn = args[0] try: with open(infn, 'rb') as infp: code = infp.read() except Exception, err: print >> sys.stderr, 'Error: cannot read infile:', err return 1 if not inencoding: code, inencoding = guess_decode(code) # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) except ClassNotFound, err: if '-g' in opts: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: print >> sys.stderr, 'Error:', err return 1 except OptionError, err: