def guess_lexer(_text, **options): """Guess a lexer by strong distinctions in the text (eg, shebang).""" if not isinstance(_text, text_type): inencoding = options.get('inencoding', options.get('encoding')) if inencoding: _text = _text.decode(inencoding or 'utf8') else: _text, _ = guess_decode(_text) # try to get a vim modeline first ft = get_filetype_from_buffer(_text) if ft is not None: try: return get_lexer_by_name(ft, **options) except ClassNotFound: pass best_lexer = [0.0, None] for lexer in _iter_lexerclasses(): rv = lexer.analyse_text(_text) if rv == 1.0: return lexer(**options) if rv > best_lexer[0]: best_lexer[:] = (rv, lexer) if not best_lexer[0] or best_lexer[1] is None: raise ClassNotFound('no lexer matching the text found') return best_lexer[1](**options)
def guess_lexer(_text, conf_threshold=0.01, mime_mul=0.09, **options): """Guess a lexer by strong distinctions in the text (eg, shebang).""" # try to get a vim modeline first ft = get_filetype_from_buffer(_text) if ft is not None: try: return get_lexer_by_name(ft, **options) except ClassNotFound: pass best_lexer = [0.0, None] for lexer in _iter_lexerclasses(): rv = lexer.analyse_text(_text) #MIME has an unusually high rv when providing it with any kind of text if 'MIME' in str(lexer): rv *= mime_mul if rv == 1.0: return lexer(**options) if rv > best_lexer[0]: best_lexer[:] = (rv, lexer) if not best_lexer[ 0] or best_lexer[0] < conf_threshold or best_lexer[1] is None: raise ClassNotFound('no lexer matching the text found') return best_lexer[1](**options)
def guess_lexer_using_modeline(text): """Guess lexer for given text using Vim modeline. Returns a tuple of (lexer, accuracy). """ lexer, accuracy = None, None file_type = None try: file_type = get_filetype_from_buffer(text) except: pass if file_type is not None: try: lexer = get_lexer_by_name(file_type) except ClassNotFound: pass if lexer is not None: try: accuracy = lexer.analyse_text(text) except: pass return lexer, accuracy
def test_modelines(): for buf in [ 'vi: ft=python' + '\n' * 8, 'vi: ft=python' + '\n' * 8, '\n\n\n\nvi=8: syntax=python' + '\n' * 8, '\n' * 8 + 'ex: filetype=python', '\n' * 8 + 'vim: some,other,syn=python\n\n\n\n' ]: assert modeline.get_filetype_from_buffer(buf) == 'python'
def guess_lexer(_text, **options): """Guess a lexer by strong distinctions in the text (eg, shebang).""" # try to get a vim modeline first ft = get_filetype_from_buffer(_text) if ft is not None: try: return get_lexer_by_name(ft, **options) except ClassNotFound: pass best_lexer = [0.0, None] for lexer in _iter_lexerclasses(): rv = lexer.analyse_text(_text) if rv == 1.0: return lexer(**options) if rv > best_lexer[0]: best_lexer[:] = (rv, lexer) if not best_lexer[0] or best_lexer[1] is None: raise ClassNotFound('no lexer matching the text found') return best_lexer[1](**options)
def verify(buf): assert modeline.get_filetype_from_buffer(buf) == 'python'