class RTest(unittest.TestCase):
    """Token-stream checks for ``SLexer`` on short R fragments.

    Every test lexes one fragment with ``self.lexer.get_tokens`` and compares
    the complete ``(token type, text)`` list with a hand-written expectation.
    """

    def setUp(self):
        self.lexer = SLexer()

    def _assert_tokens(self, fragment, expected):
        # Shared body of every test: lex the fragment, materialise the token
        # stream, and compare it with the expected list.
        produced = list(self.lexer.get_tokens(fragment))
        self.assertEqual(expected, produced)

    def testCall(self):
        # A function call with a numeric and a named argument.
        self._assert_tokens(
            u'f(1, a)\n',
            [
                (Name.Function, u'f'),
                (Punctuation, u'('),
                (Token.Literal.Number, u'1'),
                (Punctuation, u','),
                (Token.Text, u' '),
                (Token.Name, u'a'),
                (Punctuation, u')'),
                (Token.Text, u'\n'),
            ],
        )

    def testName1(self):
        # Dots, underscores and digits all belong to a single name token.
        self._assert_tokens(
            u'._a_2.c',
            [
                (Name, u'._a_2.c'),
                (Token.Text, u'\n'),
            ],
        )

    def testName2(self):
        # Invalid names are valid if backticks are used
        self._assert_tokens(
            u'`.1 blah`',
            [
                (Name, u'`.1 blah`'),
                (Token.Text, u'\n'),
            ],
        )

    def testName3(self):
        # Internal backticks can be escaped
        self._assert_tokens(
            u'`.1 \\` blah`',
            [
                (Name, u'`.1 \\` blah`'),
                (Token.Text, u'\n'),
            ],
        )

    def testCustomOperator(self):
        # A %...% user-defined operator is expected as one Operator token.
        self._assert_tokens(
            u'7 % and % 8',
            [
                (Token.Literal.Number, u'7'),
                (Token.Text, u' '),
                (Token.Operator, u'% and %'),
                (Token.Text, u' '),
                (Token.Literal.Number, u'8'),
                (Token.Text, u'\n'),
            ],
        )
def get_tokens_unprocessed(self, text):
    """Yield ``(index, token, value)`` triples, promoting extra keywords.

    Delegates to ``SLexer.get_tokens_unprocessed`` and passes every triple
    through unchanged, except that a ``Text`` token whose value is listed in
    ``self.EXTRA_KEYWORDS`` is re-emitted as ``Keyword.Pseudo``.
    """
    for position, kind, lexeme in SLexer.get_tokens_unprocessed(self, text):
        # Rescue misclassified tokens from imports
        is_extra_keyword = kind is Text and lexeme in self.EXTRA_KEYWORDS
        yield position, (Keyword.Pseudo if is_extra_keyword else kind), lexeme
def html_sourcecode(sourcecode):
    """Render *sourcecode* as highlighted HTML through ``template_sourcecode``.

    The code is highlighted with Pygments' ``SLexer`` and ``HtmlFormatter``;
    the resulting markup and the formatter's style definitions are handed to
    ``template_sourcecode.render`` under the keys ``'sourcecode'`` and
    ``'syntax_highlighting'``. Returns whatever ``render`` returns.
    """
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import SLexer

    html_formatter = HtmlFormatter()
    highlighted = highlight(sourcecode, SLexer(), html_formatter)
    context = {
        'sourcecode': highlighted,
        'syntax_highlighting': html_formatter.get_style_defs(),
    }
    return template_sourcecode.render(context)
def colorize(self, inputtexel, styles=None, bgcolor='white'):
    """Return a texel holding *inputtexel*'s text highlighted by ``SLexer``.

    The text is extracted with ``get_text``, run through ``highlight`` into a
    ``TexelFormatter(bgcolor)``, and the formatter's model is padded with
    newlines and then sliced so that it has exactly ``length(inputtexel)``
    items. ``styles`` is accepted but not used here.
    """
    expected_len = length(inputtexel)
    source_text = get_text(inputtexel)
    assert len(source_text) == expected_len

    texel_formatter = TexelFormatter(bgcolor)
    highlight(source_text, SLexer(), texel_formatter)

    padded = texel_formatter.model
    while len(padded) < expected_len:
        # XXX when does this happen? What does it mean?
        padded.insert_text(len(padded), '\n')

    # Slice the formatter's model so any surplus beyond the input is dropped.
    result = texel_formatter.model[0:expected_len]
    assert len(result) == expected_len
    return result.texel
def display_source_ipython(self):
    """Display source code as syntax highlighted HTML within IPython."""
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import SLexer
    import IPython.display as display

    with open(self.source_file) as source:
        contents = source.read()

    html_formatter = HtmlFormatter()
    # CSS first, then the highlighted body -- the same order in which the
    # two are substituted into the template.
    css_rules = html_formatter.get_style_defs('.highlight')
    body = highlight(contents, SLexer(), html_formatter)
    page = '<style type="text/css">{}</style>{}'.format(css_rules, body)
    return display.HTML(page)
def display_source_ipython(self):
    """
    Convenience method to show the loaded source file as syntax
    highlighted HTML within IPython.

    Reads ``self.source_file``, highlights it with ``SLexer`` and returns an
    ``IPython.display.HTML`` object containing an inline ``<style>`` element
    followed by the highlighted markup.
    """
    import IPython.display as display
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import SLexer

    with open(self.source_file) as handle:
        source_text = handle.read()

    fmt = HtmlFormatter()
    template = '<style type="text/css">{}</style>{}'
    stylesheet = fmt.get_style_defs('.highlight')
    markup = highlight(source_text, SLexer(), fmt)
    return display.HTML(template.format(stylesheet, markup))
def lexer():
    """Yield one newly constructed ``SLexer`` instance.

    NOTE(review): this has the shape of a generator-style test fixture, but
    no decorator is visible here -- confirm how it is registered.
    """
    instance = SLexer()
    yield instance
def highlighter(self, site, text):
    """Turn a markdown post with fenced code blocks into highlighted HTML.

    Each kind of fenced block is swapped for a ``{{ ... }}`` placeholder so
    that markdown does not touch it, the remaining text is converted with
    ``markdown2.markdown``, and the prepared HTML fragments are then put
    back in place of the placeholders, in document order.

    Stages, in order:

    1. Tab-indented ``\\t```Python ... \\t``` `` examples are kept literally
       inside ``<pre>``.
    2. `` ```Python `` blocks: the text goes through ``pymarkdown.process``
       and each block is highlighted with ``PythonLexer``.
    3. `` ```{r `` blocks: the text goes through ``Parse().parseRBlocks`` and
       ``Parse().removeKnitrHashes``; input/output pairs and input-only
       `` ```r `` blocks are highlighted with ``SLexer``.
    4. Any remaining `` ``` `` block is wrapped in ``<pre>`` unhighlighted.
    5. Markdown conversion, image clean-up after R blocks, reinsertion.
    6. ``In [i]`` / ``Out [i]`` labels are numbered sequentially.

    Args:
        self: unused by this function.
        site: passed through to ``Parse().parseRBlocks``.
        text: the markdown source of the post.

    Returns:
        The resulting HTML string.
    """
    import logging

    def _reformat_r_post(html):
        # Best-effort post-processing: the original code silently ignored any
        # failure here, so keep going with the unformatted text, but leave a
        # trace instead of hiding the error completely.
        try:
            return Parse().reformatRPost(html)
        except Exception:
            logging.getLogger(__name__).exception(
                'reformatRPost failed; keeping text as it was')
            return html

    # Fragments to reinsert after markdown has run. ``None`` means "this
    # section never ran", in which case the reinsertion step (and its
    # reformatRPost call) is skipped entirely -- previously this was achieved
    # by letting a NameError be swallowed by a bare ``except``.
    esc_py_blox = []
    py_blox = None
    r_io_str_blox = None
    r_blox = None
    replacement_code_blocks = []

    #
    # ESCAPE EXAMPLE PYTHON CODE -------------------------------------------
    #
    if '\t```Python' in text:
        for block in re.findall(r'\t```Python(.*?)\t```', text, re.DOTALL):
            # Shown verbatim (fence markers included), not highlighted.
            esc_py_blox.append('<pre>\t```Python%s\t```</pre>' % block)
            text = text.replace(
                '\t```Python%s\t```' % block,
                '<div class="escpy">{{ esc-py-block }}</div>')

    #
    # PROCESS PYTHON CODE ----------------------------------------------------
    #
    python_tag = '```Python'
    if python_tag in text:
        text = pymarkdown.process(text)
        py_blox = []
        for block in re.findall('```Python(.*?)```', text, re.DOTALL):
            py_blox.append(highlight(block, PythonLexer(), HtmlFormatter()))
            text = text.replace(
                '```Python%s```' % block,
                '<div class="py-block">{{ py-block }}</div>')

    #
    # PROCESS R CODE ----------------------------------------------------
    #
    r_tag = '```{r'
    if r_tag in text:
        # process R code, which relies on Knitr
        text = Parse().parseRBlocks(site, text)
        text = Parse().removeKnitrHashes(text)

        # Input block immediately followed by an output block.
        # The lookbehind is a character class (newline, backtick or 'r'), not
        # the literal fence, so group 1 can start well before the wanted
        # fence. Splitting on the LAST '\n```r\n' recovers the real input.
        io_pairs = re.findall(
            r'(?<=[\n```r\n])(.*?)\n```\n\n```\n(.*?)\n```\n',
            text, re.DOTALL)
        r_io_str_blox = []
        for leading, r_output in io_pairs:
            r_input = leading.rsplit('\n```r\n', 1)[1]
            r_orig_str = '\n```r\n%s\n```\n\n```\n%s\n```\n' % (
                r_input, r_output)
            text = text.replace(r_orig_str, '\n\n{{ r-i/o-block }}\n\n')

            pyg_r_input = highlight(r_input, SLexer(), HtmlFormatter())
            pyg_r_output = highlight(r_output, SLexer(), HtmlFormatter())
            # NOTE(review): '<!-- end outputr>' lacks its closing '--', so
            # the HTML comment runs on to the next '-->' and swallows the
            # '</div>' after it. Left unchanged because later processing may
            # depend on this exact markup -- confirm before fixing.
            r_io_str = (
                '<div class="rwrap"><div class="txtr"><p>In [i]:</p></div><div class="outputr">'
                + pyg_r_input
                + '</div><!-- end outputr></div><!-- end rwrap -->'
                + '<div class="rwrap"><div class="txtr2"><p>Out [i]:</p></div><div class="outputr2">'
                + pyg_r_output
                + '</div></div><!-- end rwrap -->\n'
            )
            r_io_str_blox.append(r_io_str)

        # Remaining input-only R blocks.
        r_blox = []
        for block in re.findall(r'```r(.*?)```', text, re.DOTALL):
            match_str = '```r%s```' % block
            # p tags and trailing line breaks are important ... markdown
            # won't always parse subsequent text correctly w/o them
            text = text.replace(match_str, '\n\n<p>{{ r-block }}</p>\n\n')
            pyg_r_input = highlight(block, SLexer(), HtmlFormatter())
            r_blox.append(
                '<div class="rwrap"><div class="txtr"><p>In [i]:</p></div><div class="outputr">'
                + pyg_r_input
                + '</div></div><!-- end rwrap -->')

    # PROCESS NON-R and NON_PYTHON CODE -------------------------------------
    if '```' in text:
        for block in re.findall('```(.*?)```', text, re.DOTALL):
            replacement_code_blocks.append('<pre>%s</pre>' % block)
            # insert placeholder text while we process markdown
            text = text.replace(
                '```%s```' % block,
                '<div class="codeblock">{{ code-block }}</div>')

    # PROCESS MARKDOWN !! ----------------------------------------------------
    text = markdown2.markdown(text)

    # clean up images that markdown missed b/c R: pull an image that directly
    # follows an r-block placeholder out of its <p> and give it a fixed width.
    if '{{ r-block }}' in text or '{{ r-i/o-block }}' in text:
        r_pix_blox = re.findall(
            r'{{ r-block }}</p>\n\n<p><img src="(.*?)" alt="(.*?)" /> </p>',
            text, re.DOTALL)
        for img_src, img_alt in r_pix_blox:
            match_str = '{{ r-block }}</p>\n\n<p><img src="%s" alt="%s" /> </p>' % (img_src, img_alt)
            repl_str = '{{ r-block }}</p>\n\n<img src="%s" alt="%s" style="width:360px;" />' % (img_src, img_alt)
            text = text.replace(match_str, repl_str)

    # REINSERT ANY CODE WE ESCAPED ---------------------------------------
    # Each fragment fills the first remaining placeholder of its kind.

    # ---- r input/output blocks ----
    if r_io_str_blox is not None:
        for block in r_io_str_blox:
            text = text.replace('{{ r-i/o-block }}', block, 1)
        text = _reformat_r_post(text)

    # ---- r input-only blocks ----
    if r_blox is not None:
        for block in r_blox:
            text = text.replace('{{ r-block }}', block, 1)
        text = _reformat_r_post(text)

    # ---- py code blocks ----
    if py_blox is not None:
        for block in py_blox:
            text = text.replace('{{ py-block }}', block, 1)
        text = _reformat_r_post(text)

    # ---- non-r code blocks ----
    for block in replacement_code_blocks:
        text = text.replace('{{ code-block }}', block, 1)

    # ---- example python code blocks ----
    for block in esc_py_blox:
        text = text.replace("{{ esc-py-block }}", block, 1)

    # if there was input/output code, number it
    if 'In [i]' in text:
        num = text.count('In [i]') + 1
        i = 1
        while i < num:
            # Positions are taken BEFORE this round's replacements.
            s1 = text.find('In [i]')
            s2 = text.find('Out [i]')
            s3 = text.find('In [i]', s1 + 1)
            text = text.replace("In [i]", 'In [%d]' % i, 1)
            # The first unnumbered Out label gets this number only when it
            # sits before the next In label, or when no further In exists.
            if s2 > -1 and (s3 == -1 or s2 < s3):
                text = text.replace("Out [i]", 'Out [%d]' % i, 1)
            i = i + 1

    # markdown wraps {{ r-block }} in <p> tags, and after reinsertion a lone
    # trailing </p> can be left right after the opening outputr div.
    text = text.replace('<div class="outputr">\n</p>', '<div class="outputr">\n')

    return text
def setUp(self):
    """Attach a newly constructed ``SLexer`` to the instance as ``self.lexer``."""
    fresh_lexer = SLexer()
    self.lexer = fresh_lexer