Exemplo n.º 1
0
class RTest(unittest.TestCase):
    """Token-stream checks for ``SLexer`` on small R fragments."""

    def setUp(self):
        self.lexer = SLexer()

    def _assert_lexes_to(self, source, expected):
        """Lex *source* and compare the complete token list to *expected*."""
        produced = list(self.lexer.get_tokens(source))
        self.assertEqual(expected, produced)

    def testCall(self):
        # A plain function call with a numeric and a named argument.
        expected = [
            (Name.Function, u'f'),
            (Punctuation, u'('),
            (Token.Literal.Number, u'1'),
            (Punctuation, u','),
            (Token.Text, u' '),
            (Token.Name, u'a'),
            (Punctuation, u')'),
            (Token.Text, u'\n'),
        ]
        self._assert_lexes_to(u'f(1, a)\n', expected)

    def testName1(self):
        # Dots, underscores and digits are all allowed inside a name.
        expected = [
            (Name, u'._a_2.c'),
            (Token.Text, u'\n'),
        ]
        self._assert_lexes_to(u'._a_2.c', expected)

    def testName2(self):
        # Invalid names are valid if backticks are used
        expected = [
            (Name, u'`.1 blah`'),
            (Token.Text, u'\n'),
        ]
        self._assert_lexes_to(u'`.1 blah`', expected)

    def testName3(self):
        # Internal backticks can be escaped
        expected = [
            (Name, u'`.1 \\` blah`'),
            (Token.Text, u'\n'),
        ]
        self._assert_lexes_to(u'`.1 \\` blah`', expected)

    def testCustomOperator(self):
        # A %...% sequence is lexed as one operator token.
        expected = [
            (Token.Literal.Number, u'7'),
            (Token.Text, u' '),
            (Token.Operator, u'% and %'),
            (Token.Text, u' '),
            (Token.Literal.Number, u'8'),
            (Token.Text, u'\n'),
        ]
        self._assert_lexes_to(u'7 % and % 8', expected)
Exemplo n.º 2
0
 def get_tokens_unprocessed(self, text):
     """Yield ``(index, token, value)`` triples from ``SLexer``'s lexing.

     Triples whose token is ``Text`` and whose value is listed in
     ``self.EXTRA_KEYWORDS`` are re-tagged as ``Keyword.Pseudo``; all
     other triples are passed through unchanged.
     """
     for pos, kind, chunk in SLexer.get_tokens_unprocessed(self, text):
         # Rescue misclassified tokens from imports
         rescued = kind is Text and chunk in self.EXTRA_KEYWORDS
         yield pos, (Keyword.Pseudo if rescued else kind), chunk
Exemplo n.º 3
0
def html_sourcecode(sourcecode):
    """Return the rendered HTML page for *sourcecode*.

    The code is highlighted with pygments' ``SLexer`` and an
    ``HtmlFormatter``; the highlighted markup and the formatter's style
    definitions are handed to ``template_sourcecode.render``.
    """
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import SLexer

    html_formatter = HtmlFormatter()
    context = {
        'sourcecode': highlight(sourcecode, SLexer(), html_formatter),
        'syntax_highlighting': html_formatter.get_style_defs(),
    }
    return template_sourcecode.render(context)
Exemplo n.º 4
0
 def colorize(self, inputtexel, styles=None, bgcolor='white'):
     """Return a highlighted texel with the same length as *inputtexel*.

     The text of *inputtexel* is run through pygments with ``SLexer``
     and a ``TexelFormatter(bgcolor)``. The formatter's model is padded
     with newlines if it is shorter than the input, then sliced to the
     input length. ``styles`` is accepted but not used here.
     """
     source = get_text(inputtexel)
     assert len(source) == length(inputtexel)

     texel_formatter = TexelFormatter(bgcolor)
     highlight(source, SLexer(), texel_formatter)

     highlighted = texel_formatter.model
     # XXX when does this happen? What does it mean?
     while len(highlighted) < length(inputtexel):
         highlighted.insert_text(len(highlighted), '\n')

     clipped = texel_formatter.model[0:length(inputtexel)]
     assert len(clipped) == length(inputtexel)
     return clipped.texel
Exemplo n.º 5
0
    def display_source_ipython(self):
        """Return the source file as syntax-highlighted HTML for IPython.

        Reads ``self.source_file``, highlights it with ``SLexer`` and
        returns an ``IPython.display.HTML`` object holding an inline
        ``<style>`` block followed by the highlighted markup.
        """
        import IPython.display as display
        from pygments import highlight
        from pygments.formatters import HtmlFormatter
        from pygments.lexers import SLexer

        with open(self.source_file) as handle:
            source = handle.read()

        html_formatter = HtmlFormatter()
        # Style definitions are computed before highlighting, as before.
        css = html_formatter.get_style_defs('.highlight')
        markup = highlight(source, SLexer(), html_formatter)
        page = '<style type="text/css">{}</style>{}'.format(css, markup)
        return display.HTML(page)
Exemplo n.º 6
0
    def display_source_ipython(self):
        """
        Show the loaded source file as syntax-highlighted HTML in IPython.

        The file at ``self.source_file`` is read, highlighted with
        ``SLexer``, and wrapped together with the formatter's CSS in an
        ``IPython.display.HTML`` object, which is returned.
        """
        import IPython.display as display
        from pygments import highlight
        from pygments.formatters import HtmlFormatter
        from pygments.lexers import SLexer

        with open(self.source_file) as src:
            contents = src.read()

        fmt = HtmlFormatter()
        stylesheet = fmt.get_style_defs('.highlight')
        highlighted = highlight(contents, SLexer(), fmt)
        document = '<style type="text/css">{}</style>{}'.format(
            stylesheet, highlighted)
        return display.HTML(document)
Exemplo n.º 7
0
def lexer():
    """Yield a single, freshly constructed ``SLexer`` instance."""
    instance = SLexer()
    yield instance
Exemplo n.º 8
0
	def highlighter( self, site, text ):
		"""Turn markdown `text` containing fenced code blocks into HTML.

		Overall flow, as implemented below:

		1. Fenced blocks are cut out of `text` and replaced by
		   `{{ ... }}` placeholders so the markdown pass does not touch them:
		   - tab-indented "```Python" blocks are kept verbatim inside <pre>
		   - "```Python" blocks are highlighted with PythonLexer
		   - "```{r" documents are passed through Parse().parseRBlocks and
		     Parse().removeKnitrHashes, then their "```r" blocks are
		     highlighted with SLexer
		   - any remaining "```" blocks are wrapped in <pre>
		2. The remaining text is rendered with markdown2.markdown.
		3. The placeholders are replaced, in order, by the prepared blocks.
		4. 'In [i]' / 'Out [i]' labels are numbered sequentially.

		`site` is only forwarded to Parse().parseRBlocks.
		Returns the resulting HTML string.

		NOTE(review): this is Python 2 code (see the `print` statement).
		"""

		#
		#	** THIS FUNCTION IS A TOTAL MESS AND I BOW MY HEAD IN SHAME **
		#

		#
		#	ESCAPE EXAMPLE PYTHON CODE -------------------------------------------
		#
		if '\t```Python' in text:
			code_blox = re.findall ( r'\t```Python(.*?)\t```', text, re.DOTALL)
			orig_blox = []
			esc_py_blox = []

			for block in code_blox:
				orig_block = '\t```Python%s\t```' % block
				orig_blox.append( orig_block )

				# wrap the raw fenced block in <pre>; despite the section's
				# origins, nothing here is actually run through pygments
				block = '<pre>\t```Python%s\t```</pre>' % block
				esc_py_blox.append( block )

			# swap each escaped block for a placeholder (restored at the end)
			for block in orig_blox: 
				text=text.replace( block, '<div class="escpy">{{ esc-py-block }}</div>')


		#
		#	PROCESS PYTHON CODE ----------------------------------------------------
		# 
		python_tag = '```Python'
		if python_tag in text:

			# process markdown
			text = pymarkdown.process( text )

			# get everything wrapped in Python marker
			code_blox = re.findall ( '```Python(.*?)```', text, re.DOTALL)
			
			# initialize lists to store original & pygmentized code  
			orig_blox = []
			py_blox = []

			# loop through all python code blocks
			for block in code_blox:

				# recreate original
				orig = '```Python%s```' % block
				orig_blox.append( orig )

				# pygmentize and wrap
				pyg = highlight(block, PythonLexer(), HtmlFormatter())
				py_blox.append( pyg )
				#pyg = '<div class="py-block">%s</div>' % pyg
				#pyg_blox.append( pyg )

			# replace original blox with pygmentized version
			for orig in orig_blox: 

				text=text.replace( orig, '<div class="py-block">{{ py-block }}</div>' )


		#
		#	PROCESS R CODE ----------------------------------------------------
		# 
		r_tag = '```{r'

		if r_tag in text:

			# process R code, which relies on Knitr
			text = Parse().parseRBlocks( site, text )
			text = Parse().removeKnitrHashes( text )

			# get everything wrapped as input-output R code
			# NOTE(review): `[\n```r\n]` inside the lookbehind is a character
			# class (one of newline, backtick or 'r'), not the literal
			# sequence "\n```r\n" -- which is why the first group over-matches
			# and has to be trimmed with rsplit below.
			code_blox = re.findall ( r'(?<=[\n```r\n])(.*?)\n```\n\n```\n(.*?)\n```\n', text, re.DOTALL)

			r_input_blox = []
			r_output_blox = []
			r_io_str_blox = []

			# ok, so above regex matches on second to last (i think?) ```r before 
			# the sequence we want ... couldn't figure out how to tweak it, so 
			# what we do is take the first element of the first list returned, then
			# split it on the *last* instance of the string, which always be the one
			# we want ... then we could do our crazy thing with the strings and the divs
			# god i hate myself right now
			for blk in code_blox:

				# blk is a 2-tuple (input group, output group); even index is
				# the input, odd index is the output
				i=0
				for it in blk:
					if i % 2 == 0:
						# keep only what follows the last "\n```r\n" marker
						blerg = it.rsplit( '\n```r\n', 1 )
						r_input = blerg[1] 
					else: 
						r_output = it
					i=i+1

				r_input_blox.append( r_input )
				r_output_blox.append( r_output )
				r_orig_str = '\n```r\n%s\n```\n\n```\n%s\n```\n' % (r_input, r_output )

				text = text.replace( r_orig_str, '\n\n{{ r-i/o-block }}\n\n')
				r_io_str = '<div class="rwrap"><div class="txtr"><p>In [i]:</p></div><div class="outputr">'
				pyg_r_input = highlight(r_input, SLexer(), HtmlFormatter())

				r_io_str = '%s%s' % (r_io_str, pyg_r_input )
				# NOTE(review): the first HTML comment below has no closing
				# "-->", so in a browser it extends up to "end rwrap -->" and
				# swallows the second </div> -- confirm whether intended.
				r_io_str = '%s</div><!-- end outputr></div><!-- end rwrap -->' % r_io_str
				r_io_str = '%s<div class="rwrap"><div class="txtr2"><p>Out [i]:</p></div><div class="outputr2">' % r_io_str
				pyg_r_output = highlight(r_output, SLexer(), HtmlFormatter())
				r_io_str = '%s%s</div></div><!-- end rwrap -->\n' % (r_io_str, pyg_r_output)
				r_io_str_blox.append( r_io_str )


			# get everything wrapped as input R code
			code_blox = re.findall ( r'```r(.*?)```', text, re.DOTALL)

			r_blox = []
			for block in code_blox:

				# recreate original string, for use in .replace()		
				match_str = '```r%s```' % block

				# leftover debug output (Python 2 print statement)
				if match_str in text: 
					print 'applause!'

				# p tags and trailing line breaks are important ... markdown won't always 
				# parse subsequent text correctly w/o them
				text = text.replace( match_str, '\n\n<p>{{ r-block }}</p>\n\n' )

				# pygmentize block & wrap 
				r_input = highlight(block, SLexer(), HtmlFormatter())
				r_input = '<div class="rwrap"><div class="txtr"><p>In [i]:</p></div><div class="outputr">%s' % r_input 
				r_input = '%s</div></div><!-- end rwrap -->' % r_input
				r_blox.append( r_input )


		# PROCESS NON-R and NON_PYTHON CODE -------------------------------------
		
		if '```' in text:
			code_blox = re.findall ( '```(.*?)```', text, re.DOTALL)
			orig_blox = []
			mod_blox = []
			replacement_code_blocks = []

			for block in code_blox:		
				orig = '```%s```' % block
				orig_blox.append( orig )

				# the fence contents are kept verbatim inside <pre>
				block = '<pre>%s</pre>' % block
				replacement_code_blocks.append( block )

				block = '<div class="codeblock">{{ code-block }}</div>'
				mod_blox.append( block )

			# insert placeholder text while we process markdown 
			for orig, mod in zip(orig_blox, mod_blox):
				text = text.replace( orig, mod )




		# PROCESS MARKDOWN !! ----------------------------------------------------

		text = markdown2.markdown( text )


		# clean up images that markdown missed b/c R

		if '{{ r-block }}' in text or '{{ r-i/o-block }}' in text:

			r_pix_blox = re.findall( r'{{ r-block }}</p>\n\n<p><img src="(.*?)" alt="(.*?)" /> </p>', text, re.DOTALL )

			for img_blox in r_pix_blox: 
				
				# NOTE(review): the names are swapped relative to the regex
				# groups (group 0 is the src attribute, group 1 the alt text);
				# they are used in the same order below, so the output is
				# unaffected.
				img_title = img_blox[0]
				img_src = img_blox[1]
				match_str = '{{ r-block }}</p>\n\n<p><img src="%s" alt="%s" /> </p>' % (img_title, img_src)
				repl_str = '{{ r-block }}</p>\n\n<img src="%s" alt="%s" style="width:360px;" />' % (img_title, img_src)

				# <div class="rwrap"><div class="txtr"></div><div class="outputr"></div></div><!-- end rwrap -->
				text = text.replace( match_str, repl_str )



		# REINSERT ANY CODE WE ESCAPED ---------------------------------------
		#
		# NOTE(review): each list below is only defined when its matching
		# section above ran; the bare `except: pass` swallows the resulting
		# NameError -- and also any error raised by Parse().reformatRPost.

		# ---- r input/output code blocks ----
		try: 
			for block in r_io_str_blox: 
				text = text.replace( '{{ r-i/o-block }}', block, 1)
			text = Parse().reformatRPost( text )

		except:
			pass

		# ---- r code blocks ----
		try: 
			for block in r_blox: 
				text = text.replace( '{{ r-block }}', block, 1)
			text = Parse().reformatRPost( text )

		except:
			pass


		# ---- py code blocks ----
		try: 
			for block in py_blox: 
				text = text.replace( '{{ py-block }}', block, 1)
			text = Parse().reformatRPost( text )

		except:
			pass

		# ---- non-r code blocks ----
		try:
			for block in replacement_code_blocks: 
				text = text.replace( '{{ code-block }}', block, 1)
		except:
			pass

		# ---- example python code blocks ----
		try:
			for block in esc_py_blox: 
				text = text.replace( "{{ esc-py-block }}", block, 1 )
		except: 
			pass


		# if there was input/output code, number it
		if 'In [i]' in text: 
			num = text.count( 'In [i]' )+1
			i = 1
			while i < num: 

				# s1: first unnumbered In, s2: first unnumbered Out,
				# s3: the In label following s1 (or -1 if none)
				s1 = text.find( 'In [i]' )
				s2 = text.find( 'Out [i]' )
				beg = s1+1
				s3 = text.find( 'In [i]', beg )

				InTxt = 'In [%d]' % i
				OutTxt = 'Out [%d]' % i

				text = text.replace( "In [i]", InTxt, 1)

				# number the next Out only if it comes before the next In,
				# or if there is no further In at all
				if s2 > -1 and s3 > -1: 
					if s2 < s3:
						text = text.replace( "Out [i]", OutTxt, 1)
				if s2 > -1 and s3 == -1: 
					text = text.replace( "Out [i]", OutTxt, 1)
				i=i+1


		# finally, fix markdown bug ... i don't understand what's happening to 
		# the leading <p> here ... markdown enclosed {{ r-block }} in p tags, but 
		# sometimes only the trailing one remains after pygments does its thing?
		text = text.replace( '<div class="outputr">\n</p>', '<div class="outputr">\n' )

		# RETURN AT LONG LAST! ---------------------------------------------------	
		return text
Exemplo n.º 9
0
 def setUp(self):
     """Create a fresh ``SLexer`` and store it on ``self.lexer``."""
     fresh_lexer = SLexer()
     self.lexer = fresh_lexer