Exemplo n.º 1
0
    def format_unencoded(self, tokensource, outfile):
        """Write one highlighted code block together with its own style commands.

        The output is wrapped in a ``{ ... }`` group.  Inside the group the
        style command definitions are written first (between ``makeatletter``
        and ``makeatother``), followed by the regular ``LatexFormatter`` output
        for the tokens.

        :param tokensource: iterable of tokens; it is fully consumed up front.
        :param outfile: object with a ``write`` method receiving the LaTeX text.

        Note: ``self.options["commandprefix"]`` and ``self.options["style"]``
        are overwritten on every call.
        """
        # Consume the whole stream before anything else.  Per the original
        # author's notes, this forces the custom lexer to emit its tokens and,
        # as a side effect, to store the styles on EmptyStyle.
        tokens = list(tokensource)

        # Defining a subclass through StyleMeta triggers the metaclass __new__,
        # which (per the original notes) initialises the styles recorded above.
        @add_metaclass(StyleMeta)
        class ExtraStyle(EmptyStyle):
            pass

        outfile.write("{\n")

        # The command prefix becomes `PYGdefault` when it is the plain `PYG`
        # (the original notes say this is the case when no style was chosen
        # with `\usemintedstyle`).
        prefix = self.commandprefix
        if prefix == "PYG":
            prefix = prefix + "default"
        self.options["commandprefix"] = prefix
        self.options["style"] = ExtraStyle

        # Style commands go directly into the code block rather than a shared
        # style file, because every block may carry a different style.
        outfile.write("\\makeatletter\n")
        style_formatter = LatexFormatter(**self.options)
        for token_name, token_definition in style_formatter.cmd2def.items():
            outfile.write(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' % (prefix, token_name, token_definition))
            outfile.write("\n")
        outfile.write("\\makeatother\n")

        # With the commands in place, emit the Verbatim environment as usual.
        LatexFormatter.format_unencoded(self, tokens, outfile)
        outfile.write("}\n")
Exemplo n.º 2
0
 def __init__(self, **options):
     """Initialise the LaTeX formatter and record the escape delimiters.

     The ``escapeinside`` option is honoured only when it has exactly two
     items: the first is stored as ``self.left`` and the second as
     ``self.right``.  Any other length resets ``self.escapeinside`` to the
     empty string, and ``left``/``right`` are not assigned.
     """
     LatexFormatter.__init__(self, **options)
     self.escapeinside = options.get('escapeinside', '')
     if len(self.escapeinside) != 2:
         # Not a (left, right) pair: disable escaping.
         self.escapeinside = ''
         return
     self.left = self.escapeinside[0]
     self.right = self.escapeinside[1]
Exemplo n.º 3
0
 def __init__(self, **options):
     """Initialise the LaTeX formatter and record the escape delimiters.

     The ``escapeinside`` option is honoured only when it has exactly two
     items: the first is stored as ``self.left`` and the second as
     ``self.right``.  Any other length resets ``self.escapeinside`` to the
     empty string, and ``left``/``right`` are not assigned.
     """
     LatexFormatter.__init__(self, **options)
     self.escapeinside = options.get('escapeinside', '')
     if len(self.escapeinside) != 2:
         # Not a (left, right) pair: disable escaping.
         self.escapeinside = ''
         return
     self.left = self.escapeinside[0]
     self.right = self.escapeinside[1]
Exemplo n.º 4
0
    def process(self):
        """Parse the input document and index its elements by attribute value.

        The input is parsed with an XML parser for ``.xml``/``.txt`` files and
        an HTML parser for ``.html`` files.  For every element, the values of
        the attributes named in the ``unique-attributes`` setting are used as
        keys; attributes named in ``qualified-attributes`` contribute both the
        bare value and a ``"<tag>:<value>"`` key.  Each key is handed to
        ``append_element_attributes_with_key`` and the output is saved at the
        end.

        Raises:
            Exception: if the input file extension is not supported.
        """
        assert self.output_data.state == 'ready'

        if self.setting('pygments'):
            self.lexer = get_lexer_for_filename(self.input_data.storage.data_file())
            self.html_formatter = HtmlFormatter(lineanchors=self.output_data.web_safe_document_key())
            self.latex_formatter = LatexFormatter()

        if self.input_data.ext in ('.xml', '.txt'):
            parser = etree.XMLParser()
        elif self.input_data.ext == '.html':
            parser = etree.HTMLParser()
        else:
            raise Exception("Unsupported extension %s" % self.input_data.ext)

        tree = etree.parse(self.input_data.storage.data_file(), parser)

        for element in tree.iter("*"):
            element_keys = []

            # `in` is the portable membership test; `has_key` is a
            # Python-2-era mapping method.
            for attribute_name in self.setting('unique-attributes'):
                if attribute_name in element.attrib:
                    element_keys.append(element.attrib[attribute_name])
            for attribute_name in self.setting('qualified-attributes'):
                if attribute_name in element.attrib:
                    attribute_value = element.attrib[attribute_name]
                    element_keys.append(attribute_value)
                    # Tag-qualified key, for values that are only unique per tag.
                    element_keys.append("%s:%s" % (element.tag, attribute_value))

            for element_key in element_keys:
                self.append_element_attributes_with_key(element, element_key)

        self.output_data.save()
Exemplo n.º 5
0
def print_style_defs(style):
    r"""
    Print \def's for a style

    The definitions come from ``LatexFormatter(style=style).get_style_defs()``.
    Before printing, the definitions of ``\PYZdl`` (dollar sign) and
    ``\PYZsq`` (single quotation mark) are replaced with the variants listed
    in ``replacements`` below.

    :param style: style passed straight to ``LatexFormatter``.
    """
    # The docstring is a raw string so that ``\d`` is not parsed as an
    # (invalid) escape sequence.
    latex = LatexFormatter(style=style).get_style_defs()

    # Fix dollar sign and single quotation mark.
    # https://tex.stackexchange.com/a/255728
    # https://tex.stackexchange.com/a/238177
    replacements = [
        (r'\def\PYZdl{\char`\$}',
         r'\def\PYZdl{\char36}'),
        (r'\def\PYZsq{\char`\'}',
         r'\def\PYZsq{\textquotesingle}'),
    ]
    for wrong, right in replacements:
        latex = latex.replace(wrong, right)

    print(latex)
Exemplo n.º 6
0
class PythonTestFilter(Filter):
    """
    Runs the tests in the specified module(s) (which must be installed on the
    system) and returns a key-value store with test results, source code and
    html and latex highlighted source code.
    """
    ALIASES = ['pytest']
    INPUT_EXTENSIONS = [".txt"]
    OUTPUT_EXTENSIONS = ['.json']
    LEXER = PythonLexer()
    LATEX_FORMATTER = LatexFormatter()
    HTML_FORMATTER = HtmlFormatter(lineanchors="pytest")

    # TODO some way to ensure tests logs get written elsewhere, like to the artifact output, they are going to main log for now - very confusing

    def process(self):
        """Run every test suite of the listed modules and store the results.

        The input text is split on whitespace into module names.  Each module
        is loaded with nose, every resulting suite is run, and for each
        function or method defined directly on the suite's context the
        following keys are appended to the artifact's output data, prefixed
        with ``<context name>.<function name>``: ``source``, ``html-source``,
        ``latex-source``, ``test-passed``, ``html-result`` and
        ``html-source+result``.

        Note that ``test-passed`` is the result of the whole suite run, not
        of the individual function.
        """
        self.artifact.setup_kv_storage()

        loader = nose.loader.TestLoader()
        for module_name in self.artifact.input_text().split():
            self.log.debug("Starting to process module '%s'" % module_name)
            tests = loader.loadTestsFromName(module_name)
            self.log.debug("Loaded tests.")
            for test in tests:
                self.log.debug("Running test suite %s" % test)
                test_passed = nose.core.run(suite=test, argv=['nosetests'])
                self.log.debug("Passed: %s" % test_passed)

                # dir() also lists inherited names, which are absent from the
                # context's own __dict__; skip those instead of failing with
                # KeyError.
                context_members = test.context.__dict__
                for x in dir(test.context):
                    if x not in context_members:
                        continue
                    xx = context_members[x]
                    if inspect.ismethod(xx) or inspect.isfunction(xx):
                        test_context_name = test.context.__name__
                        qualified_test_name = "%s.%s" % (test_context_name, xx.__name__)

                        source = inspect.getsource(xx.__code__)
                        html_source = highlight(source, self.LEXER, self.HTML_FORMATTER)
                        latex_source = highlight(source, self.LEXER, self.LATEX_FORMATTER)

                        if test_passed:
                            html_result = """ <div class="test-passed"> %s PASSED </div> """ % qualified_test_name
                        else:
                            html_result = """ <div class="test-failed"> %s FAILED </div> """ % qualified_test_name

                        self.artifact.output_data.append("%s:source" % qualified_test_name, source)
                        self.artifact.output_data.append("%s:html-source" % qualified_test_name, html_source)
                        self.artifact.output_data.append("%s:latex-source" % qualified_test_name, latex_source)
                        self.artifact.output_data.append("%s:test-passed" % qualified_test_name, test_passed)
                        self.artifact.output_data.append("%s:html-result" % qualified_test_name, html_result)
                        self.artifact.output_data.append("%s:html-source+result" % qualified_test_name, "%s\n%s" % (html_source, html_result))

        self.artifact._storage.save()
Exemplo n.º 7
0
 def __init__(self, query, highlightFormat: str = "html"):
     '''
     Prepare the lexer and formatter used to highlight a query.

     Args:
         query(Query): the query to highlight; its ``lang`` attribute
             selects the lexer
         highlightFormat(str): "html" or "latex"; for any other value no
             ``formatter`` attribute is assigned
     '''
     self.query = query
     self.highlightFormat = highlightFormat
     self.lexer = get_lexer_by_name(query.lang)

     # Pick the formatter class first, instantiate afterwards.
     if highlightFormat == "html":
         formatter_class = HtmlFormatter
     elif highlightFormat == "latex":
         formatter_class = LatexFormatter
     else:
         formatter_class = None
     if formatter_class is not None:
         self.formatter = formatter_class()
Exemplo n.º 8
0
	def highlight_code(self):
		"""Return the highlighted code for this item as one string.

		When ``matched_line_number()`` is truthy, one snippet is built per
		line cluster returned by ``compute_lines_to_highlight`` and each is
		rendered with ``LatexFormatter``; ``self.code_snippets`` is rebuilt
		from the same snippets.  If that yields nothing, the whole file is
		rendered with ``HtmlFormatter`` instead and a single snippet is
		appended to the existing ``self.code_snippets``.
		"""
		html_snippets = []
		if self.matched_line_number():
			clusters = self.compute_lines_to_highlight(self.adjacent_line_numbers())
			snippets = []
			for cluster in clusters:
				cluster_lines = [self.file_content_lines[n] for n in cluster]
				first_line = min(cluster)
				# Matching line numbers relative to the start of this snippet (1-based).
				relative_matches = map(lambda x: x - first_line + 1, self.matching_line_numbers)
				snippets.append(("\n".join(cluster_lines), first_line, relative_matches))

			html_snippets = [
				highlight(text, JavaLexer(), LatexFormatter(linenos=True, linenostart=first))
				for text, first, _ in snippets
			]
			self.code_snippets = [
				GitSearchItemSnippet(self.hl_snippet(text, first), first)
				for text, first, _ in snippets
			]

		if not html_snippets:
			# Nothing to show per cluster: fall back to the complete file.
			whole_file = highlight(self.file_content, JavaLexer(), HtmlFormatter(linenos=True, anchorlinenos=True))
			html_snippets.append(whole_file)
			self.code_snippets.append(GitSearchItemSnippet(self.hl_snippet(self.file_content, 0), 0))
		return "".join(html_snippets)
Exemplo n.º 9
0
class PythonDocumentationFilter(Filter):
    """
    Collects source code, docstrings and comments for the members of the
    installed Python packages named in the input text, and appends them,
    together with HTML and LaTeX highlighted source, to the artifact's
    key-value output data.
    """
    ALIASES = ["pydoc"]
    INPUT_EXTENSIONS = [".txt"]
    OUTPUT_EXTENSIONS = ['.json']
    COMPOSER = Composer()
    OUTPUT_DATA_TYPE = 'keyvalue'
    LEXER = PythonLexer()
    LATEX_FORMATTER = LatexFormatter()
    HTML_FORMATTER = HtmlFormatter(lineanchors="pydoc")

    def fetch_item_content(self, key, item):
        """
        Store the content of a single member under ``key``.

        Functions and methods have their source split into idiopidae
        sections; their docstring and comments are stored as ``<key>:doc``
        and ``<key>:comments``.  Any other object is stored as-is when it is
        JSON-serializable, otherwise as its ``str()`` representation.
        """
        is_method = inspect.ismethod(item)
        is_function = inspect.isfunction(item)
        if is_method or is_function:
            # Get source code
            try:
                source = inspect.getsource(item)
            except IOError:
                source = ""
            # Process any idiopidae tags
            builder = idiopidae.parser.parse('Document', source + "\n\0")

            sections = {}
            for i, s in enumerate(builder.sections):
                lines = builder.statements[i]['lines']
                sections[s] = "\n".join(l[1] for l in lines)

            # A lone section named '1' is stored under the plain key; named
            # or multiple sections get one "<key>:<section>" entry each.
            # list(...)[0] and .items() behave the same on Python 2 and 3.
            if len(sections) > 1 or list(sections)[0] != '1':
                for section_name, section_content in sections.items():
                    self.add_source_for_key("%s:%s" % (key, section_name), section_content)
            else:
                self.add_source_for_key(key, sections['1'])

            self.artifact.output_data.append("%s:doc" % key, inspect.getdoc(item))
            self.artifact.output_data.append("%s:comments" % key, inspect.getcomments(item))

        else: # not a function or a method
            try:
                # If this can be JSON-serialized, leave it alone...
                json.dumps(item)
                # NOTE(review): add_source_for_key may itself raise TypeError
                # (via inspect.getsource) for non-string items, which also
                # lands in the fallback below - confirm this is intended.
                self.add_source_for_key(key, item)
            except TypeError:
                # ... if it can't, convert it to a string to avoid problems.
                self.add_source_for_key(key, str(item))

    def highlight_html(self, source):
        """Return ``source`` highlighted as HTML with the class-level lexer."""
        return highlight(source, self.LEXER, self.HTML_FORMATTER)

    def highlight_latex(self, source):
        """Return ``source`` highlighted as LaTeX with the class-level lexer."""
        return highlight(source, self.LEXER, self.LATEX_FORMATTER)

    def add_source_for_key(self, key, source):
        """
        Appends source code + syntax highlighted source code to persistent store.

        ``source`` is stored unchanged as ``<key>:value``.  If it is not a
        string, its source code is looked up with ``inspect.getsource``
        before the ``source``, ``html-source`` and ``latex-source`` entries
        are written.
        """
        self.artifact.output_data.append("%s:value" % key, source)
        # type(u"") is `unicode` on Python 2 and `str` on Python 3, so this
        # check does not depend on the Python-2-only `unicode` builtin.
        if not isinstance(source, (str, type(u""))):
            source = inspect.getsource(source)
        self.artifact.output_data.append("%s:source" % key, source)
        self.artifact.output_data.append("%s:html-source" % key, self.highlight_html(source))
        self.artifact.output_data.append("%s:latex-source" % key, self.highlight_latex(source))

    def process_members(self, package_name, mod):
        """
        Process all members of the package or module passed.
        """
        name = mod.__name__

        for k, m in inspect.getmembers(mod):
            self.log.debug("in %s processing element %s" % (mod.__name__, k))
            if not inspect.isclass(m) and hasattr(m, '__module__') and m.__module__ and m.__module__.startswith(package_name):
                # Non-class member defined inside the package: key it by the
                # module that defines it.
                key = "%s.%s" % (m.__module__, k)
                self.fetch_item_content(key, m)

            elif inspect.isclass(m) and m.__module__.startswith(package_name):
                key = "%s.%s" % (mod.__name__, k)
                try:
                    item_content = inspect.getsource(m)
                    self.artifact.output_data.append("%s:doc" % key, inspect.getdoc(m))
                    self.artifact.output_data.append("%s:comments" % key, inspect.getcomments(m))
                    self.add_source_for_key(key, item_content)
                except IOError:
                    self.log.debug("can't get source for %s" % key)
                    self.add_source_for_key(key, "")

                # Also document each member of the class itself.
                try:
                    for ck, cm in inspect.getmembers(m):
                        key = "%s.%s.%s" % (name, k, ck)
                        self.fetch_item_content(key, cm)
                except AttributeError:
                    pass

            else:
                key = "%s.%s" % (name, k)
                self.fetch_item_content(key, m)

    def process_module(self, package_name, name):
        """
        Import the module ``name``, store its source and process its members.

        Import failures (ImportError, TypeError) are logged at debug level
        and otherwise ignored.
        """
        try:
            self.log.debug("Trying to import %s" % name)
            __import__(name)
            mod = sys.modules[name]
            self.log.debug("Success importing %s" % name)

            try:
                module_source = inspect.getsource(mod)
                # Only store the source if it can be serialized to JSON.
                json.dumps(module_source)
                self.add_source_for_key(name, module_source)
            except (UnicodeDecodeError, IOError, TypeError):
                self.log.debug("Unable to load module source for %s" % name)

            self.process_members(package_name, mod)

        except (ImportError, TypeError) as e:
            self.log.debug(e)

    def process(self):
        """
        input_text should be a list of installed python libraries to document.
        """
        package_names = self.artifact.input_data.as_text().split()
        packages = [__import__(package_name) for package_name in package_names]

        for package in packages:
            self.log.debug("processing package %s" % package)
            package_name = package.__name__
            prefix = package.__name__ + "."

            self.process_members(package_name, package)

            if hasattr(package, '__path__'):
                # A real package: walk all of its submodules.
                for _module_loader, name, _ispkg in pkgutil.walk_packages(package.__path__, prefix=prefix):
                    self.log.debug("in package %s processing module %s" % (package_name, name))
                    if not name.endswith("__main__"):
                        self.process_module(package_name, name)
            else:
                # No __path__: treat it as a single module.
                self.process_module(package.__name__, package.__name__)

        self.artifact.output_data.save()
Exemplo n.º 10
0
from idiopidae.runtime import Composer
from pygments import highlight
from pygments.formatters.latex import LatexFormatter
from pygments.lexers.agile import PythonLexer
import dexy
import idiopidae.parser
import inspect
import json
import pkgutil
import sys

# Module-level singletons, created once at import time:
# the pygments lexer and LaTeX formatter, and the idiopidae composer.
lexer = PythonLexer()
formatter = LatexFormatter()
composer = Composer()

def fetch_item_content(cm):
    """Format the idiopidae sections of a function's or method's source.

    Objects that are neither functions nor methods are ignored.  For the
    others, the source (empty if it cannot be read) is parsed by idiopidae
    and every section is formatted with the module-level composer, lexer and
    formatter.

    NOTE(review): in the visible code the resulting ``sections`` mapping is
    built but neither returned nor stored - confirm whether that is intended.
    """
    if not (inspect.ismethod(cm) or inspect.isfunction(cm)):
        return

    try:
        source = inspect.getsource(cm)
    except IOError:
        source = ""

    builder = idiopidae.parser.parse('Document', source + "\n\0")
    sections = {}

    for index, section_name in enumerate(builder.sections):
        statement_lines = builder.statements[index]['lines']
        sections[section_name] = composer.format(statement_lines, lexer, formatter)
Exemplo n.º 11
0
    def highlight_code(self, search_count, rank, targetpath):
        """Return the highlighted code for this item and export data files.

        When ``matched_line_number()`` is truthy:

        * one snippet per line cluster is built and rendered with
          ``LatexFormatter``; ``self.code_snippets`` is rebuilt from them;
        * unless the project (7th component of ``self.file_path``) is one of
          the listed original projects, the full file content and the
          highlighted line clusters are written below ``targetpath[:-4]``,
          and Java files of the same project whose name contains both
          ``test`` and the result file's base name are copied to the
          ``..._result_testcode`` directory.

        If no snippet was produced, the whole file is rendered with
        ``HtmlFormatter`` and one snippet is appended to the existing
        ``self.code_snippets``.

        :param search_count: not used by this method.
        :param rank: rank of this result; used as a prefix of exported file names.
        :param targetpath: path whose last four characters are stripped to
            obtain the export directory.
        :return: the highlighted snippets joined into one string.
        """
        html_snippets = []
        matched_line_numbers = self.matched_line_number()

        if matched_line_numbers:
            snippet_cluster_lns = self.compute_lines_to_highlight(
                self.adjacent_line_numbers())
            snippets = []

            for snippet_cluster_ln in snippet_cluster_lns:
                snippet = [self.file_content_lines[n]
                           for n in snippet_cluster_ln]

                start_line = min(snippet_cluster_ln)
                # Matching line numbers relative to the snippet start (1-based).
                highlight_lines = map(lambda x: x - start_line + 1,
                                      self.matching_line_numbers)
                snippets.append(
                    ("\n".join(snippet), start_line, highlight_lines))

            # Data provision (Defects4J): for every query, in ranking order,
            # export the highlighted line numbers plus the full code file.
            final_str = "\n".join(self.file_content_lines)
            original_project_list = [
                'knutwalker_google-closure-compiler',
                'google_closure-compiler', 'weitzj_closure-compiler',
                'jfree_jfreechart-fse', 'jfree_jfreechart',
                'apache_commons-math', 'apache_commons-lang',
                'mockito_mockito', 'bryceguo_mockito'
            ]

            path_parts = self.file_path.split('/')

            # Check the duplicate projects.
            if path_parts[6] not in original_project_list:
                purepath = targetpath[:-4]
                if not os.path.exists(purepath):
                    os.makedirs(purepath)

                testcode_path = purepath + '_result_testcode'
                if not os.path.exists(testcode_path):
                    os.makedirs(testcode_path)

                # Filter out results whose file is Test.java.
                if path_parts[-1] != 'Test.java':
                    final_path = purepath + "/" + str(rank) + '_' + str(
                        "||".join(path_parts[6:]))
                    write_file(final_path, str(final_str))
                    write_file(final_path + "_", str(snippet_cluster_lns))
                    # Single formatted argument: prints the same text on
                    # Python 2 and Python 3.
                    print("%s %s %s" % ("*****************************",
                                        final_path, "is Done.."))

                    # Also look for the test code and deliver it as a set:
                    # >> search the project directory of each result file for
                    #    files whose name contains the 'test' keyword before
                    #    or after the result file's name.
                    # >> when such a test file is found, copy it.

                    # pure name of the java file (e.g., ABC.java -> ABC)
                    result_pure_file_name = path_parts[-1].split('.')[0]

                    stopwords = ['A', 'a', 'test']
                    javafiles = java_files_from_dir('/'.join(path_parts[:7]))
                    for javafile in javafiles:
                        if result_pure_file_name in stopwords:
                            continue
                        name_of_javafile = javafile.split('/')[-1].split(
                            '.')[0]
                        # The file name contains both the result file's name
                        # and the keyword 'test'
                        # (e.g., xxxtest.java or testxxx.java).
                        if 'test' in name_of_javafile and result_pure_file_name in name_of_javafile:
                            content = read_file(javafile)
                            # Use a separate name per file: reassigning
                            # testcode_path here made every later match nest
                            # under the previous file's path.
                            testcode_file = testcode_path + '/' + str(
                                rank) + '_' + str('||'.join(
                                    javafile.split('/')[6:]))
                            write_file(testcode_file, content)
                            # NOTE(review): hard-coded, machine-specific log
                            # path - confirm before running elsewhere.
                            write_file('/Users/Falcon/Desktop/count.txt',
                                       testcode_file)
                            write_file('/Users/Falcon/Desktop/count.txt',
                                       javafile)
                            write_file('/Users/Falcon/Desktop/count.txt',
                                       '**********************')

            html_snippets = [
                highlight(snippet[0], JavaLexer(),
                          LatexFormatter(linenos=True, linenostart=snippet[1]))
                for snippet in snippets
            ]
            self.code_snippets = [
                GitSearchItemSnippet(self.hl_snippet(snippet[0], snippet[1]),
                                     snippet[1]) for snippet in snippets
            ]

        if not html_snippets:
            # Nothing to show per cluster: fall back to the complete file.
            html_snippets.append(
                highlight(self.file_content, JavaLexer(),
                          HtmlFormatter(linenos=True, anchorlinenos=True)))
            self.code_snippets.append(
                GitSearchItemSnippet(self.hl_snippet(self.file_content, 0), 0))
        return "".join(html_snippets)
Exemplo n.º 12
0

class TalonLexer(RegexLexer):
    """Regex-based lexer for Talon keywords.

    The single ``root`` state maps regular expressions to token types; the
    word-list patterns are built by ``regexFromWordList`` from the lists in
    the ``talon`` module.
    """
    name = "Talon Keyword Lexer"
    aliases = ['talon']
    tokens = {
        'root': [
            # The literal word "phrase"
            (r'\bphrase\b', Name.Function),
            # Runs of digits
            (r'\d+', Number),
            # Function keys share the Number token type with digits
            (regexFromWordList(talon.f_keys), Number),
            # Arrow keys and modifier keys
            (regexFromWordList(talon.arrows + talon.modifiers), Name),
            # Alphabet words, simple keys and alternate keys
            (regexFromWordList(talon.alpha_alt + talon.simple_keys + talon.alternate_keys), Keyword),
            # Spoken symbol names
            (regexFromWordList(talon.symbols), Operator),
            (r'\s+', Whitespace),
            # Literal punctuation characters
            (r'[\'`",\.\:\+\-\=\*\\/]', Operator),
            # Any remaining run of ASCII letters
            (r'[a-zA-Z]+', Generic),
        ],
    }


if __name__ == "__main__":
    # Manual smoke test: highlight one sample sentence with the Domsagxo
    # lexer and style, and write the LaTeX result to domsa.tex.
    print("started")
    lexer = DomsagxoLexer()
    print("created lexer")
    sample_text = """ŝambaluli en la nokton signifas dudek horoj estas egala al la kato en domsaĝo finu"""
    formatter = LatexFormatter(style=DomsagxoStyle)
    # Set after construction, exactly as before
    # (presumably requests a complete, standalone document).
    formatter.full = True
    with open('domsa.tex', 'w', encoding='utf8') as tex_file:
        highlight(sample_text, lexer, formatter, tex_file)
Exemplo n.º 13
0
# Process the command-line arguments:
#   argv[1] - base name of the input file (".fml" is appended below)
#   argv[2] - optional output format: 'tex', 'png' or 'svg' (default: html)
if len(sys.argv) < 2:
    sys.exit("1 or 2 arguments are required")

aFile = sys.argv[1]

# by default, the formatter is HTML
formatter = HtmlFormatter(linenos=False, cssclass="code", style="trac")
aFormat = 'html'

if len(sys.argv) == 3:
    aFormat = sys.argv[2]
    # The formats are mutually exclusive, so at most one branch applies.
    # An unrecognised format keeps the HTML formatter but is still used as
    # the output file extension below.
    if aFormat == 'tex':
        print("Latex!")
        formatter = LatexFormatter(linenos=True,
                                   cssclass="highlight",
                                   style="borland",
                                   full=True)
    elif aFormat == 'png':
        formatter = ImageFormatter(line_numbers=True,
                                   cssclass="highlight",
                                   style="borland")
    elif aFormat == 'svg':
        formatter = SvgFormatter(line_numbers=False,
                                 cssclass="highlight",
                                 style="borland")
else:
    # print() with one parenthesised argument behaves the same on
    # Python 2 and Python 3.
    print("No format specified. By default: " + aFormat)

filename = PATH_FOLDER + aFile + ".fml"  #".fmm"
# NOTE(review): neither handle is closed within the visible part of the
# script - confirm they are closed further down.
outputf = open(PATH_OUPUT + os.path.basename(aFile) + "." + aFormat, "w")
filin = open(filename, 'r')