import os
import shutil
import sys

import backend
import common
import insert_jaif
import inv_check
import map2annotation
import ontology_to_daikon

# NOTE: run_pa2checker, run_inference, and find_methods_with_signature are
# referenced below, but their definitions/imports are not part of this
# excerpt; they are assumed to be provided elsewhere in the repository.


def add_project_to_corpus(project):
    """Add a project to the corpus.

    Assumes that the project directory contains a text file named
    build_command.txt with the build command(s) for the project, and a
    clean_command.txt that will clean the project.
    """
    common.clean_project(project)

    # Run dljc:
    #   - run Randoop to generate test sources,
    #   - compile the test sources,
    #   - run daikon.Chicory on the tests to create a dtrace file,
    #   - precompute graph kernels that are independent of ontology stuff.
    common.run_dljc(project, ['dyntrace', 'graphtool'],
                    ['--graph-jar', common.get_jar('prog2dfg.jar'),
                     '--dyntrace-libs', common.LIBS_DIR])

    # Run petablox.
    #run_petablox(project_dir)

    # Run the graph kernel computation.
    project_dir = common.get_project_dir(project)
    kernel_file_path = common.get_kernel_path(project)
    graph_kernel_cmd = ['python',
                        common.get_simprog('precompute_kernel.py'),
                        project_dir,
                        kernel_file_path]
    common.run_cmd(graph_kernel_cmd)
    print('Generated kernel file for {0}.'.format(project))
    return kernel_file_path
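# Usage sketch (not part of the original source): 'Sort05' is a hypothetical
# corpus project directory containing build_command.txt and clean_command.txt.
#
#   kernel_file = add_project_to_corpus('Sort05')
#   print('Kernel file written to {0}'.format(kernel_file))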
def compute_clusters_for_classes(project_list,
                                 out_file_name,
                                 cf_map_file_name="./class_field_map.json",
                                 wf_map_file_name="./word_based_field_clusters.json"):
    class_dirs = []
    for project in project_list:
        print(common.get_class_dirs(project))
        class_dirs.extend(common.get_class_dirs(project))

    if len(class_dirs) < 1:
        print("No class dirs found to cluster. Make sure you run dljc first.")
        return

    clusterer_cmd = ['java', '-jar', common.get_jar('clusterer.jar'),
                     '-cs', '3',
                     '-out', out_file_name,
                     '-cfm', cf_map_file_name,
                     '-wfm', wf_map_file_name,
                     '-dirs']
    clusterer_cmd.extend(class_dirs)
    common.run_cmd(clusterer_cmd, print_output=True)

    # Check that the word-based field clusters file exists and is not empty.
    if os.path.exists(wf_map_file_name) and os.path.getsize(wf_map_file_name) > 0:
        print("Generating jaif file.")
        map2annotation.field_mappings_to_annotation(project_list, wf_map_file_name)
        for project in project_list:
            map2annotation.run_anno_inference(project)
    else:
        print("Warning: Missing or empty {0} file.".format(wf_map_file_name))
        print("Warning: map2annotation won't be executed.")
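# Usage sketch (not part of the original source): cluster the classes of two
# hypothetical corpus projects; the project names and output file name are
# illustrative only.
#
#   compute_clusters_for_classes(['project_a', 'project_b'], 'clusters.json')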
def get_daikon_patterns():
    ordering_operator = "<="
    ontology_invariant_file = "TODO_from_Howie.txt"
    with open(ontology_invariant_file, 'w') as f:
        f.write(ordering_operator)

    invariant_name = "TODO_sorted_sequence"
    daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(
        ontology_invariant_file, invariant_name)

    pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
    if os.path.isdir(pattern_class_dir):
        shutil.rmtree(pattern_class_dir)
    os.mkdir(pattern_class_dir)

    cmd = ["javac", "-g",
           "-classpath", common.get_jar('daikon.jar'),
           daikon_pattern_java_file,
           "-d", pattern_class_dir]
    common.run_cmd(cmd)
    return pattern_class_dir
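# Usage sketch (not part of the original source): compile the Daikon invariant
# pattern once and reuse the resulting class directory when checking dtrace
# files, e.g. via inv_check.find_ppts_that_establish_inv.
#
#   pattern_dir = get_daikon_patterns()
#   # ... pass pattern_dir to inv_check.find_ppts_that_establish_inv(...)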
def run_petablox(project):
    with common.cd(common.get_project_dir(project)):
        petablox_cmd = ['java', '-cp', common.get_jar('petablox.jar'),
                        '-Dpetablox.reflect.kind=none',
                        '-Dpetablox.run.analyses=cipa-0cfa-dlog',
                        'petablox.project.Boot']
        common.run_cmd(petablox_cmd)
def generate_graphs(project):
    """Run dljc:
    compile test sources,
    generate program graphs using prog2dfg,
    precompute graph kernels that are independent of ontology stuff.
    """
    print("Generating graphs for {0}...".format(project))
    common.run_dljc(project, ['graphtool'],
                    ['--graph-jar', common.get_jar('prog2dfg.jar'), '--cache'])
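# Usage sketch (not part of the original source): regenerate graphs for every
# project in a corpus list before recomputing similarity; the names are
# illustrative.
#
#   for project in ['project_a', 'project_b']:
#       generate_graphs(project)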
def main(corpus, annotations, limit=3):
    """SUMMARY: use case of the user-driven functionality of PASCALI.

    Scenario: the user provides the concept of Sequence and the equivalent
    Java types, and the concept of sorted sequence and the relevant type
    invariant.
    Goal: learn how to get from Sequence -> Sorted Sequence.
    """

    """ INPUT: annotations, dictionary mapping string -> list of strings
        OUTPUT: recompiles generic-inference-solver with new annotations
    """
    run_pa2checker(annotations)

    """ Look for a new mapping from 'ontology concepts' -> 'java type' and run
        the Checker Framework. Should be implemented in type_inference.
        Mapping example: Sequence -> java.lang.Array, java.util.List,
        LinkedHashSet, etc.
        INPUT: corpus, file containing the set of concept->java_type mappings
        OUTPUT: set of jaif files that are merged into the classes. The jaif
        files are stored as default.jaif in each project's directory.
        BODY: this also triggers back-end labeled graph generation.
    """
    for project in corpus:
        run_inference(project)

    """ Missing step: interact with PA to add a definition of Sorted Sequence,
        which is a specialization of Sequence that has a sortedness invariant.
        The sortedness invariant gets turned into a Daikon template.
        INPUT: user interaction
        OUTPUT: type_annotation and type_invariant (for sorted sequence)
    """
    ordering_operator = "<="
    ontology_invariant_file = "TODO_from_Howie.txt"
    with open(ontology_invariant_file, 'w') as f:
        f.write(ordering_operator)

    invariant_name = "TODO_sorted_sequence"
    daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(
        ontology_invariant_file, invariant_name)

    """ Find all methods that have one input parameter annotated as Sequence
        and return a variable also annotated as Sequence.
        INPUT: the corpus and the desired annotations on the method signature
        OUTPUT: list of methods that have the desired signature
        NOTE: this is a stub and will be implemented as an LB query in the
        future.
    """
    sig_methods = find_methods_with_signature(corpus, "@ontology.qual.Sequence",
                                              ["@ontology.qual.Sequence"])
    print("\n ************")
    print("The following corpus methods have the signature Sequence->Sequence:")
    for (project, package, clazz, method) in sig_methods:
        print("{}:\t{}.{}.{}".format(project, package, clazz, method))
    print("\n ************")

    """ Search for methods that have a return type annotated with Sequence and
        for which we can establish a sortedness invariant (may be done by LB).
        INPUT: dtrace file of a project and the daikon_pattern_java_file that
        we want to check on the dtrace file
        OUTPUT: list of ppt names that establish the invariant. Here a ppt is
        a Daikon program point, such as
        test01.TestClass01.sort(int[]):::EXIT
        Note: this step translates the type_invariant into a Daikon template
        (which is a Java file).
    """
    pattern_class_name = invariant_name
    pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
    if os.path.isdir(pattern_class_dir):
        shutil.rmtree(pattern_class_dir)
    os.mkdir(pattern_class_dir)

    cmd = ["javac", "-g",
           "-classpath", common.get_jar('daikon.jar'),
           daikon_pattern_java_file,
           "-d", pattern_class_dir]
    common.run_cmd(cmd)

    list_of_methods = []
    for project in corpus:
        dtrace_file = backend.get_dtrace_file_for_project(project)
        if not dtrace_file:
            print("Ignoring folder {} because it does not contain a dtrace file."
                  .format(project))
            continue
        ppt_names = inv_check.find_ppts_that_establish_inv(
            dtrace_file, pattern_class_dir, pattern_class_name)
        methods = set()
        for ppt in ppt_names:
            method_name = ppt[:ppt.find(':::EXIT')]
            methods.add(method_name)
        list_of_methods.append((project, methods))

    print("\n ************")
    print("The following corpus methods return a sequence sorted by {}:".format(
        ordering_operator))
    for project, methods in list_of_methods:
        if len(methods) > 0:
            print(project)
            for m in methods:
                print("\t{}".format(m))
    print("\n ************")
    shutil.rmtree(pattern_class_dir)

    """ Expansion of dynamic analysis results:
        find a list of methods that are similar to the ones found above
        (list_of_methods).
        INPUT: list_of_methods, corpus with labeled graphs generated,
        threshold value for similarity
        OUTPUT: superset_list_of_methods
    """
    # WENCHAO
    print("Expanding the dynamic analysis results using graph-based similarity:")
    sys.path.append(os.path.join(common.WORKING_DIR, 'simprog'))
    from similarity import Similarity

    union_set = set()
    for project, methods in list_of_methods:
        # Map the Daikon output on each sort method to a method signature in
        # methods.txt in the generated graphs.
        for m in methods:
            method_name = common.get_method_from_daikon_out(m)
            #kernel_file = common.get_kernel_path(project)
            method_file = common.get_method_path(project)
            dot_name = common.find_dot_name(method_name, method_file)
            if dot_name:
                # Find the right dot file for each method.
                dot_file = common.get_dot_path(project, dot_name)
                # Find all graphs that are similar to it using WL, based on
                # some threshold.
                sim = Similarity()
                sim.read_graph_kernels(
                    os.path.join(common.WORKING_DIR, "corpus_kernel.txt"))
                top_k = 3
                iter_num = 3
                result_program_list_with_score = sim.find_top_k_similar_graphs(
                    dot_file, 'g', top_k, iter_num)
                print(project + ":")
                print(result_program_list_with_score)
                result_set = set(x[0] for x in result_program_list_with_score)
                # Take the union of all these graphs.
                union_set = union_set | result_set

    print("Expanded set:")
    print([x.split('/')[-4] for x in union_set])

    # Return this set as a list of (project, method) summaries.
    with open("methods.txt", "w") as fo:
        for dot_path in union_set:
            method_summary = common.get_method_summary_from_dot_path(dot_path)
            fo.write(method_summary)
            fo.write("\n")

    """ Update the type annotations for the expanded dynamic analysis results.
        INPUT: superset_list_of_methods, annotation to be added
        OUTPUT: nothing
        EFFECT: updates the type annotations of the methods in
        superset_list_of_methods.
        This requires some additional checks to make sure that the methods
        actually perform some kind of sorting. Note that we do it on the
        superset because the original list_of_methods might miss many
        implementations, because fuzz testing could not reach them.
    """
    for class_file in []:
        # MARTIN
        generated_jaif_file = "TODO"
        insert_jaif.merge_jaif_into_class(class_file, generated_jaif_file)

    """ Ordering of expanded dynamic analysis results:
        find the k 'best' implementations in the superset of list_of_methods.
        INPUT: superset_list_of_methods, corpus, k
        OUTPUT: k_list_of_methods
        Note: the similarity score is used; we may consider using other
        scores, e.g., TODO:???
    """
    #TODO: create input file for huascar where each line is formatted like:
    # ../corpus/Sort05/src/Sort05.java::sort(int[]):int[]
    ordering_dir = os.path.join(common.WORKING_DIR, "ordering_results/")
    methods_file = os.path.join(common.WORKING_DIR, 'methods.txt')
    with common.cd(ordering_dir):
        #TODO generate a proper relevant methods file.
        cmd = ["./run.sh",
               "-k", "{}".format(limit),
               "-t", "typicality",
               "-f", methods_file]
        common.run_cmd(cmd, print_output=True)

    """ Close the loop and add the best implementation found in the previous
        step back to the ontology.
        INPUT: k_list_of_methods
        OUTPUT: patch file for the ontology.
        Worst case: just add the 'best' implementation found in the corpus as
        a blob to the ontology.
        Best case: generate an equivalent flow-graph in the ontology.
    """
    print("TODO")  # ALL
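# Usage sketch (not part of the original source): the corpus is a list of
# project names, and `annotations` maps ontology concepts to Java types (a
# dictionary of string -> list of strings, per the docstring above). All
# values below are illustrative.
#
#   corpus = ['Sort05']
#   annotations = {'Sequence': ['java.util.List', 'java.lang.Array']}
#   main(corpus, annotations, limit=3)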
import os
import subprocess
import sys
import traceback
import urllib
import zipfile

import common
import ontology_to_daikon

daikon_jar = common.get_jar("daikon.jar")

DAIKON_SPLITTER = "====================="


def run_daikon_on_dtrace_file(dtrace_file, classpath=daikon_jar,
                              checked_invariant=None):
    cmd = ["java", "-classpath", classpath, "daikon.DaikonSimple", dtrace_file]
    if checked_invariant:
        cmd += ["--disable-all-invariants",
                "--user-defined-invariant", checked_invariant]
    cmd += ["--config_option", "daikon.Daikon.undo_opts=true"]
    return common.run_cmd(cmd, print_output=True)['output']


def find_ppts_that_establish_inv_in_daikon_output(daikon_output, inv_substring):
    ppts_with_inv = []
    start_of_new_block = False
    current_method = None
    lines = daikon_output.splitlines(True)
    # NOTE: the original source is truncated at this point. The loop below is
    # a reconstruction sketch based on the variables above: Daikon separates
    # program-point blocks with DAIKON_SPLITTER lines, the line following a
    # separator names the ppt, and the remaining lines of the block list its
    # invariants.
    for line in lines:
        if DAIKON_SPLITTER in line:
            start_of_new_block = True
            continue
        if start_of_new_block:
            current_method = line.strip()
            start_of_new_block = False
        elif current_method and inv_substring in line:
            ppts_with_inv.append(current_method)
    return ppts_with_inv
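# Usage sketch (not part of the original source): run Daikon on a dtrace file
# and scan its output for program points that establish a given invariant.
# The file name and invariant substring are illustrative.
#
#   output = run_daikon_on_dtrace_file('project.dtrace.gz')
#   ppts = find_ppts_that_establish_inv_in_daikon_output(output, 'sorted by <=')
#   print(ppts)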