Example #1
def add_project_to_corpus(project):
  """ Assumes that the project_dir contains a
  text file named build_command.txt that contains the build command(s) for the
  project in this directory, and a clean_command.txt that will clean the project.
  """
  common.clean_project(project)

  """Run dljc
  Run Randoop to generate test sources
  Compile test sources
  Run daikon.Chicory on tests to create dtrace file
  Precompute graph kernels that are independent of ontology stuff
  """
  common.run_dljc(project,
                  ['dyntrace', 'graphtool'],
                  ['--graph-jar', common.get_jar('prog2dfg.jar'),
                   '--dyntrace-libs', common.LIBS_DIR])

  """ run petablox """
  #run_petablox(project_dir)

  """ run graph kernel computation """
  project_dir = common.get_project_dir(project)
  kernel_file_path = common.get_kernel_path(project)
  graph_kernel_cmd = ['python',
                      common.get_simprog('precompute_kernel.py'),
                      project_dir,
                      kernel_file_path
                      ]
  common.run_cmd(graph_kernel_cmd)
  print('Generated kernel file for {0}.'.format(project))
  return kernel_file_path
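A minimal usage sketch for the function above; the corpus names and the driver loop are illustrative assumptions, not part of the original listing:

# Hypothetical driver: build a kernel file for each project in a small corpus.
# Assumes each project directory follows the layout described in the docstring
# (build_command.txt, clean_command.txt) and that the common module is importable.
corpus = ['Sort05', 'QuickSort']  # assumed project names
kernel_files = []
for project in corpus:
  kernel_files.append(add_project_to_corpus(project))
print('Generated {0} kernel files.'.format(len(kernel_files)))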
Example #2
def compute_clusters_for_classes(project_list, out_file_name, cf_map_file_name="./class_field_map.json", wf_map_file_name="./word_based_field_clusters.json"):
  class_dirs = list()
  for project in project_list:
    print(common.get_class_dirs(project))
    class_dirs.extend(common.get_class_dirs(project))
  if len(class_dirs) < 1:
    print("No class dirs found to cluster. Make sure you run dljc first.")
    return

  clusterer_cmd = ['java', '-jar', common.get_jar('clusterer.jar'),
                   '-cs', '3',
                   '-out', out_file_name,
                   '-cfm', cf_map_file_name,
                   '-wfm', wf_map_file_name,
                   '-dirs'
                  ]
  clusterer_cmd.extend(class_dirs)

  common.run_cmd(clusterer_cmd, True) 

  # Check if the file exists and is not empty.
  if os.path.exists(wf_map_file_name) and os.path.getsize(wf_map_file_name) > 0:
    print ("Generate jaif file")
    map2annotation.field_mappings_to_annotation(project_list, wf_map_file_name)
    for project in project_list:
        map2annotation.run_anno_inference(project)
  else:
    print("Warning: Missing or empty {0} file.".format(wf_map_file_name))
    print("Warning: map2annotation won't be executed.")
Example #3
def get_daikon_patterns():
    ordering_operator = "<="

    ontology_invariant_file = "TODO_from_Howie.txt"
    with open(ontology_invariant_file, 'w') as f:
        f.write(ordering_operator)

    invariant_name = "TODO_sorted_sequence"

    daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(
        ontology_invariant_file, invariant_name)

    pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
    if os.path.isdir(pattern_class_dir):
        shutil.rmtree(pattern_class_dir)
    os.mkdir(pattern_class_dir)

    cmd = [
        "javac", "-g", "-classpath",
        common.get_jar('daikon.jar'), daikon_pattern_java_file, "-d",
        pattern_class_dir
    ]
    common.run_cmd(cmd)

    return pattern_class_dir
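The directory returned above is meant to feed the Daikon invariant check; a sketch of that hand-off, mirroring the call made in the main() example later in this listing (the dtrace path is an assumption, and the surrounding module is assumed to import inv_check):

# Hypothetical follow-up: find program points in a dtrace file that establish
# the compiled invariant pattern. "TODO_sorted_sequence" is the pattern class
# name used inside get_daikon_patterns().
pattern_class_dir = get_daikon_patterns()
ppt_names = inv_check.find_ppts_that_establish_inv(
    'path/to/project.dtrace.gz',  # assumed dtrace location
    pattern_class_dir,
    'TODO_sorted_sequence')
print(ppt_names)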
Example #4
def add_project_to_corpus(project):
    """ Assumes that the project_dir contains a
  text file named build_command.txt that contains the build command(s) for the
  project in this directory, and a clean_command.txt that will clean the project.
  """
    common.clean_project(project)
    """Run dljc
  Run Randoop to generate test sources
  Compile test sources
  Run daikon.Chicory on tests to create dtrace file
  Precompute graph kernels that are independent of ontology stuff
  """
    common.run_dljc(project, ['dyntrace', 'graphtool'], [
        '--graph-jar',
        common.get_jar('prog2dfg.jar'), '--dyntrace-libs', common.LIBS_DIR
    ])
    """ run petablox """
    #run_petablox(project_dir)
    """ run graph kernel computation """
    project_dir = common.get_project_dir(project)
    kernel_file_path = common.get_kernel_path(project)
    graph_kernel_cmd = [
        'python',
        common.get_simprog('precompute_kernel.py'), project_dir,
        kernel_file_path
    ]
    common.run_cmd(graph_kernel_cmd)
    print('Generated kernel file for {0}.'.format(project))
    return kernel_file_path
Example #5
def run_petablox(project):
  with common.cd(common.get_project_dir(project)):
    petablox_cmd = ['java',
                    '-cp', common.get_jar('petablox.jar'),
                    '-Dpetablox.reflect.kind=none',
                    '-Dpetablox.run.analyses=cipa-0cfa-dlog',
                    'petablox.project.Boot']
    common.run_cmd(petablox_cmd)
Example #6
def generate_graphs(project):
    """Run dljc
  Generate program graphs using prog2dfg
  Precompute graph kernels that are independent of ontology stuff
  """
    common.run_dljc(project, ['graphtool'],
                    ['--graph-jar',
                     common.get_jar('prog2dfg.jar'), '--cache'])
Example #7
def run_petablox(project):
    with common.cd(common.get_project_dir(project)):
        petablox_cmd = [
            'java', '-cp',
            common.get_jar('petablox.jar'), '-Dpetablox.reflect.kind=none',
            '-Dpetablox.run.analyses=cipa-0cfa-dlog', 'petablox.project.Boot'
        ]
        common.run_cmd(petablox_cmd)
Example #8
def generate_graphs(project):
  """Run dljc
  Compile test sources
  Generate program graphs using prog2dfg
  Precompute graph kernels that are independent of ontology stuff
  """
  print("Generating graphs for {0}...".format(project))
  common.run_dljc(project,
                  ['graphtool'],
                  ['--graph-jar', common.get_jar('prog2dfg.jar'),
                   '--cache'])
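A short usage sketch; the project list is an illustrative assumption:

# Hypothetical driver: generate program graphs for every project in a corpus.
for project in ['Sort05', 'QuickSort']:  # assumed project names
  generate_graphs(project)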
Example #9
def main(corpus, annotations, limit=3):
    """ SUMMARY: use case of the user-driven functionality of PASCALI.
  Scenario: User provides the concept of Sequence and the equivalent Java
  types, and the concept of sorted sequence and the relevant type invariant.
  Goal: learn how to get from Sequence -> Sorted Sequence.
  """
    """
  INPUT: annotations, dictionary mapping string -> list of strings
  OUTPUT: recompiles generic-inference-solver with new annotations"""

    run_pa2checker(annotations)
    """ Look for new mapping from 'ontology concepts'->'java type' and run
  checker framework. Should be implemented in type_inference
  Mapping example:
    Sequence -> java.lang.Array, java.util.List, LinkedHashSet, etc.

  INPUT: corpus, file containing set of concept->java_type mapping
  OUTPUT: Set of jaif files that are merged into the classes. The jaif files are
          stored as default.jaif in each project's directory.
  BODY: This also triggers back-end labeled graph generation.
  """

    for project in corpus:
        run_inference(project)
    """ Missing step: interact with PA to add a definition of Sorted Sequence
  which is a specialization of Sequence that has a sortedness invariant.
  The sortedness invariant gets turned into a Daikon template
  INPUT: user interaction
  OUTPUT: type_annotation and type_invariant (for sorted sequence)

  """

    ordering_operator = "<="

    ontology_invariant_file = "TODO_from_Howie.txt"
    with open(ontology_invariant_file, 'w') as f:
        f.write(ordering_operator)

    invariant_name = "TODO_sorted_sequence"

    daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(
        ontology_invariant_file, invariant_name)
    """ Find all methods that have one input parameter annotated as Sequence and return a variable also
  annotated as Sequence.
  INPUT: The corpus and the desired annotations on the method signature
  OUTPUT: List of methods that have the desired signature.
  NOTE: This is a stub and will be implemented as LB query in the future.
  """
    sig_methods = find_methods_with_signature(corpus,
                                              "@ontology.qual.Sequence",
                                              ["@ontology.qual.Sequence"])
    print("\n   ************")
    print(
        "The following corpus methods have the signature Sequence->Sequence:"
    )
    for (project, package, clazz, method) in sig_methods:
        print("{}:\t{}.{}.{}".format(project, package, clazz, method))
    print("\n   ************")
    """ Search for methods that have a return type annotated with Sequence
  and for which we can establish a sortedness invariant (may be done by LB).

  INPUT: dtrace file of project
         daikon_pattern_java_file that we want to check on the dtrace file.

  OUTPUT: list of ppt names that establish the invariant. Here a ppt
  is a Daikon program point, such as test01.TestClass01.sort(int[]):::EXIT

  Note: this step translates the type_invariant into a Daikon
  template (which is a Java file).
  """

    pattern_class_name = invariant_name
    pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
    if os.path.isdir(pattern_class_dir):
        shutil.rmtree(pattern_class_dir)
    os.mkdir(pattern_class_dir)

    cmd = [
        "javac", "-g", "-classpath",
        common.get_jar('daikon.jar'), daikon_pattern_java_file, "-d",
        pattern_class_dir
    ]
    common.run_cmd(cmd)

    list_of_methods = []
    for project in corpus:
        dtrace_file = backend.get_dtrace_file_for_project(project)
        if not dtrace_file:
            print("Ignoring folder {} because it does not contain dtrace file".
                  format(project))
            continue
        ppt_names = inv_check.find_ppts_that_establish_inv(
            dtrace_file, pattern_class_dir, pattern_class_name)
        methods = set()
        for ppt in ppt_names:
            method_name = ppt[:ppt.find(':::EXIT')]
            methods.add(method_name)
        list_of_methods += [(project, methods)]

    print("\n   ************")
    print(
        "The following corpus methods return a sequence sorted by {}:".format(
            ordering_operator))
    for project, methods in list_of_methods:
        if len(methods) > 0:
            print(project)
            for m in methods:
                print("\t{}".format(m))
    print("\n   ************")

    shutil.rmtree(pattern_class_dir)
    """ Expansion of dynamic analysis results ....
  Find a list of methods that are similar to the ones found above (list_of_methods).
  INPUT: list_of_methods, corpus with labeled graphs generated, threshold value for similarity,
  OUTPUT: superset_list_of_methods
  """

    # WENCHAO
    print(
        "Expanding the dynamic analysis results using graph-based similarity:")
    union_set = set()
    for project, methods in list_of_methods:
        # map Daikon output on sort method to method signature in methods.txt in generated graphs
        for m in methods:
            method_name = common.get_method_from_daikon_out(m)
            #kernel_file = common.get_kernel_path(project)
            method_file = common.get_method_path(project)
            dot_name = common.find_dot_name(method_name, method_file)
            if dot_name:
                # find the right dot file for each method
                dot_file = common.get_dot_path(project, dot_name)
                # find all graphs that are similar to it using WL based on some threshold
                sys.path.append(os.path.join(common.WORKING_DIR, 'simprog'))
                from similarity import Similarity
                sim = Similarity()
                sim.read_graph_kernels(
                    os.path.join(common.WORKING_DIR, "corpus_kernel.txt"))
                top_k = 3
                iter_num = 3
                result_program_list_with_score = sim.find_top_k_similar_graphs(
                    dot_file, 'g', top_k, iter_num)
                print(project + ":")
                print(result_program_list_with_score)
                result_set = set(
                    [x[0] for x in result_program_list_with_score])
                # take the union of all these graphs
                union_set = union_set | result_set
    print("Expanded set:")
    print([x.split('/')[-4] for x in union_set])

    # return this set as a list of (project, method)
    fo = open("methods.txt", "w")
    expanded_list = []
    for dot_path in union_set:
        method_summary = common.get_method_summary_from_dot_path(dot_path)
        fo.write(method_summary)
        fo.write("\n")
    fo.close()
    """ Update the type annotations for the expanded dynamic analysis results.
  INPUT: superset_list_of_methods, annotation to be added
  OUTPUT: nothing
  EFFECT: updates the type annotations of the methods in superset_list_of_methods.
  This requires some additional checks to make sure that the methods actually
  perform some kind of sorting. Note that we do it on the superset because the original
  list_of_methods might miss many implementations because fuzz testing could not
  reach them.
  """
    for class_file in []:  # MARTIN
        generated_jaif_file = "TODO"
        insert_jaif.merge_jaif_into_class(class_file, generated_jaif_file)
    """ Ordering of expanded dynamic analysis results ....
  Find the k 'best' implementations in superset of list_of_methods
  INPUT: superset_list_of_methods, corpus, k
  OUTPUT: k_list_of_methods
  Note: similarity score is used. May consider using other scores; e.g., TODO:???
  """

    #TODO: create input file for huascar where each line is formatted like:
    # ../corpus/Sort05/src/Sort05.java::sort(int[]):int[]

    ordering_dir = os.path.join(common.WORKING_DIR, "ordering_results/")

    methods_file = os.path.join(common.WORKING_DIR, 'methods.txt')
    with common.cd(ordering_dir):
        #TODO generate a proper relevant methods file.
        cmd = [
            "./run.sh", "-k", "{}".format(limit), "-t", "typicality", "-f",
            methods_file
        ]
        common.run_cmd(cmd, print_output=True)
    """
Example #10
import sys, os
import subprocess
import traceback
import urllib
import zipfile
import ontology_to_daikon
import common

daikon_jar = common.get_jar("daikon.jar")
DAIKON_SPLITTER = "====================="


def run_daikon_on_dtrace_file(dtrace_file,
                              classpath=daikon_jar,
                              checked_invariant=None):
    cmd = ["java", "-classpath", classpath, "daikon.DaikonSimple", dtrace_file]
    if checked_invariant:
        cmd += [
            "--disable-all-invariants", "--user-defined-invariant",
            checked_invariant
        ]
        cmd += ["--config_option", "daikon.Daikon.undo_opts=true"]
    return common.run_cmd(cmd, print_output=True)['output']


def find_ppts_that_establish_inv_in_daikon_output(daikon_output,
                                                  inv_substring):
    ppts_with_inv = []
    start_of_new_block = False
    current_method = None
    lines = daikon_output.splitlines(True)
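    # NOTE: the listing cuts this function off above. What follows is a minimal
    # sketch of how the remaining parsing loop might proceed, assuming Daikon
    # output separates program-point blocks with DAIKON_SPLITTER lines and
    # names the ppt on the first line of each block. This is an illustration,
    # not the original body.
    for line in lines:
        stripped = line.strip()
        if stripped == DAIKON_SPLITTER:
            # A splitter line means the next non-empty line names a new ppt.
            start_of_new_block = True
            current_method = None
            continue
        if start_of_new_block and stripped:
            current_method = stripped
            start_of_new_block = False
            continue
        if current_method and inv_substring in stripped:
            # The checked invariant shows up under the current program point.
            ppts_with_inv.append(current_method)
    return ppts_with_inv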
Example #11
def main(corpus, annotations):
  """ SUMMARY: use case of the user-driven functionality of PASCALI.
  Scenario: User provides the concept of Sequence and the equivalent Java
  types, and the concept of sorted sequence and the relevant type invariant.
  Goal: learn how to get from Sequence -> Sorted Sequence.
  """

  """
  INPUT: annotations, dictionary mapping string -> list of strings
  OUTPUT: recompiles generic-inference-solver with new annotations"""

  run_pa2checker(annotations)

  """ Look for new mapping from 'ontology concepts'->'java type' and run
  checker framework. Should be implemented in type_inference
  Mapping example:
    Sequence -> java.lang.Array, java.util.List, LinkedHashSet, etc.

  INPUT: corpus, file containing set of concept->java_type mapping
  OUTPUT: Set of jaif files that are merged into the classes. The jaif files are
          stored as default.jaif in each project's directory.
  BODY: This also triggers back-end labeled graph generation.
  """

  for project in corpus:
    run_inference(project)

  """ Missing step: interact with PA to add a definition of Sorted Sequence
  which is a specialization of Sequence that has a sortedness invariant.
  The sortedness invariant gets turned into a Daikon template
  INPUT: user interaction
  OUTPUT: type_annotation and type_invariant (for sorted sequence)

  """

  ordering_operator = "<="

  ontology_invariant_file = "TODO_from_Howie.txt"
  with open(ontology_invariant_file, 'w') as f:
    f.write(ordering_operator)

  invariant_name = "TODO_sorted_sequence"

  daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(ontology_invariant_file, invariant_name)


  """ Find all methods that have one input parameter annotated as Sequence and return a variable also
  annotated as Sequence.
  INPUT: The corpus and the desired annotations on the method signature
  OUTPUT: List of methods that have the desired signature.
  NOTE: This is a stub and will be implemented as LB query in the future.
  """
  sig_methods = find_methods_with_signature(corpus, "@ontology.qual.Sequence", ["@ontology.qual.Sequence"])
  print ("\n   ************")
  print ("The following corpus methods have the signature Sequence->Sequence {}:")
  for (project, package, clazz, method) in sig_methods:
    print("{}:\t{}.{}.{}".format(project, package, clazz, method))
  print ("\n   ************")


  """ Search for methods that have a return type annotated with Sequence
  and for which we can establish a sortedness invariant (may be done by LB).

  INPUT: dtrace file of project
         daikon_pattern_java_file that we want to check on the dtrace file.

  OUTPUT: list of ppt names that establish the invariant. Here a ppt
  is a Daikon program point, such as test01.TestClass01.sort(int[]):::EXIT

  Note: this step translates the type_invariant into a Daikon
  template (which is a Java file).
  """

  pattern_class_name = invariant_name
  pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
  if os.path.isdir(pattern_class_dir):
    shutil.rmtree(pattern_class_dir)
  os.mkdir(pattern_class_dir)

  cmd = ["javac", "-g", "-classpath", common.get_jar('daikon.jar'),
         daikon_pattern_java_file, "-d", pattern_class_dir]
  common.run_cmd(cmd)

  list_of_methods = []
  for project in corpus:
    dtrace_file = backend.get_dtrace_file_for_project(project)
    if not dtrace_file:
      print ("Ignoring folder {} because it does not contain dtrace file".format(project))
      continue
    ppt_names = inv_check.find_ppts_that_establish_inv(dtrace_file, pattern_class_dir, pattern_class_name)
    methods = set()
    for ppt in ppt_names:
      method_name = ppt[:ppt.find(':::EXIT')]
      methods.add(method_name)
    list_of_methods +=[(project, methods)]

  print ("\n   ************")
  print ("The following corpus methods return a sequence sorted by {}:".format(ordering_operator))
  for project, methods in list_of_methods:
    if len(methods)>0:
      print (project)
      for m in methods:
        print("\t{}".format(m))
  print ("\n   ************")

  shutil.rmtree(pattern_class_dir)

  """ Expansion of dynamic analysis results ....
  Find a list of methods that are similar to the ones found above (list_of_methods).
  INPUT: list_of_methods, corpus with labeled graphs generated, threshold value for similarity,
  OUTPUT: superset_list_of_methods
  """

  # WENCHAO
  print("Expanding the dynamic analysis results using graph-based similarity:")
  union_set = set()
  for project, methods in list_of_methods:
    # map Daikon output on sort method to method signature in methods.txt in generated graphs
    for m in methods:
      method_name = common.get_method_from_daikon_out(m)
      #kernel_file = common.get_kernel_path(project)
      method_file = common.get_method_path(project)
      dot_name = common.find_dot_name(method_name, method_file)
      if dot_name:
        # find the right dot file for each method
        dot_file = common.get_dot_path(project, dot_name)
        # find all graphs that are similar to it using WL based on some threshold
        sys.path.insert(0, 'simprog')
        from similarity import Similarity
        sim = Similarity()
        sim.read_graph_kernels("corpus_kernel.txt")
        top_k = 3
        iter_num = 3
        result_program_list_with_score = sim.find_top_k_similar_graphs(dot_file, 'g', top_k, iter_num)
        print(project+":")
        print(result_program_list_with_score)
        result_set = set([x[0] for x in result_program_list_with_score])
        # take the union of all these graphs
        union_set = union_set | result_set
  print("Expanded set:")
  print([x.split('/')[-4] for x in union_set])

  # return this set as a list of (project, method)
  fo = open("methods.txt", "w")
  expanded_list = []
  for dot_path in union_set:
    method_summary = common.get_method_summary_from_dot_path(dot_path)
    fo.write(method_summary)
    fo.write("\n")
  fo.close()

  """ Update the type annotations for the expanded dynamic analysis results.
  INPUT: superset_list_of_methods, annotation to be added
  OUTPUT: nothing
  EFFECT: updates the type annotations of the methods in superset_list_of_methods.
  This requires some additional checks to make sure that the methods actually
  perform some kind of sorting. Note that we do it on the superset because the original
  list_of_methods might miss many implementations because fuzz testing could not
  reach them.
  """
  for class_file in []: # MARTIN
    generated_jaif_file = "TODO"
    insert_jaif.merge_jaif_into_class(class_file, generated_jaif_file)


  """ Ordering of expanded dynamic analysis results ....
  Find the k 'best' implementations in superset of list_of_methods
  INPUT: superset_list_of_methods, corpus, k
  OUTPUT: k_list_of_methods
  Note: similarity score is used. May consider using other scores; e.g., TODO:???
  """

  #TODO: create input file for huascar where each line is formatted like:
  # ../corpus/Sort05/src/Sort05.java::sort(int[]):int[]

  ordering_dir = os.path.join(common.WORKING_DIR, "ordering_results/")

  methods_file = os.path.join(common.WORKING_DIR, 'methods.txt')
  with common.cd(ordering_dir):
    #TODO generate a proper relevant methods file.
    cmd = ["./run.sh",
           "-k", "3",
           "-t", "typicality",
           "-f", methods_file]
    common.run_cmd(cmd, print_output=True)

  """
  Close the loop and add the best implementation found in the previous
  step back to the ontology.
  INPUT: k_list_of_methods
  OUTPUT: patch file for the ontology. Worst case: just add the 'best' implementation
  found in the corpus as a blob to the ontology. Best case: generate an equivalent
  flow-graph in the ontology.
  """
  print "TODO" # ALL