Пример #1
0
def compare_functions(r_func_name, pd_func_name):
    """Run an R function and a pandas function on identical generated
    arguments, wrap both executions as ``Function`` objects, and print
    their execution-similarity score.
    """
    r_func = r_functions.get_r_function(r_source_file, r_func_name)
    pd_func = pd_functions.get_pd_functions(pd_source_file,
                                            as_dict=True)[pd_func_name]
    # One shared argument set so both executions see the same inputs.
    shared_args = get_generated_args(r_func, 100)
    r_outputs = r_functions.execute_R_function_on_args(r_func, shared_args)
    r_executed = {
        "name": r_func_name,
        "body": r_functions.get_function_body(r_func),
        "inputKey": "DF-KEY",
        "outputs": r_outputs
    }
    pd_outputs = pd_functions.execute_pd_function_on_args(
        pd_func, shared_args)
    pd_executed = {
        "name": pd_func_name,
        "body": helper.get_func_body(pd_func),
        "inputKey": "DF-KEY",
        "outputs": pd_outputs
    }
    r_wrapped = Function(name=r_func_name,
                         input_key=r_executed["inputKey"],
                         outputs=Outputs(r_executed["outputs"]),
                         body=test_clustering.get_body(r_executed),
                         source=r_source_file)
    pd_wrapped = Function(name=pd_func_name,
                          input_key=pd_executed["inputKey"],
                          outputs=Outputs(pd_executed["outputs"]),
                          body=test_clustering.get_body(pd_executed),
                          source=pd_source_file)
    print(test_clustering.execution_similarity(r_wrapped, pd_wrapped))
Пример #2
0
def load_functions(dataset, is_test=False, update_clone_meta=False):
    """Load java functions for ``dataset`` from its data store as
    ``Function`` objects.

    Functions whose return value is an object are expanded into one clone
    per return attribute; previously persisted clone names are reused so
    clone identities stay stable across runs.

    :param dataset: Name of the dataset to load from.
    :param is_test: If True, use the test store and skip the
        ``is_useful`` filtering at the end.
    :param update_clone_meta: If True, persist the (possibly updated)
        attribute -> clone-name mapping back to the execution store.
    :return: List of ``Function`` objects; filtered to useful ones
        unless ``is_test`` is True.
    """
    LOGGER.info("Loading java functions for '%s' ... " % dataset)
    data_store = get_store(dataset, is_test=is_test)
    functions_arr = data_store.load_functions()
    # Only entries whose name starts with "func_" are real functions.
    function_pattern = re.compile(r'^func_')
    functions = []
    for func_dict in functions_arr:
        if not function_pattern.match(func_dict['name']): continue
        function_metadata = data_store.load_metadata(func_dict)
        # Skip functions with no metadata or no return information.
        if not function_metadata or not function_metadata.get("return", None):
            continue
        return_meta_data = function_metadata["return"]
        outputs = Outputs(func_dict["outputs"])
        funct = Function(name=func_dict["name"],
                         dataset=dataset,
                         class_name=func_dict["class"],
                         package=func_dict["package"],
                         input_key=func_dict["inputKey"],
                         outputs=outputs,
                         lines_touched=function_metadata.get(
                             "linesTouched", None),
                         span=function_metadata.get("span", None),
                         body=function_metadata["body"],
                         source="java")
        if data_store.is_object_return(return_meta_data):
            # Object returns are split into one clone per attribute.
            # Reuse any clone names persisted from a previous run.
            cloned_function_names = get_execution_store(
                dataset).load_cloned_function_names(funct.name)
            updated_cloned_function_names = {}
            for attribute, returns in data_store.get_return_vals(
                    outputs.returns).items():
                clone = funct.clone()
                clone.outputs = outputs.clone()
                # Each clone only keeps the returns of its own attribute.
                clone.outputs.returns = returns[:]
                clone.return_attribute = attribute
                if cloned_function_names and attribute in cloned_function_names:
                    clone.name = cloned_function_names[attribute]
                updated_cloned_function_names[attribute] = clone.name
                functions.append(clone)
            if update_clone_meta:
                get_execution_store(dataset).save_cloned_function_names(
                    funct.name, updated_cloned_function_names)
        else:
            functions.append(funct)
    if is_test:
        return functions
    valid_functions = [func for func in functions if func.is_useful()]
    LOGGER.info("Valid Functions : %d / %d" %
                (len(valid_functions), len(functions)))
    return valid_functions
Пример #3
0
def load_py_functions(dataset, is_test=False):
    """Load python functions for ``dataset`` from its data store as
    ``Function`` objects.

    :param dataset: Name of the dataset to load from.
    :param is_test: If True, use the test store and skip the
        ``is_useful`` filtering at the end.
    :return: List of ``Function`` objects; filtered to useful ones
        unless ``is_test`` is True.
    """
    LOGGER.info("Loading python functions for '%s' ... " % dataset)
    data_store = get_store(dataset, is_test=is_test)
    functions_arr = data_store.load_py_functions()
    # Only entries whose name starts with "func_" are real functions.
    function_pattern = re.compile(r'^func_')
    functions = []
    for func_dict in functions_arr:
        if not function_pattern.match(func_dict['name']): continue
        function_metadata = data_store.load_py_metadata(func_dict['name'])
        # Guard against missing metadata (mirrors load_functions); without
        # this, function_metadata.get / function_metadata["body"] below
        # would raise on a falsy/None metadata record.
        if not function_metadata:
            continue
        outputs = Outputs(func_dict["outputs"])
        funct = Function(name=func_dict["name"],
                         dataset=dataset,
                         input_key=func_dict["inputKey"],
                         outputs=outputs,
                         lines_touched=function_metadata.get(
                             "linesTouched", None),
                         span=function_metadata.get("span", None),
                         body=function_metadata["body"],
                         source="python")
        functions.append(funct)
    if is_test:
        return functions
    valid_functions = [func for func in functions if func.is_useful()]
    LOGGER.info("Valid Functions : %d / %d" %
                (len(valid_functions), len(functions)))
    return valid_functions
Пример #4
0
def similarity(r_func_name, py_func_name):
    """Execute the named R and pandas functions, wrap the results as
    ``Function`` objects, and print their execution-similarity score.
    """
    r_raw = r_functions.get_r_functions(R_FUNCTIONS_SOURCE_FILE)[r_func_name]
    pd_raw = pd_functions.get_pd_functions(PD_FUNCTIONS_SOURCE_FILE,
                                           as_dict=True)[py_func_name]
    r_executed = process_R_function(R_FUNCTIONS_SOURCE_FILE, r_func_name,
                                    r_raw)
    r_func = Function(name=r_func_name,
                      input_key=r_executed["inputKey"],
                      outputs=Outputs(r_executed["outputs"]),
                      body=test_clustering.get_body(r_executed),
                      source=R_FUNCTIONS_SOURCE_FILE)
    pd_executed = process_pd_function(PD_FUNCTIONS_SOURCE_FILE, pd_raw)
    pd_func = Function(name=py_func_name,
                       input_key=pd_executed["inputKey"],
                       outputs=Outputs(pd_executed["outputs"]),
                       body=test_clustering.get_body(pd_executed),
                       source=PD_FUNCTIONS_SOURCE_FILE)
    print(test_clustering.execution_similarity(r_func, pd_func))
Пример #5
0
def format_outputs(outputs):
    """Convert raw execution records into ``Outputs`` objects.

    For each key, collects the formatted return values, error messages,
    and durations of its runs, and marks ``is_all_same`` False as soon
    as any formatted return differs from the first one.

    :param outputs: Mapping of key -> list of execution-record dicts.
    :return: Mapping of key -> ``Outputs``.
    """
    formatted_outputs = {}
    for key, runs in outputs.items():
        collected = Outputs()
        collected.is_all_same = True
        first_return = None
        have_first = False
        for run in runs:
            ret = format_return(run["return"]) if "return" in run else None
            collected.returns.append(ret)
            collected.errors.append(run.get("errorMessage"))
            collected.durations.append(run.get("duration"))
            if not have_first:
                # Remember the first return; later runs compare to it.
                first_return = ret
                have_first = True
            elif collected.is_all_same and not is_equal(ret, first_return):
                collected.is_all_same = False
        formatted_outputs[key] = collected
    return formatted_outputs
Пример #6
0
def _test_function():
    """Smoke test: load a single known codejam function and report
    whether it is considered useful.
    """
    dataset = "codejam"
    data_store = get_store("codejam")
    func_dict = data_store.load_function("func_b1d6e0e04b4f4065870c60fcba28ff0c")
    function_metadata = data_store.load_metadata(func_dict)
    outputs = Outputs(func_dict["outputs"])
    funct = Function(name=func_dict["name"], dataset=dataset,
                     class_name=func_dict["class"], package=func_dict["package"],
                     input_key=func_dict["inputKey"], outputs=outputs,
                     lines_touched=function_metadata.get("linesTouched", None),
                     span=function_metadata.get("span", None),
                     body=function_metadata["body"], source="java")
    # BUG FIX: `print func_dict` was Python 2 statement syntax, a
    # SyntaxError in this otherwise Python 3 file.
    print(func_dict)
    print(funct.is_useful())
Пример #7
0
def load_functions(functions_path, source):
    """Load pickled functions from ``functions_path`` as ``Function``
    objects and return the useful ones keyed by name.

    :param functions_path: Path to the pickled name -> dict mapping.
    :param source: Source label stored on every ``Function``.
    :return: Dict of name -> ``Function`` for functions passing
        ``is_useful_function``.
    """
    pickled = cache.load_pickle(functions_path)
    all_functions = [
        Function(name=name,
                 input_key=meta["inputKey"],
                 outputs=Outputs(meta["outputs"]),
                 body=get_body(meta),
                 source=source)
        for name, meta in pickled.items()
    ]
    valid_functions = {
        f.name: f
        for f in all_functions if is_useful_function(f)
    }
    LOGGER.info("Valid Functions : %d / %d" %
                (len(valid_functions), len(all_functions)))
    return valid_functions