def count():
    """Print how many C++ samples were loaded for each problem category.

    Loads the "cpp" data for the sort, tree and dp categories through
    ``load_data`` (paths come from the module-level ``sort_path``,
    ``tree_path`` and ``dp_path``) and prints the length of each result,
    one per line, in that order. Returns ``None``.
    """
    sort = load_data(sort_path, "cpp")
    tree = load_data(tree_path, "cpp")
    dp = load_data(dp_path, "cpp")

    # Use the len() builtin rather than calling the __len__ dunder directly.
    print(len(sort))
    print(len(tree))
    print(len(dp))
def count():
    """Print how many Java samples were loaded for each enabled category.

    Loads the "java" data for the sort, tree, dp and raw categories through
    ``load_data`` (paths come from module-level ``*_path`` names) and prints
    the length of each result, one per line, in that order. Returns ``None``.

    The array, math, search and string categories are currently disabled
    (left commented out below).
    """
    # array = load_data(array_path, "java")
    # math = load_data(math_path, "java")
    # search = load_data(search_path, "java")
    sort = load_data(sort_path, "java")
    # string = load_data(string_path, "java")
    tree = load_data(tree_path, "java")
    dp = load_data(dp_path, "java")
    raw = load_data(raw_path, "java")

    # Use the len() builtin rather than calling the __len__ dunder directly.
    print(len(sort))
    print(len(tree))
    print(len(dp))
    print(len(raw))
def init_tokenizer():
    """Fit a tokenizer on the C++ corpus and pickle it to disk.

    Loads the "cpp" data from the module-level ``data_path`` via
    ``clc.load_data``, fits a ``Tokenizer`` on it with an explicit
    punctuation/whitespace filter set, and writes the fitted tokenizer to
    ``tokenizer_lcs_cpp.pkl`` in the current working directory.
    Returns ``None``.
    """
    cpp_data = clc.load_data(data_path, "cpp")

    # NOTE(review): Tokenizer is presumably the Keras text tokenizer - confirm
    # against this module's imports.
    tokenizer = Tokenizer(
        filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    )
    tokenizer.fit_on_texts(cpp_data)

    # Save. The context manager guarantees the handle is closed (and the
    # data flushed) even if pickling raises part-way through.
    with open('tokenizer_lcs_cpp.pkl', 'wb') as f:
        pickle.dump(tokenizer, f)
def get_top_three_ast_sim(sim_list, xml_file):
    """Compute the AST similarity of every candidate against ``xml_file``.

    For each entry of ``sim_list`` the first element (``entry[0]``) is taken
    as a code path, converted to an XML path with
    ``cfc.code_path2xml_path`` and compared to ``xml_file`` using
    ``ast_evaluator.tree_distance_similarities``. Each result is printed as
    it is computed.

    Note: despite the name, every entry of ``sim_list`` is processed, not
    only the first three.

    Args:
        sim_list: sequence of indexable entries whose element 0 is a code
            path.
        xml_file: XML file to compare each candidate against.

    Returns:
        A list with one similarity result per entry, in input order.
    """
    similarities = []  # avoid shadowing the ``list`` builtin
    for entry in sim_list:
        xml_path = cfc.code_path2xml_path(entry[0])
        ans = ast_evaluator.tree_distance_similarities(xml_path, xml_file)
        similarities.append(ans)
        print(ans)
    return similarities


def get_func_path(func, language):
    """Return ``func`` joined onto the data directory for ``language``.

    The base directory is looked up in the module-level ``data_path_dict``;
    an unknown ``language`` raises ``KeyError``.
    """
    return os.path.join(data_path_dict[language], func)


def predict_function(path, language):
    """Return ``func_model[language].predict(path)``.

    An unknown ``language`` raises ``KeyError`` from the ``func_model``
    lookup.
    """
    return func_model[language].predict(path)


def creat_file(file_path, source_text):
    """Write ``source_text`` to ``file_path`` as UTF-8, replacing any content.

    The file is created if it does not exist and truncated if it does.
    """
    # The context manager closes the handle so the text is flushed to disk
    # deterministically instead of whenever the object is garbage-collected.
    with open(file_path, 'w+', encoding='UTF-8') as f:
        f.write(source_text)


if __name__ == "__main__":
    # Ad-hoc smoke run: show one loaded Java sample, then the recommendation
    # produced for an empty query.
    data_path = '../../data/leetcode/raw'
    ret = clc.load_data(data_path, "java")
    print(ret[54])
    ret = recommend_code("")
    print(ret)