def get_commit_id(csv_path, tmp_dir='/home/ise/', p_name='Math'):
    """Resolve buggy-commit dates for a project CSV and hand off to the git lookup.

    Reads the bug table at csv_path, stamps each row with the buggy revision
    date, writes <tmp>/projects/<p_name>_commit.csv, then queries the project's
    git repo for commit info on that CSV.
    """
    frame = pd.read_csv(csv_path, index_col=0)
    frame.reset_index(drop=True, inplace=True)
    # Working tree: tmp_d4j/{projects,gits} under tmp_dir (gits recreated fresh).
    tmp_root = pt.mkdir_system(tmp_dir, 'tmp_d4j', False)
    csv_dir = pt.mkdir_system(tmp_root, 'projects', False)
    git_dir = pt.mkdir_system(tmp_root, 'gits', True)
    frame['date_commit_buggy'] = frame.apply(get_date_revsion, out=git_dir,
                                             p_name=p_name, axis=1)
    commit_csv = '{}/{}_commit.csv'.format(csv_dir, p_name)
    frame.to_csv(commit_csv)
    get_date_commit_repoGIT(project_dict[p_name]['repo_path'], commit_csv)
def get_tag_commit(
        csv_file='/home/ise/tmp_d4j/projects/csvs/Closure_commit_Git.csv',
        out='/home/ise/tmp_d4j/config'):
    """Create a TAG name for every buggy git commit listed in csv_file.

    Builds <out>/<project>/{LOG,File_conf} dirs, tags each commit via
    make_auto_config_TAG, and writes the augmented table to
    <out>/<project>/df.csv.
    """
    frame = pd.read_csv(csv_file, index_col=0)
    # Project name is the prefix of the CSV file name (e.g. 'Closure').
    project_name = str(csv_file).split('/')[-1].split('_')[0]
    project_dir = pt.mkdir_system(out, '{}'.format(project_name), False)
    log_dir = pt.mkdir_system(project_dir, 'LOG', False)
    conf_dir = pt.mkdir_system(project_dir, 'File_conf', False)
    frame['TAG_NAME'] = frame['buggy_Git_ID_commit'].apply(
        lambda commit_i: make_auto_config_TAG(conf_dir, log_dir,
                                              project_name, commit_i))
    frame.to_csv('{}/df.csv'.format(project_dir))
def compile_java_class(dir_to_compile, output_dir, dependent_dir): """ this function compile the .java tests to .class :param dir_to_compile: path where .java files :param output_dir: output dir where .class will be found :param dependent_dir: .jar for the compilation process :return: output dir path """ #if path.isdir(dir_to_compile) is False: # msg = "no dir : {}".format(dir_to_compile) # raise Exception(msg) out_dir = pt.mkdir_system(output_dir, 'test_classes') files = pt.walk_rec(dependent_dir, [], '.jar', lv=-2) files.append( '/home/ise/eran/evosuite/jar/evosuite-standalone-runtime-1.0.6.jar') jars_string = ':'.join(files) dir_to_compile = '{}*'.format(dir_to_compile) string_command = "javac {0} -verbose -Xlint -cp {1} -d {2} -s {2} -h {2}".format( dir_to_compile, jars_string, out_dir) print "[OS] {}".format(string_command) os.system(string_command) return process = Popen(shlex.split(string_command), stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate() print "----stdout----" print stdout print "----stderr----" print stderr return out_dir
def gatther_info_make_dir(row, out, list_info):
    """Create the per-experiment output dir for one row and record its paths.

    Builds P_<proj>_B_<bug>_T_<budget>_I_<idx>_D_<date> under out with
    debug_dir/log/out_test sub-dirs, appends a run-descriptor dict to
    list_info (mutated in place), and returns the new dir path.
    """
    project = row['Project_Name']
    bug = row['bug_ID']
    test_idx = row['Test_index']
    budget = row['time_budget']
    # Keep only the last five '_'-separated components of the date stamp.
    stamp = '_'.join(str(row['Date']).split('_')[-5:])
    dir_name = 'P_{}_B_{}_T_{}_I_{}_D_{}'.format(project, bug, budget,
                                                 test_idx, stamp)
    exp_dir = pt.mkdir_system(out, dir_name)
    for sub in ('debug_dir', 'log', 'out_test'):
        pt.mkdir_system(exp_dir, sub)
    list_info.append({
        'p_name': project,
        'output': "{}/out_test".format(exp_dir),
        'log': "{}/log".format(exp_dir),
        'tmp_dir': "{}/debug_dir".format(exp_dir),
        'path': row['JAR_path'],
        'version': bug,
    })
    return exp_dir
def manger(root_dir, out_dir, filter_time_b=None): """ manage the process """ if os.path.isdir(root_dir) is False: msg = "[Error] the dir path is not a valid one --> {}".format(root_dir) raise Exception(msg) src_dir = pt.mkdir_system(out_dir, 'JARS_D4J') df = scan_all_test_jar(root_dir, out=src_dir) list_jar_info = [] df['time_budget'] = df['time_budget'].astype(int) print df.dtypes if filter_time_b is not None: df = df.loc[df['time_budget'].isin(filter_time_b)] df = df.reset_index() df.set_index(np.arange(len(df.index)), inplace=True) df.drop_duplicates(inplace=False) print 'len(df):\t{}'.format(len(df)) df['out_dir'] = df.apply(gatther_info_make_dir, out=src_dir, list_info=list_jar_info, axis=1) util_d4j.run_tests(list_jar_info) jar_making_process(src_dir) os.chdir(src_dir) util_d4j.rm_dir_by_name(src_dir, 'debug_dir') util_d4j.rm_dir_by_name(src_dir, 'out_test') mk_call_graph_raw_data(src_dir) mk_call_graph_df(src_dir)
def read_and_mereg(dico, out_path):
    """Merge per-run status CSVs into one table per key and aggregate them.

    dico maps a key to a list of CSV paths; each path must contain a
    component with 'ALL' in its name (carries the time-budget token 't=').
    For every key the CSVs are merged on 'ID', FP/U columns are aggregated,
    and the merged frame is flushed under <out_path>/out_xml_all.
    """
    col = ['ID', 'status']
    out_dir = pit_render_test.mkdir_system(out_path, 'out_xml_all', True)
    for ky in dico:
        list_df = []
        for p_csv in dico[ky]:
            name_col = None
            # Find the 'ALL...' path component: it names the status column
            # and carries the time budget ('t=<n>_').
            for x in str(p_csv).split('/'):
                if str(x).__contains__('ALL'):
                    name_col = x
                    time_b = str(x).split('t=')[1].split('_')[0]
                    break
            if name_col is None:
                str_err = "[Error] something wrong with the path {} no ALL dir ".format(
                    p_csv)
                raise Exception("{}".format(str_err))
            df = pd.read_csv(p_csv, index_col=0)
            df = df[col]
            # Rename 'status' to the run-specific column name so merges don't clash.
            df.rename(columns={'status': '{}'.format(name_col)}, inplace=True)
            list_df.append(df)
        if len(list_df) > 0:
            # Inner-join all runs on mutant ID.
            m_df = list_df[0]
            for item in list_df[1:]:
                m_df = pd.merge(m_df, item, on=['ID'])
            print "ky:{} size_df:{}".format(ky, m_df.shape)
            #m_df['killed'] = m_df.apply(sublst, axis=1)
            target_fp = [x for x in list(m_df) if str(x).__contains__('FP')]
            target_u = [x for x in list(m_df) if str(x).__contains__('U')]
            # NOTE(review): time_b here is whatever the last CSV of this key
            # carried -- assumes all CSVs of one key share a time budget.
            aggregation(m_df, target_fp, 'FP', time_b)
            aggregation(m_df, target_u, 'U', time_b)
            flush_csv(out_dir, m_df, ky)
def add_loc(project_name, pass_loc=False):
    """Attach LOC data to the generated-bug table and write the merged 'exp' CSV.

    When pass_loc is False the per-bug LOC frames are (re)computed via
    add_loc_helper into a LOC/ folder and concatenated into loc.csv;
    otherwise the previously written loc.csv is reused.
    """
    csv_p = '/home/ise/bug_miner/{}/fin_df_buggy.csv'.format(project_name)
    df_fin = pd.read_csv(csv_p, index_col=0)
    # Project name and parent dir are derived from the CSV path itself.
    p_name = str(csv_p).split('/')[-2]
    father_dir = '/'.join(str(csv_p).split('/')[:-1])
    out_loc = pt.mkdir_system(father_dir, 'LOC', False)
    repo_path = "{}/{}".format('/'.join(str(csv_p).split('/')[:-1]), p_name)
    print repo_path
    # Bug metadata table shipped under the working dir's tmp_files/.
    df_info = pd.read_csv("{}/tmp_files/{}_bug.csv".format(
        os.getcwd(), p_name), index_col=0)
    list_bug_generated = df_fin['bug_name'].unique()
    print list(df_info)
    print len(df_info)
    # Keep only bugs for which tests were actually generated.
    df_info = df_info[df_info['issue'].isin(list_bug_generated)]
    if pass_loc is False:
        df_info.apply(add_loc_helper, repo=repo_path, out=out_loc, axis=1)
        # get all df loc from LOC folder
        res_df_loc_path = pt.walk_rec(out_loc, [], '.csv')
        all_loc_list = []
        for item_loc_path in res_df_loc_path:
            all_loc_list.append(pd.read_csv(item_loc_path, index_col=0))
        df_all_loc = pd.concat(all_loc_list)
        print list(df_all_loc)
        print list(df_fin)
        print len(df_fin)
        df_all_loc.to_csv('{}/{}.csv'.format(father_dir, 'loc'))
    else:
        # Reuse the previously computed LOC table.
        df_all_loc = pd.read_csv('{}/{}.csv'.format(father_dir, 'loc'),
                                 index_col=0)
    # Right-merge: keep every row of df_fin even without LOC data.
    result_df = pd.merge(df_all_loc, df_fin, 'right', on=['bug_name', 'name'])
    result_df.to_csv('{}/{}.csv'.format(father_dir, 'exp'))
    print len(result_df)
def loging_os_command(path_target, dir_name, msg, file_name):
    """Write msg into <path_target>/<dir_name>/<file_name>.log (dir created if missing)."""
    # Normalise a trailing slash away so the joined path has no '//'.
    target = path_target[:-1] if path_target[-1] == '/' else path_target
    log_dir = "{}/{}".format(target, dir_name)
    if os.path.isdir(log_dir) is False:
        log_dir = pt.mkdir_system(target, dir_name, False)
    with open("{}/{}.log".format(log_dir, file_name), 'w') as f_log:
        f_log.write("[log] {}\n".format(msg))
def self_complie_bulider_func(repo, dir_cur, prefix, suffix='fix', bug_id=''):
    """Compile and JUnit-run every EvoSuite test suite found under dir_cur/EVOSUITE.

    Builds a dict of suites (keyed by a normalised 'test_suite_t_<t>_it_<i>'
    name), packages the repo to obtain dependency jars, then compiles and runs
    each suite, writing results under complie_out_<suffix>/junit_out_<suffix>.
    Returns None when the EVOSUITE dir is missing or packaging fails.
    """
    if os.path.isdir("{}/EVOSUITE".format(dir_cur)):
        d = {}
        java_dirz = pt.walk_rec("{}/EVOSUITE".format(dir_cur), [], '', False,
                                lv=-1)
        for item in java_dirz:
            # Only suite dirs that contain the expected package prefix dir.
            if os.path.isdir("{}/{}".format(item, prefix)):
                name_folder = str(item).split('/')[-1]
                tmp = pt.walk_rec("{}/{}".format(item, prefix), [], '.java')
                # Parent dir of the first .java file found (assumes at least
                # one .java exists -- TODO confirm).
                path2 = '/'.join(str(tmp[0]).split('/')[:-1])
                # Folder name carries '...t=<budget>_it=<iteration>' tokens.
                tmp = str(name_folder).split('_')
                name_folder = 'test_suite_t_{}_it_{}'.format(
                    tmp[-2].split('=')[1], tmp[-1].split('=')[1])
                d[name_folder] = {
                    'name': name_folder,
                    'path': "{}/{}/*".format(item, prefix),
                    'path2': '{}/*'.format(path2)
                }
    else:
        print "[error] no dir {}/EVOSUITE".format(dir_cur)
        return None
    d_adder = {'bug_id': str(dir_cur).split('/')[-1], 'mode': suffix}
    # Package the repo to collect dependency jars; bail out on failure.
    res, path_jarz = package_mvn_cycle(repo)
    if path_jarz is None:
        return
    remove_junit(path_jarz)
    out_path_complie = pt.mkdir_system(dir_cur,
                                       'complie_out_{}'.format(suffix))
    out_path_junit = pt.mkdir_system(dir_cur, 'junit_out_{}'.format(suffix))
    for ky_i in d.keys():
        out_i_complie = pt.mkdir_system(out_path_complie, d[ky_i]['name'])
        out_i_junit = pt.mkdir_system(out_path_junit, d[ky_i]['name'])
        indep_bulilder.compile_java_class(d[ky_i]['path2'], out_i_complie,
                                          path_jarz)
        report_d = indep_bulilder.test_junit_commandLine("{}/{}".format(
            out_i_complie, 'test_classes'), path_jarz, out_i_junit,
            prefix_package=prefix, d_add=d_adder)
    print "end"
def rearrange_folder_conf_xgb(
        p_path_dir='/home/ise/bug_miner/XGB/Lang_DATA/csv_res/TEST'):
    """Sort result CSVs into per-configuration sub-dirs (conf_<num>) and exit.

    The configuration number is the second-to-last '_'-separated token of
    each CSV file name.
    """
    for csv_path in pt.walk_rec(p_path_dir, [], '.csv'):
        conf_num = str(csv_path).split('/')[-1].split('_')[-2]
        dest_dir = pt.mkdir_system(p_path_dir, 'conf_{}'.format(conf_num),
                                   False)
        os.system('mv {} {}'.format(csv_path, dest_dir))
    exit()
def making_pred(p_name='Lang',out='/home/ise/eran/JARS' ,root_jat_dir='/home/ise/eran/JARS/JARS_D4J',dis_factor=0.01, csv_FP='/home/ise/eran/repo/ATG/D4J/FP',k=4,alpha=0.009,beta=0.0001,loc='LOC',gama=0.1,debug=True): df = pd.read_csv("{0}/{1}/{1}.csv".format(csv_FP, p_name), index_col=0) print "df size:\t{}".format(len(df)) print df.dtypes if debug: out_root_debug = pt.mkdir_system(out,"debug_A_{}_B_{}_loc_{}_ds_{}".format( alpha,beta,loc,dis_factor)) res_list=[] res = pt.walk_rec(root_jat_dir, [], 'P_',False,lv=-1) for item in res: print item p_name_i = str(item).split('/')[-1].split('_')[1] b_time = str(item).split('/')[-1].split('_')[5] bug_id = str(item).split('/')[-1].split('_')[3] index_test = str(item).split('/')[-1].split('_')[7] date_time= '_'.join(str(item).split('/')[-1].split('_')[9:]) df_loc = find_loc_componenets(p_name_i, bug_id) if debug: out_debug = pt.mkdir_system(out_root_debug,str(item).split('/')[-1]) print "----{}----".format(bug_id) df_filter = df.loc[df['bug_ID'] == int(bug_id)] print "df size:\t{}".format(len(df_filter)) dict_test_picked = heuristic_process(df_filter, item, df_loc,k,gama=gama,loc=loc, alpha=alpha,beta=beta,debug_dir=out_debug,discount_factor=dis_factor, f_name="{}_B_{}_K".format(p_name,bug_id)) if dict_test_picked is None: continue for test_i_key in dict_test_picked.keys(): if dict_test_picked[test_i_key]['pick']==0: continue kill_sum,all_rep = get_rep_kill_out_raw_by_name(test_i_key,df_filter ) index_test_pick = dict_test_picked[test_i_key]['index'] res_list.append({'bug_ID':bug_id,'project':p_name_i,'time_budget':b_time,'k':k,"discount_factor":dis_factor, 'alpha':alpha,'beta':beta,'test_picked':test_i_key,'index_gen_suite':index_test,'loc_mode':loc, "index_pick_test":index_test_pick ,"date_time":date_time,'sum_detected':kill_sum, 'count_detected':all_rep}) df_final = pd.DataFrame(res_list) df_final.to_csv('{}/heuristic_P_{}_A_{}_B_{}_loc {}_Dfact_{}.csv'.format(out,p_name,alpha,beta,loc,dis_factor))
def remvoe_unkillable_mutations(csv_big, debug=False, proj='proj'): print csv_big df = pd.read_csv(csv_big) out_dir = pit_render_test.mkdir_system( '/'.join(str(csv_big).split('/')[:-1]), 'killable_{}'.format(proj), True) #res = find_biggest_time_b(df,False) df_cut = remove_unkillable(df, debug, out_dir) xm.ana_big_df_all(df_cut, out_dir)
def evo_test_run(out_evo, mvn_repo, moudle, project_dir, mode='fix',
                 prefix_str='org'):
    """Copy each generated EvoSuite suite into the project, build it, and
    collect the surefire XML reports under Result/<suite>_<mode>.

    NOTE(review): the original source was collapsed onto one line; the loop
    extent below (everything per suite dir) is reconstructed from the use of
    dir_name_evo in the output-dir name -- confirm against history.
    """
    p_name = str(mvn_repo).split('/')[-1]
    # Work one level above the given out_evo dir.
    out_evo = '/'.join(str(out_evo).split('/')[:-1])
    res = pt.walk_rec(out_evo, [], 'org', False)
    if len(res) == 0:
        return
    test_dir = get_test_dir(project_dir)
    rm_exsiting_test(project_dir, p_name, prefix_str=prefix_str)
    for path_res in res:
        command_cp_test = "cp -r {} {}".format(path_res, test_dir)
        # Suite name = parent dir of the copied 'org' tree.
        dir_name_evo = str(path_res).split('/')[-2]
        print "[OS] {}".format(command_cp_test)
        out_log = pt.mkdir_system(out_evo, 'LOG', False)
        os.system(command_cp_test)
        mvn_command(mvn_repo, moudle, 'install', out_log)
        # mvn_command(mvn_repo,moudle,'test-compile',out_log)
        # mvn_command(mvn_repo, moudle, 'test', out_log)
        # moving the results to the evo_out dir
        test_dir_suff = "{}/target/surefire-reports".format(project_dir)
        res_test_file = pt.walk_rec(test_dir_suff, [], '.xml')
        # filter only the evo_suite test
        res_test_file = [
            x for x in res_test_file
            if str(x).split('/')[-1].__contains__('_ESTest')
        ]
        out_results = pt.mkdir_system(out_evo, 'Result', False)
        out_results_evo = pt.mkdir_system(out_results,
                                          "{}_{}".format(dir_name_evo, mode),
                                          False)
        for test_item in res_test_file:
            command_mv = "mv {} {}".format(test_item, out_results_evo)
            print "[OS] {}".format(command_mv)
            os.system(command_mv)
        # Remove the copied tests before the next suite is installed.
        rm_exsiting_test(project_dir, p_name, prefix_str=prefix_str)
def packages_agg(path, df_index):
    """Aggregate per-class mutation results to package level.

    Outer-joins the CSV at path with df_index on 'ID', derives each row's
    package from its class name, and flushes sum/mean/notnull-count frames
    grouped by package into a fresh package_agg dir next to the input.
    """
    parent_dir = '/'.join(str(path).split('/')[:-1])
    frame = pd.read_csv(path, index_col=0)
    merged = pd.merge(frame, df_index, on=['ID'], how='outer')
    # Package = class name minus its last dotted component.
    merged['package'] = merged['class'].apply(
        lambda cls: '.'.join(str(cls).split('.')[:-1]))
    grouped = merged.groupby(['package'])
    dir_out = pit_render_test.mkdir_system(parent_dir, 'package_agg', True)
    flush_csv(dir_out, grouped.apply(lambda g: g.notnull().sum()), 'df_miss')
    flush_csv(dir_out, grouped.mean(), 'df_mean')
    flush_csv(dir_out, grouped.sum(), 'df_sum')
def make_jar_file(project_dir_path): ''' make a jar file with the builder mvn or ant ''' fix_dir = '{}/fixed'.format(project_dir_path) log_dir = '{}/log'.format(project_dir_path) mvn_builder = False ant_builder = False if os.path.isfile('{}/pom.xml'.format(fix_dir)): mvn_builder = True if os.path.isfile('{}/build.xml'.format(fix_dir)): ant_builder = True os.chdir(fix_dir) out_jar = pt.mkdir_system(project_dir_path, 'jar_dir', False) if mvn_builder: command = 'mvn package -Dmaven.test.skip=true' process = Popen(shlex.split(command), stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate() loging_os_command(log_dir, 'jar_command', stdout, "stdout") loging_os_command(log_dir, 'jar_command', stderr, "stderr") # os.system(command) ans = pt.walk_rec("{}/target".format(fix_dir), [], '.jar') command = 'mvn dependency:copy-dependencies -DoutputDirectory={}'.format( out_jar) process = Popen(shlex.split(command), stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate() loging_os_command(log_dir, 'copy_dependencies', stdout, "stdout") loging_os_command(log_dir, 'copy_dependencies', stderr, "stderr") #os.system(command) if len(ans) == 1: cp_command = 'mv {} {}'.format(ans[0], out_jar) print '[OS] {}'.format(cp_command) os.system(cp_command) return ans[0] if ant_builder: command = 'ant jar' process = Popen(shlex.split(command), stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate() loging_os_command(log_dir, 'jar_command', stdout, "stdout") loging_os_command(log_dir, 'jar_command', stderr, "stderr") # os.system(command) ans = pt.walk_rec("{}/target".format(fix_dir), [], '.jar') if len(ans) == 1: cp_command = 'mv {} {}'.format(ans[0], out_jar) print '[OS] {}'.format(cp_command) os.system(cp_command) return ans[0] return None
def get_killable_bugs(p_name='Math', out='/home/ise/eran/out_csvs_D4j'): out_p = pt.mkdir_system(out, 'killable', False) csv_path = '/home/ise/eran/out_csvs_D4j/rep_exp/out_{}.csv'.format(p_name) df = pd.read_csv(csv_path) csv_commit_db_path = '/home/ise/programs/defects4j/framework/projects/{}/commit-db'.format( p_name) commit_db = pd.read_csv( csv_commit_db_path, names=['bug_ID', 'buggy_commit_id', 'fixed_commit_id']) df = df.loc[df['kill_val'] > 0] print list(df) print list(commit_db) df_mereg = pd.merge(df, commit_db, on=['bug_ID'], how='right') out_df = df_mereg[['bug_ID', 'buggy_commit_id', 'fixed_commit_id']] out_df.drop_duplicates(inplace=True) out_df.to_csv('{}/{}_killable_commit_ID.csv'.format(out_p, p_name))
def mk_call_graph_raw_data(root_dir, name_find='jars_dir', java_caller='/home/ise/programs/java-callgraph/target/javacg-0.1-SNAPSHOT-static.jar'): res = pt.walk_rec(root_dir, [], name_find, False) for dir_i in res: father_dir = '/'.join(str(dir_i).split('/')[:-1]) jars = pt.walk_rec(dir_i, [], '.jar') if len(jars) != 2: print "[Error] in dir --> {}\nfind:\n{}".format(dir_i, jars) continue out_jars = pt.mkdir_system(father_dir, 'out_jar') command_java_1 = 'java -jar {} {} {} '.format(java_caller, jars[1], father_dir) command_java_0 = 'java -jar {} {} {} '.format(java_caller, jars[0], father_dir) util_d4j.execute_command(command_java_1, 'call_graph', out_jars) util_d4j.execute_command(command_java_0, 'call_graph', out_jars)
def first_kill(p_path='/home/ise/eran/lang/U_lang_15/big_all_df.csv'): rel_path = '/'.join(str(p_path).split('/')[:-1]) out_dir = pit_render_test.mkdir_system(rel_path, 'first_kill', False) df = pd.read_csv(p_path, index_col=0) get_col_time_budget = list(df) get_col_time_budget = [ x for x in get_col_time_budget if str(x).__contains__('t=') ] get_col_time_budget_U = [ x for x in get_col_time_budget if str(x).__contains__('_U') ] for x in get_col_time_budget_U: print x df['First'] = df.apply(my_test2, cols=get_col_time_budget_U, axis=1) df['freq'] = df.apply(my_test1, cols=get_col_time_budget_U, axis=1) df[['ID', 'First', 'freq']].to_csv('{}/first_kill.csv'.format(out_dir)) exit()
def make_bug_date_D4j(out='/home/ise/tmp_d4j'):
    '''
    Write one CSV per Defects4J project mapping every bug ID to its date,
    under <out>/D4j_bugs_info/<project>_Date.csv.
    '''
    out_dir = pt.mkdir_system(out, 'D4j_bugs_info', False)
    for name in ['Chart', 'Math', 'Lang', 'Time', 'Mockito', 'Closure']:
        rows = []
        # Bug IDs are 1-based up to the project's bug count.
        for bug_i in range(1, project_dict[name]['num_bugs'] + 1):
            rows.append({
                'project': name,
                'bug_ID': bug_i,
                'Date': extract_data(name, bug_i)
            })
        pd.DataFrame(rows).to_csv('{}/{}_Date.csv'.format(out_dir, name))
def replication(
        dir_out_xml='/home/ise/eran/lang/U_lang_15/18_00_00_00_00_t=15/out_xml_all',
        killable=True):
    '''
    Concatenate every replication CSV under dir_out_xml (keeping only the
    '_it=' iteration columns plus 'ID') and flush the result into a
    replication_kill dir; when killable is True, first restrict to mutants
    that were ever killed (per first_kill.csv).
    :param dir_out_xml: dir holding the per-run CSVs
    :param killable: filter to killable mutants before flushing
    :return: None (writes CSVs via flush_csv)
    '''
    rel_path = '/'.join(str(dir_out_xml).split('/')[:-1])
    out_dir = pit_render_test.mkdir_system(rel_path, 'replication_kill',
                                           False)
    csv_lists = pit_render_test.walk_rec(dir_out_xml, [], '.csv')
    acc = 0
    big_df = None
    for csv_item in csv_lists:
        print "csv_item =", csv_item
        df = pd.read_csv(csv_item, index_col=0)
        # Keep only the per-iteration columns plus the mutant ID.
        list_col = list(df)
        list_col = [x for x in list_col if str(x).__contains__('_it=')]
        list_col.append('ID')
        df = df[list_col]
        acc += int(len(df))
        if big_df is None:
            big_df = df
        else:
            big_df = pd.concat([big_df, df])
    # Sanity check: concatenation must not drop rows.
    if acc != int(len(big_df)):
        print "acc: {} big: {}".format(acc, int(len(big_df)))
    if killable:
        df_first_kill = pd.read_csv(
            '/home/ise/eran/lang/first_kill.csv'
        )  #TODO: read the CSV frist kill and merge the ID and remove all ids that cant be kill
        df_all = df_first_kill.merge(big_df, on=['ID'], how='outer',
                                     indicator=True)
        print df_all['_merge'].value_counts()
        # Keep only mutants killed at least once ('First' > 0).
        df_all = df_all[df_all['First'] > 0]
        col_ALL = list(df_all)
        col_ALL = [x for x in col_ALL if str(x).__contains__('ALL_')]
        col_ALL.append('ID')
        flush_csv(out_dir, df_all, 'merge')
        flush_csv(out_dir, df_all[col_ALL], 'all_rep_killable')
    else:
        flush_csv(out_dir, big_df, 'all_rep')
def init_main(): #collctor('/home/ise/eran/random/LANG/tran_FP','FP_summery') #collctor('/home/ise/eran/random/LANG/tran_U', 'U_summery') #exit() print "starting.." for mod in ['FP', 'U']: num = 5000 ch_i = 1 #get_ID_index_table('/home/ise/tran') out_path = pit_render_test.mkdir_system('/home/ise/Desktop/tmp/LANG', 'tran_{}'.format(mod), False) p_path = '/home/ise/eran/lang/big_all_df.csv' p_index = '/home/ise/eran/lang/indexer.csv' csv_fp_file = '/home/ise/eran/repo/ATG/csv/FP_budget_time_lang.csv' #csv_fp_file = '/home/ise/eran/repo/ATG/csv/FP_budget_time_math.csv' bugger_obj = bugger(p_path, csv_fp_file, out_path, p_index) #bugger_obj.package_separation(num,mod) #continue arr = bugger_obj.bug_generator(num, mod) df = bugger_obj.get_bug_DataFrame_V1(arr, ch_i, mod, num) bugger_obj.get_plot(df, ch_i) bugger_obj.get_plot(df, ch_i, 'Sum') flush_csv(out_path, df, 'n_{}_mod_{}'.format(num, mod)) exit()
def applyer_bug(row, out_dir, repo, list_index, jarz=True, prefix_str='org',
                self_complie=True):
    """Process one bug row: check out fix/buggy commits, generate EvoSuite
    tests on the fixed version and run them on both versions.

    NOTE(review): reconstructed from a single collapsed source line; the
    indentation of the module-'-' dir_to_gen handling and loop/branch extents
    should be confirmed against history.
    """
    fix = False
    git_repo = repo
    p_name = str(repo).split('/')[-1]
    tag_parent = row['tag_parent']
    module = row['module']
    commit_buggy = row['parent']  # old
    commit_fix = row['commit']  # new
    bug_name = row['issue']
    index_bug = row['index_bug']
    component_path = row['component_path']
    print 'index_bug = {}'.format(index_bug)
    # if index_bug > 150:
    #     return
    print "{}".format(component_path)
    ######
    #list_done = start_where_stop_res(out_dir)
    # Optional whitelist of bug indices to process.
    if list_index is not None:
        if index_bug not in list_index:
            return
    ##########
    target = row['target']
    # if os.path.isdir("{}/{}_{}".format(out_dir,bug_name,index_bug)):
    #     fix=True
    # else:
    #     if self_complie:
    #         return
    out_dir_new = pt.mkdir_system(out_dir,
                                  "{}_{}".format(bug_name, index_bug), False)
    out_evo = pt.mkdir_system(out_dir_new, 'EVOSUITE', False)
    path_to_pom = "{}/pom.xml".format(repo)
    print "module={} \t tag_p = {} \t commit_p ={}".format(
        module, tag_parent, commit_fix)
    checkout_version(commit_fix, git_repo, out_dir_new)
    # open-nlp
    # Some repos (e.g. open-nlp) nest the real module one level down.
    name = str(repo).split('/')[-1]
    if os.path.isfile('{}/pom.xml'.format(repo)) is False:
        if os.path.isdir('{}/{}'.format(repo, name)):
            repo = '{}/{}'.format(repo, name)
        else:
            if os.path.isfile('{}/{}'.format(repo, 'build.xml')):
                # Ant project: compile it and skip the jar-collection step.
                ant_command(repo, 'ant compile')
                jarz = False
            else:
                return
    if self_complie:
        # Self-compile mode: build + run the existing suites against the
        # fixed and then the buggy checkout, and stop here.
        if is_evo_dir_full(out_evo) is False:
            # no tests were generated bt Evosuite
            return
        self_complie_bulider_func(repo,
                                  "{}/{}_{}".format(out_dir, bug_name,
                                                    index_bug), prefix_str,
                                  suffix='fixed')
        checkout_version(commit_fix, git_repo, out_dir_new, clean=True)
        mvn_command(repo, module, 'clean', None)
        checkout_version(commit_buggy, git_repo, out_dir_new)
        self_complie_bulider_func(repo,
                                  "{}/{}_{}".format(out_dir, bug_name,
                                                    index_bug), prefix_str,
                                  suffix='buggy')
        checkout_version(commit_buggy, git_repo, out_dir_new, clean=True)
        mvn_command(repo, module, 'clean', None)
        return
    proj_dir = '/'.join(str(path_to_pom).split('/')[:-1])
    prefix = src_to_target(component_path, end=prefix_str)
    if prefix is None:
        return
    repo_look = "{}{}".format(git_repo, prefix)
    rm_exsiting_test(repo_look, p_name, prefix_str=prefix_str)
    out_log = pt.mkdir_system(out_dir_new, 'LOG', False)
    # reset the commit
    mvn_command(repo, module, 'clean', None)
    mvn_command(repo, module, 'install -DskipTests=true', out_log, '')
    # Get all jars dependency
    str_dependency = ''
    if jarz:
        res, path_dep = package_mvn_cycle(repo)
        if path_dep is None:
            print "[Error] cant make jarzz"
            return
        res = clean_jar_path(res)
        str_dependency = ':'.join(res)
    discover_dir_repo('{}/target'.format(repo_look), p_name, is_test=False)
    # Multi-module projects carry '-' in the module name.
    if str(module).__contains__('-'):
        path_to_pom = '{}/{}/pom.xml'.format(repo, module)
        # NOTE(review): this assignment is immediately overwritten below --
        # kept as in the original.
        dir_to_gen = '{}/{}/target/classes/{}'.format(repo, module, target)
        dir_to_gen = discover_dir_repo('{}/{}'.format(repo, module), p_name,
                                       is_test=False, prefix_str=prefix_str)
    else:
        dir_to_gen = discover_dir_repo('{}'.format(repo_look), p_name,
                                       is_test=False, prefix_str=prefix_str)
    dir_to_gen = '{}/{}'.format(dir_to_gen, target)
    # Run Evosuite generation mode
    # add Evosuite to pom xml
    get_all_poms_and_add_evo(repo)
    # bg.init_main reads its configuration from sys.argv.
    sys.argv = [
        '.py', dir_to_gen, 'evosuite-1.0.6.jar',
        '/home/ise/eran/evosuite/jar/', out_evo + '/', 'exp', '200', '1',
        '180', '5', 'U', str_dependency
    ]
    if fix is False:
        bg.init_main()
    evo_test_run(out_evo, repo, module, proj_dir, mode='fixed',
                 prefix_str=prefix_str)
    checkout_version(commit_fix, git_repo, out_dir_new, clean=True)
    # Run test-suite on the buugy version
    checkout_version(commit_buggy, git_repo, out_dir_new)
    rm_exsiting_test(repo_look, p_name, prefix_str=prefix_str)
    mvn_command(repo, module, 'clean', out_log)
    mvn_command(repo, module, 'compile', out_log)
    # add Evosuite to pom xml
    #add_evosuite_text(path_to_pom, None)
    get_all_poms_and_add_evo(repo)
    evo_test_run(out_evo, repo, module, proj_dir, mode='buggy',
                 prefix_str=prefix_str)
    checkout_version(commit_buggy, git_repo, out_dir_new, clean=True)
    # rm pom.xml for the next checkout
    mvn_command(repo, module, 'clean', out_log)
def jar_making_process(src_dir):
    """For every 'V_fixed' project dir under src_dir, build its jars into a jars_dir three levels up."""
    for proj_dir in pt.walk_rec(src_dir, [], 'V_fixed', False):
        dest_root = '/'.join(str(proj_dir).split('/')[:-3])
        jars_dir = pt.mkdir_system(dest_root, 'jars_dir')
        make_jars(proj_dir, jars_dir)
def test_junit_commandLine(dir_class, dir_jars, out_dir, prefix_package='org', d_add=None): ''' this function go over all the test clases and run the Junit test :param dir_class: :param dir_jars: :return: None ''' out = pt.mkdir_system(out_dir, 'junit_output') running_dir = None files_jars = pt.walk_rec(dir_jars, [], '.jar') files_jars.append( '/home/ise/eran/evosuite/jar/evosuite-standalone-runtime-1.0.6.jar') files_jars.append(dir_class) jars_string = ':'.join(files_jars) tests_files = pt.walk_rec(dir_class, [], '.class') tests_files = [ x for x in tests_files if str(x).__contains__('_scaffolding') is False ] d = {} for item in tests_files: split_arr = str(item).split('/') name = split_arr[-1].split('.')[0] split_arr[-1] = name index = split_arr.index('test_classes') + 1 if index >= 0: package_string = '.'.join(split_arr[index:]) running_dir = '/'.join(split_arr[:index]) d[package_string] = {'item': item, 'chdir': running_dir} else: msg = 'no prefix package {} in the path: {}'.format( prefix_package, item) raise Exception(msg) command_Junit = "java -cp {} org.junit.runner.JUnitCore".format( jars_string) d_res = [] for ky in d: os.chdir(d[ky]['chdir']) print ky final_command = "{} {}".format(command_Junit, ky) print final_command process = Popen(shlex.split(final_command), stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate() print "----stdout----" print stdout print "----stderr----" print stderr d_tmp = { 'out': stdout, 'err': stderr, 'status': parser_std_out_junit(stdout), 'class': ky } if d_add is not None: for key_d in d_add.keys(): d_tmp[key_d] = d_add[key_d] d_res.append(d_tmp) with open('{}/{}_out_test.txt'.format(out, ky), 'w') as f: f.write("stdout:\n{}stderr:\n{}".format(stdout, stderr)) for k in d_res: print "{}".format(k) reporting_csv(out_dir, d_res) return d_res
def matrix_analysis(root_path_project, root_stat, k=3, on='FP',
                    out='/home/ise/eran/lang/out_rev/'):
    """Summarise mutation outcomes per package, keeping the top-k classes by
    the *on* metric, and write res_k_<k>_on_<on>.csv under out/<project>.
    """
    df_sum = []
    d_fp = load__data(
        root_path_project)  # load all the package of the prefix packages
    name_project = project_name(root_path_project)  # get the project name
    out_path = pt.mkdir_system(out, name_project,
                               False)  # create dir for the output
    df_stat = pd.read_csv(root_stat)  # load the stat DataFrame
    diced_to_prefix(df_stat, d_fp)  # make packages stat dict
    for ky in d_fp.keys():
        print '--------------------------on={}---------------------------'.format(
            on)
        print "name----->name_project: {}".format(name_project)
        print "KEY : {}".format(ky)
        ##print list(df_stat)
        # Narrow the stats to this package, then to classes that actually
        # have tests and a PIT report; record the size at each stage.
        df_filter = df_stat.loc[df_stat['prefix'] == ky]
        package_size = len(df_filter)
        df_filter = df_filter.loc[df_filter['test'] == 1]
        package_size_actual_test = len(df_filter)
        df_filter = df_filter.loc[df_filter['pit_xml'] == 1]
        package_size_actual_pit = len(df_filter)
        print list(df_filter)
        loc = df_filter['loc_TEST'].sum()
        if len(df_filter) > k:
            # print "ky: {} len: {}".format(ky,len(df_filter))
            print k
            df_cut = get_top_k(df_filter, on, k)
        else:
            df_cut = df_filter
        target_list = ['ID']
        if df_cut is None:
            # TODO:FIX IT !!!!!
            raise Exception("[Error] the df cut is empty == None")
        target_list.extend(df_cut['class'].tolist())
        print "df_CUT\n\t{}".format(list(df_cut))
        package_df = d_fp[ky]
        print list(package_df)
        res_package_df = package_df[target_list]
        size_bug = len(res_package_df)
        print 'size_bug: ', size_bug
        # Fold the per-class value_counts into one dict for this package.
        tmp_d = {}
        for x in target_list:
            if x == 'ID':
                continue
            out_pit = res_package_df[x].value_counts()
            out_pit = out_pit.to_dict()
            if ky not in tmp_d:
                tmp_d[ky] = out_pit
            else:
                d = tmp_d[ky]
                d_res = mereg_dico(d, out_pit)
                tmp_d[ky] = d_res
        # NOTE(review): if target_list held only 'ID' (no classes), tmp_d[ky]
        # is never created and the lines below raise KeyError -- confirm
        # whether empty packages can reach this point.
        tmp_d[ky]['package'] = ky
        tmp_d[ky]['Test_LOC'] = loc
        tmp_d[ky]['all_mutation'] = size_bug
        tmp_d[ky]['package_class_size'] = package_size
        tmp_d[ky]['package_size_actual_test'] = package_size_actual_test
        tmp_d[ky]['package_size_actual_pit'] = package_size_actual_pit
        df_sum.append(tmp_d[ky])
        del tmp_d[ky]
    df = pd.DataFrame(df_sum)
    name = "k_{}_on_{}".format(k, on)
    df.to_csv("{}/res_{}.csv".format(out_path, name))