def missing_class_gen(root_class, root_test, java_src, log, pit=None, name='tmp', pit2=None): # full path for the test and the .class files scanner_class = pt.walk(root_class, ".class") scanner_java = pt.walk(java_src, ".java") scanner_tests = pt.walk(root_test, "ESTest.java") print "classes size ={}".format(len(scanner_class)) print "tests size ={}".format(len(scanner_tests)) # convert the full path to package format scanner_class_pak = [ pt.path_to_package('org', x, -6) for x in scanner_class ] scanner_tests_pak = [ pt.path_to_package('org', y, -12) for y in scanner_tests ] d = dict_diff(list_one=scanner_class_pak, list_two=scanner_tests_pak, path_root_test=root_test) look_at_test(scanner_java, scanner_tests, d) if pit is not None: miss_PIT(pit, d) if pit2 is not None: miss_target_pit(pit2, d) dff = make_df(d, log, name) return d
def get_all_project_D4J(path): projes_bug = pit_render_test.walk(path, "buggy", False) projes_fix = pit_render_test.walk(path, "fixed", False) projes = projes_fix + projes_bug if len(projes) == 0: print "no project in the following path : {}".format(path) return projes
def sum_all_stat_dir(p_path, mod='fp'): new_df_list = [] list_col = [ 'Empty_test_case', 'FP', 'empty_test', 'java(.class)', 'no_test', 'pit', 'test', 'no_test_Avg' ] all_dir = pt.walk(p_path, 'stat_r', False) for dir in all_dir: name = str(dir).split('/')[-2] print 'name=', name if len(name.split('_')) < 5: continue time = name.split('_')[5] time = time[2:] all_csvs = pt.walk(dir, 'Fin') all_csvs = [ x for x in all_csvs if str(x).split('/')[-1].__contains__('_{}_'.format(mod)) ] if len(all_csvs) > 1: raise Exception("Two file Fin in dir:{} mode allocation={}".format( dir, mod)) for csv_item in all_csvs: name_file = str(csv_item).split('/')[-1][:-4] mode = name_file.split('_')[1] size_dirs = name_file.split('_')[3] size_project = int(size_dirs) d = {} df = pd.read_csv(csv_item, index_col=0) df['avg__empty'] = df['empty_test'] / size_project list_col.extend(['avg__empty']) list_numric = list(df._get_numeric_data()) list_numric = [x for x in list_numric if x in list_col] d['dirs'] = size_dirs d['time'] = time d['allocation_mode'] = mode d['name'] = name d['size_project'] = size_project d['size'] = len(df) for x in list_numric: d[x] = df[x].sum() new_df_list.append(d) df = pd.DataFrame(new_df_list) if df.empty: print "no data to agg mode={}".format(mod) return None df['time'] = df['time'].apply(int) df = df.set_index(df['time']) df.drop('time', axis=1, inplace=True) df.sort_index(inplace=True) if p_path[-1] == '/': df.to_csv("{}fin_{}_stat.csv".format(p_path, mod)) else: df.to_csv("{}/fin_{}_stat.csv".format(p_path, mod))
def aggregation_res_matrix(path_dir): print "" d = [] list_files = pt.walk(path_dir, '.csv') for file_i in list_files: name_file = str(file_i).split('/')[-1][:-4] if str(name_file).__contains__('sum'): continue dir_name = str(file_i).split('/')[-2] arr = str(name_file).split('_') k_num = arr[2] criterion = arr[4] df = pd.read_csv(file_i, index_col=0) col_list = list(df) col_list.remove('package') sum_kill = df['KILLED'].sum() sum_all = df['all_mutation'].sum() d.append({ 'criterion': criterion, 'dir': dir_name, 'K': k_num, 'kill': sum_kill, 'all_bug': sum_all }) df_all = pd.DataFrame(d) df_all.sort_values(by=['K'], inplace=True) df_all.to_csv("{}/sum.csv".format(path_dir), index=False)
def rev_analysis_by_package(p_path, data_path=None, d_class=None): '''make csv by packages by dictionary object or by path csv dir ''' out_path_dir = mkdir_system(p_path, 'package', is_del=False) d = {} cur_d = d_class d_class_local = {} if data_path is not None: list_csvs = pit_render_test.walk(data_path, '.csv') for csv_item in list_csvs: name = str(csv_item).split('/')[-1][:-4] df = pd.read_csv(csv_item) d_class_local[name] = df cur_d = d_class_local print "in" for key in cur_d.keys(): xml_df = cur_d[key] if xml_df is not None: package_prefix = str(key).split('.')[:-1] package_prefix = '.'.join(package_prefix) if package_prefix not in d: d[package_prefix] = {} d[package_prefix][key] = xml_df print "done" merge_dfs(d, out_path_dir)
def merge_all_csvs(root_path): print '' csvs_class = pit_render_test.walk(root_path, 'csvs', False) dico_paths = {} for item_p in csvs_class: if item_p[-1] == '/': item_p = item_p[:-1] if os.path.isdir("{}/class".format(item_p)) is False: print "[Error] {}/class is not exist".format(item_p) continue classes_name = pit_render_test.walk("{}/class".format(item_p), '.csv') for klass in classes_name: name = str(klass).split('/')[-1][:-4] if name not in dico_paths: dico_paths[name] = [] dico_paths[name].append(klass) return dico_paths
def wrapper_class_analysis(root_path): size_p = len(str(root_path).split('/')) list_p = pit_render_test.walk(root_path, 't=', False) list_p = [x for x in list_p if str(x).__contains__('ALL_') is False] #list_p = [x for x in list_p if str(x).__contains__('=20_') ] for p in list_p: print p dico = merge_all_csvs(p) read_and_mereg(dico, p)
def func_start(main_root, mode='reg'): scan_obj = pt.walk(main_root, "t=", False, 0) li = [] for x in scan_obj: print x li.append(time_budget_analysis(x, mode)) df = pd.DataFrame(li) if main_root[-1] != '/': main_root = main_root + '/' df.to_csv(main_root + "class_analysis.csv")
def statistic_by_packaging(p_path):
    '''
    make a static over all packages in the given project
    :param p_path:
    :return:
    '''
    # NOTE(review): unfinished stub -- it only scans for 'stat_r' directories
    # and does nothing with them; new_df_list is never populated or returned.
    new_df_list = []
    all_dir = pt.walk(p_path, 'stat_r', False)
    for dir in all_dir:
        pass
def load__data(root_data): print "" list_files = pt.walk(root_data, '.csv') d_dico = {} for item_csv in list_files: if str(item_csv).__contains__( 'org.apache.commons.math3.genetics') is False: pass prefix_name = str(item_csv).split('/')[-1][:-4] d_dico[prefix_name] = pd.read_csv(item_csv) return d_dico
def merge_by_packages_Roni(dir_root, out_path): ''' this function output a matrix with the columns target_col for each file configurations in the time_FP and time_U :param dir_root: :param out_path: :return: ''' print "" d = [] name = dir_root.split('/')[-1] target_cols = [ 'KILLED', 'all_mutation', 'package', 'Test_LOC', 'package_class_size', 'package_size_actual_pit', 'package_size_actual_test', 'criterion', 'allocation_mode', 'K', 'time_budget' ] list_files = pt.walk(dir_root, '.csv') all_dfs = None for file_i in list_files: name_file = str(file_i).split('/')[-1][:-4] arr = name_file.split('_') k = arr[2] criterion = arr[-1] if str(name_file).__contains__('sum'): continue dir_name = str(file_i).split('/')[-2] arr = dir_name.split('_') allocation_mode = arr[1] time_budget = arr[2][2:] df = pd.read_csv(file_i, index_col=0) df['K'] = k df['criterion'] = criterion df['time_budget'] = time_budget df['allocation_mode'] = allocation_mode df = df[target_cols] # df[target_cols].to_csv("/home/ise/eran/bbb.csv", index=False) if all_dfs is None: all_dfs = df # print "len: {}".format(len(all_dfs)) continue else: size_df = len(df) size_all_df = len(all_dfs) print "df:", size_df print "all_dfs:", size_all_df all_dfs = all_dfs.append(df) print "mereg: {}".format(len(all_dfs)) all_dfs = all_dfs.fillna(0.0) if out_path[-1] == '/': all_dfs.to_csv("{}by_package_{}.csv".format(out_path, name), index=False) else: all_dfs.to_csv("{}/by_package_{}.csv".format(out_path, name), index=False) exit()
def xml_replace(project_path='/home/ise/eran/xml/02_26_13_27_45_t=30_/pit_test/ALL_U_t=30_it=0_/commons-math3-3.5-src'):
    '''
    if PIT output is xml, working on the csvs dir
    '''
    # NOTE(review): this looks like an unfinished debugging helper -- it reads
    # only the FIRST class CSV, prints its column list, then kills the whole
    # interpreter via exit(); `err` is never used.  Confirm intent before use.
    class_csv = '{}/csvs/class'.format(project_path)
    list_csvs = pit_render_test.walk(class_csv, '.csv')
    err = {}
    for item in list_csvs:
        name = str(item).split('/')[-1][:-4]
        df = pd.read_csv(item)
        print list(df)
        exit()
def add_all_big_df(root_p): list_p = pit_render_test.walk(root_p, 'big_df') if len(list_p) > 0: big_df_all = pd.read_csv(list_p[0], index_col=0) else: print "didnt find any big_df Dataframe in path:{}".format(root_p) return for p in list_p[1:]: df = pd.read_csv(p, index_col=0) big_df_all = pd.merge(big_df_all, df, on=['ID'], how='outer') print "all_df: {}".format(len(big_df_all)) return big_df_all
def miss_target_pit(path_PIT, dico): d = {} list_class_dir = pt.walk(path_PIT, '', False) for dir in list_class_dir: empty = 0 name = str(dir).split('/')[-1] files = pt.walk(dir, 'mutations') if len(files) == 1: if os.stat(files[0]).st_size < 1: empty = 1 suffix = files[0][-3:] if suffix == 'xml': d[name] = {'xml': 1, 'csv': 0, 'empty': empty} elif suffix == 'csv': d[name] = {'xml': 0, 'csv': 1, 'empty': empty} else: raise Exception('unreconzie suffix: (path)= {}'.format( files[0])) elif len(files) > 1: #for p_item in files: # print 'p_item:', p_item # if str(p_item).__contains__('201804'): # os.system('rm -r {}'.format(p_item)) raise Exception( 'more than on Mutations.xml/Mutations.xml in dir: {}'.format( dir)) else: d[name] = {'xml': 0, 'csv': 0, 'empty': empty} for ky in dico.keys(): if ky in d: dico[ky]['pit_xml'] = d[ky]['xml'] dico[ky]['pit_csv'] = d[ky]['csv'] dico[ky]['pit_empty_file'] = d[ky]['empty'] else: print 'in' dico[ky]['pit_xml'] = 0 dico[ky]['pit_csv'] = 0 dico[ky]['pit_empty_file'] = 0
def make_big_csv(root_p): list_p = pit_render_test.walk(root_p, 'out_xml_all', False) for p in list_p: print p cols = ['ID', 'KILL_Avg_FP', 'KILL_Sum_FP', 'KILL_Avg_U', 'KILL_Sum_U'] time_b = str(p).split('/')[-2].split('=')[1] for j in range(1, len(cols)): cols[j] = "t={}_{}".format(time_b, cols[j]) acc = 0 name = str(p).split('/')[-2].split('_')[-2] csv_lists = pit_render_test.walk(p, '.csv') big_df = pd.DataFrame(columns=cols) p = '/'.join(str(p).split('/')[:-1]) for csv_item in csv_lists: print "csv_item =", csv_item df = pd.read_csv(csv_item, index_col=0) df = df[cols] acc += int(len(df)) big_df = pd.concat([big_df, df]) if acc != int(len(big_df)): print "acc: {} big: {}".format(acc, int(len(big_df))) #print "[Good] big_df size: ", len(big_df) flush_csv(p, big_df, 'big_df_{}'.format(name)) print 'done'
def collctor(path, name_file):
    '''Average every 't=' column of each package CSV under *path* and flush the table as *name_file*.'''
    out_path = '/'.join(str(path).split('/')[:-1])
    rows = []
    for item_csv in pit_render_test.walk(path, '.csv'):
        pkg_name = str(item_csv).split('/')[-1][:-4]
        frame = pd.read_csv(item_csv, index_col=0)
        budget_cols = [c for c in list(frame) if str(c).__contains__('t=')]
        record = {'package': pkg_name}
        for col in budget_cols:
            record[col] = frame[col].mean()
        rows.append(record)
    flush_csv(out_path, pd.DataFrame(rows), name_file)
def time_budget_analysis(path_root, mode):
    '''
    Analyse every 'commons-' run directory under *path_root*: recreate the
    stat_r/ log dir, run missing_class_gen per run, and merge the per-mode
    (FP / U) results into that log dir.
    :param path_root: root directory containing the run dirs
    :param mode: 'rev' reads the per-class PIT csvs; anything else reads the
                 target pit-reports dir
    :return: None
    '''
    list_fp = []
    list_u = []
    # NOTE(review): the original first called
    # pt.walk(path_root, "commons-math3-3.5-src", False) and immediately
    # overwrote the result; that dead call was removed.
    res_scanner = pt.walk_rec(path_root, [], "commons-", False, -3)
    p_path = path_root
    if p_path[-1] != '/':
        p_path = p_path + '/'
    # recreate a clean stat_r/ log directory
    if os.path.isdir("{}stat_r/".format(p_path)):
        os.system("rm -r {}".format(p_path + "stat_r/"))
    os.system("mkdir {}".format(p_path + "stat_r/"))
    log_path = "{}stat_r/".format(p_path)
    for i_path in res_scanner:
        # the run-dir name encodes the allocation mode: <..>_<FP|U>_t=<b>_...
        name_i = get_name_path(i_path, -2)
        allocation_mode = str(name_i).split('_')[1]
        javas_path = "{}/src/main/java/org/".format(i_path)
        classes_path = "{}/target/classes/org/".format(i_path)
        tests_path = "{}/src/test/java/org/".format(i_path)
        pit_path2 = None
        pit_path = None
        if mode == 'rev':
            pit_path = "{}/csvs/class".format(i_path)
        else:
            pit_path2 = "{}/target/pit-reports/".format(i_path)
        df_i = missing_class_gen(root_class=classes_path, root_test=tests_path,
                                 java_src=javas_path, log=log_path,
                                 name=name_i, pit=pit_path, pit2=pit_path2)
        if allocation_mode == 'FP':
            list_fp.append(df_i)
        elif allocation_mode == 'U':
            list_u.append(df_i)
        else:
            raise Exception(
                'No allocation mode is known in name:{} \n path:{}'.format(
                    name_i, i_path))
    if len(list_u) > 0:
        merge_df(list_u, log_path, 'u')
    if len(list_fp) > 0:
        merge_df(list_fp, log_path, 'fp')
    return None
def merge_by_packages(dir_root, out_path): print "" d = [] name = dir_root.split('/')[-1] target_cols = [ 'KILLED', 'all_mutation', 'package', 'package_class_size', 'package_size_actual_pit', 'package_size_actual_test' ] list_files = pt.walk(dir_root, '.csv') all_dfs = None for file_i in list_files: name_file = str(file_i).split('/')[-1][:-4] if str(name_file).__contains__('sum'): continue dir_name = str(file_i).split('/')[-2] arr = str(name_file).split('_') k_num = arr[2] criterion = arr[4] df = pd.read_csv(file_i, index_col=0) col_list = list(df) name_col = "K_{}_mode_{}_dir_{}".format(k_num, criterion, dir_name) print list(df) df = df[target_cols] df.rename(columns={'KILLED': '{}_{}'.format(name_col, 'kill')}, inplace=True) # df.rename(columns={'all_mutation': '{}_{}'.format(name_col,'all_bug')}, inplace=True) if all_dfs is None: all_dfs = df print "len: {}".format(len(all_dfs)) continue else: all_dfs = pd.merge(all_dfs, df, on=[ 'package', 'package_class_size', 'package_size_actual_pit', 'package_size_actual_test', 'all_mutation' ]) print "len: {}".format(len(all_dfs)) if out_path[-1] == '/': all_dfs.to_csv("{}by_package_{}.csv".format(out_path, name), index=False) else: all_dfs.to_csv("{}/by_package_{}.csv".format(out_path, name), index=False)
def get_ID_index_table(root_path): res = pit_render_test.walk(root_path, 'index_er') index_df = pd.DataFrame(columns=['ID', 'mutatedClass']) if len(res) > 0: index_df = pd.read_csv(res[0], index_col=0) print "size:{}".format(len(index_df)) for csv_p in res[1:]: df = pd.read_csv(csv_p, index_col=0) index_df = pd.merge(index_df, df, on=['ID', 'mutatedClass'], how='outer') print "size:{}".format(len(index_df)) index_df.rename(columns={'mutatedClass': '{}'.format('class')}, inplace=True) flush_csv(root_path, index_df, 'indexer') print "done" return True
def get_outputs_test(self, clean=True): if clean: os.chdir(self.mvn_path) os.system("mvn clean test >> out_test_start.txt 2>&1") if (os.path.isdir(self.mvn_path + self.test_dir)): all_xml = pit_render_test.walk( self.mvn_path + self.test_dir, ".xml", ) if len(all_xml) == 0: print "[Error] No XML files found in {}".format(self.mvn_path + self.test_dir) return None return all_xml else: print "[Error] No directory {0} in {1}".format( self.test_dir, os.getcwd()) return None
def miss_PIT(path_PIT, dico):
    '''
    Mark in *dico* which classes have a non-empty PIT class CSV under
    *path_PIT*: dico[class]['pit'] is set to 1 (non-empty report) or 0.
    :param path_PIT: dir containing the per-class PIT csvs (no-op if absent)
    :param dico: dict keyed by class name, updated in place
    '''
    d = {}
    if os.path.isdir(path_PIT) is False:
        return
    list_class_csv = pt.walk(path_PIT, '.csv')
    for csv in list_class_csv:
        name = str(csv).split('/')[-1][:-4]
        if os.stat(csv).st_size == 0:
            # BUG FIX: the original also did list_class_csv.remove(csv) here,
            # mutating the list being iterated; that skipped the next CSV, so
            # a non-empty class following an empty one could be marked 0.
            d[name] = 0
        else:
            d[name] = 1
    for ky in dico.keys():
        if ky in d:
            dico[ky]['pit'] = d[ky]
        else:
            dico[ky]['pit'] = 0
def get_all_xml(path, root_path_project, mod): d_class = {} print "-" * 30 print path cols = ['ID', 'mutatedClass'] err = {} index_df = pd.DataFrame(columns=cols) err_name = {} out_path_dir = mkdir_system(path, 'class', is_del=True) out_path_index = mkdir_system(path, 'index', is_del=True) list_xml = pit_render_test.walk(root_path_project, 'mutations.xml') if list_xml is None: print "[Error] no mutations xmls found in the following path --> {}".format( root_path_project) return {} all = len(list_xml) #x_list =[] #for x in list_xml: # if str(x).__contains__('SphericalCoordinat'): # x_list.append(x) #list_xml= x_list df_dico_log = [] for x_xml in list_xml: print all all = all - 1 if len(x_xml) < 1: continue name_file = str(x_xml).split('/')[-2] #print "name: ",name_file xml_df, test_name = pars_xml_to_csv(x_xml, mod) #bulid the index data-frame if xml_df is None: #d_class[name_file] = None df_dico_log.append({'class': name_file, 'info': 'empty xml'}) print "empty xml file in class: {}".format(name_file) continue index_df = pd.concat([xml_df[cols], index_df]) if test_name is not None and test_name != name_file: print "[Error] {} != {}".format(name_file, test_name) df_dico_log.append({ 'class': name_file, 'info': 'file contain {}'.format(test_name) }) err_name[test_name] = name_file err[test_name] = xml_df continue flush_csv(out_path_dir, xml_df, name_file) d_class[name_file] = xml_df print err_name del_klass = [] for key in err.keys(): xml_df = err[key] name_file = key flush_csv(out_path_dir, xml_df, name_file) if key in d_class: df_dico_log.append({ 'class': name_file, 'info': 'overwrite'.format(name_file) }) d_class[name_file] = xml_df flush_csv(out_path_index, index_df, 'index_er') if len(df_dico_log) > 0: log_df = pd.DataFrame(df_dico_log) print "len_log", len(df_dico_log) flush_csv(out_path_index, log_df, 'log') return d_class
def get_class_size(root_path): walker_obj = pt.walk(root_path, "")