Пример #1
0
def missing_class_gen(root_class,
                      root_test,
                      java_src,
                      log,
                      pit=None,
                      name='tmp',
                      pit2=None):
    # full path for the test and the .class files
    scanner_class = pt.walk(root_class, ".class")
    scanner_java = pt.walk(java_src, ".java")
    scanner_tests = pt.walk(root_test, "ESTest.java")
    print "classes size ={}".format(len(scanner_class))
    print "tests size ={}".format(len(scanner_tests))
    # convert the full path to package format
    scanner_class_pak = [
        pt.path_to_package('org', x, -6) for x in scanner_class
    ]
    scanner_tests_pak = [
        pt.path_to_package('org', y, -12) for y in scanner_tests
    ]
    d = dict_diff(list_one=scanner_class_pak,
                  list_two=scanner_tests_pak,
                  path_root_test=root_test)
    look_at_test(scanner_java, scanner_tests, d)
    if pit is not None:
        miss_PIT(pit, d)
    if pit2 is not None:
        miss_target_pit(pit2, d)
    dff = make_df(d, log, name)
    return d
Пример #2
0
def get_all_project_D4J(path):
    projes_bug = pit_render_test.walk(path, "buggy", False)
    projes_fix = pit_render_test.walk(path, "fixed", False)
    projes = projes_fix + projes_bug
    if len(projes) == 0:
        print "no project in the following path : {}".format(path)
    return projes
Пример #3
0
def sum_all_stat_dir(p_path, mod='fp'):
    new_df_list = []
    list_col = [
        'Empty_test_case', 'FP', 'empty_test', 'java(.class)', 'no_test',
        'pit', 'test', 'no_test_Avg'
    ]
    all_dir = pt.walk(p_path, 'stat_r', False)
    for dir in all_dir:
        name = str(dir).split('/')[-2]
        print 'name=', name
        if len(name.split('_')) < 5:
            continue
        time = name.split('_')[5]
        time = time[2:]
        all_csvs = pt.walk(dir, 'Fin')
        all_csvs = [
            x for x in all_csvs
            if str(x).split('/')[-1].__contains__('_{}_'.format(mod))
        ]
        if len(all_csvs) > 1:
            raise Exception("Two file Fin in dir:{} mode allocation={}".format(
                dir, mod))
        for csv_item in all_csvs:
            name_file = str(csv_item).split('/')[-1][:-4]
            mode = name_file.split('_')[1]
            size_dirs = name_file.split('_')[3]
            size_project = int(size_dirs)
            d = {}
            df = pd.read_csv(csv_item, index_col=0)
            df['avg__empty'] = df['empty_test'] / size_project
            list_col.extend(['avg__empty'])
            list_numric = list(df._get_numeric_data())
            list_numric = [x for x in list_numric if x in list_col]
            d['dirs'] = size_dirs
            d['time'] = time
            d['allocation_mode'] = mode
            d['name'] = name
            d['size_project'] = size_project
            d['size'] = len(df)
            for x in list_numric:
                d[x] = df[x].sum()
            new_df_list.append(d)
    df = pd.DataFrame(new_df_list)
    if df.empty:
        print "no data to agg mode={}".format(mod)
        return None
    df['time'] = df['time'].apply(int)
    df = df.set_index(df['time'])
    df.drop('time', axis=1, inplace=True)
    df.sort_index(inplace=True)
    if p_path[-1] == '/':
        df.to_csv("{}fin_{}_stat.csv".format(p_path, mod))
    else:
        df.to_csv("{}/fin_{}_stat.csv".format(p_path, mod))
Пример #4
0
def aggregation_res_matrix(path_dir):
    print ""
    d = []
    list_files = pt.walk(path_dir, '.csv')
    for file_i in list_files:

        name_file = str(file_i).split('/')[-1][:-4]
        if str(name_file).__contains__('sum'):
            continue
        dir_name = str(file_i).split('/')[-2]
        arr = str(name_file).split('_')
        k_num = arr[2]
        criterion = arr[4]
        df = pd.read_csv(file_i, index_col=0)
        col_list = list(df)
        col_list.remove('package')
        sum_kill = df['KILLED'].sum()
        sum_all = df['all_mutation'].sum()
        d.append({
            'criterion': criterion,
            'dir': dir_name,
            'K': k_num,
            'kill': sum_kill,
            'all_bug': sum_all
        })
    df_all = pd.DataFrame(d)
    df_all.sort_values(by=['K'], inplace=True)
    df_all.to_csv("{}/sum.csv".format(path_dir), index=False)
Пример #5
0
def rev_analysis_by_package(p_path, data_path=None, d_class=None):
    '''make csv by packages
    by dictionary object or by path csv dir
    '''
    out_path_dir = mkdir_system(p_path, 'package', is_del=False)
    d = {}
    cur_d = d_class
    d_class_local = {}
    if data_path is not None:
        list_csvs = pit_render_test.walk(data_path, '.csv')
        for csv_item in list_csvs:
            name = str(csv_item).split('/')[-1][:-4]
            df = pd.read_csv(csv_item)
            d_class_local[name] = df
        cur_d = d_class_local
    print "in"
    for key in cur_d.keys():
        xml_df = cur_d[key]
        if xml_df is not None:
            package_prefix = str(key).split('.')[:-1]
            package_prefix = '.'.join(package_prefix)
            if package_prefix not in d:
                d[package_prefix] = {}
            d[package_prefix][key] = xml_df
    print "done"
    merge_dfs(d, out_path_dir)
Пример #6
0
def merge_all_csvs(root_path):
    print ''
    csvs_class = pit_render_test.walk(root_path, 'csvs', False)
    dico_paths = {}
    for item_p in csvs_class:
        if item_p[-1] == '/':
            item_p = item_p[:-1]
        if os.path.isdir("{}/class".format(item_p)) is False:
            print "[Error]  {}/class is not exist".format(item_p)
            continue
        classes_name = pit_render_test.walk("{}/class".format(item_p), '.csv')
        for klass in classes_name:
            name = str(klass).split('/')[-1][:-4]
            if name not in dico_paths:
                dico_paths[name] = []
            dico_paths[name].append(klass)
    return dico_paths
Пример #7
0
def wrapper_class_analysis(root_path):
    size_p = len(str(root_path).split('/'))
    list_p = pit_render_test.walk(root_path, 't=', False)
    list_p = [x for x in list_p if str(x).__contains__('ALL_') is False]
    #list_p = [x for x in list_p if str(x).__contains__('=20_')  ]
    for p in list_p:
        print p
        dico = merge_all_csvs(p)
        read_and_mereg(dico, p)
Пример #8
0
def func_start(main_root, mode='reg'):
    scan_obj = pt.walk(main_root, "t=", False, 0)
    li = []
    for x in scan_obj:
        print x
        li.append(time_budget_analysis(x, mode))
    df = pd.DataFrame(li)
    if main_root[-1] != '/':
        main_root = main_root + '/'
    df.to_csv(main_root + "class_analysis.csv")
Пример #9
0
def statistic_by_packaging(p_path):
    '''
    Make statistics over all packages in the given project.

    NOTE(review): unimplemented stub -- it discovers the stat_r
    directories but the per-directory aggregation body was never written
    (loop body is `pass`, `new_df_list` is never filled).

    :param p_path: project root scanned for 'stat_r' directories
    :return: None
    '''
    new_df_list = []
    all_dir = pt.walk(p_path, 'stat_r', False)
    for dir in all_dir:
        pass
Пример #10
0
def load__data(root_data):
    print ""
    list_files = pt.walk(root_data, '.csv')
    d_dico = {}
    for item_csv in list_files:
        if str(item_csv).__contains__(
                'org.apache.commons.math3.genetics') is False:
            pass
        prefix_name = str(item_csv).split('/')[-1][:-4]
        d_dico[prefix_name] = pd.read_csv(item_csv)
    return d_dico
Пример #11
0
def merge_by_packages_Roni(dir_root, out_path):
    '''
    this function output a matrix with the columns target_col for each file configurations in the time_FP and time_U
    :param dir_root:
    :param out_path:
    :return:
    '''
    print ""
    d = []
    name = dir_root.split('/')[-1]
    target_cols = [
        'KILLED', 'all_mutation', 'package', 'Test_LOC', 'package_class_size',
        'package_size_actual_pit', 'package_size_actual_test', 'criterion',
        'allocation_mode', 'K', 'time_budget'
    ]
    list_files = pt.walk(dir_root, '.csv')
    all_dfs = None
    for file_i in list_files:
        name_file = str(file_i).split('/')[-1][:-4]
        arr = name_file.split('_')
        k = arr[2]
        criterion = arr[-1]
        if str(name_file).__contains__('sum'):
            continue
        dir_name = str(file_i).split('/')[-2]
        arr = dir_name.split('_')
        allocation_mode = arr[1]
        time_budget = arr[2][2:]
        df = pd.read_csv(file_i, index_col=0)
        df['K'] = k
        df['criterion'] = criterion
        df['time_budget'] = time_budget
        df['allocation_mode'] = allocation_mode
        df = df[target_cols]
        # df[target_cols].to_csv("/home/ise/eran/bbb.csv", index=False)
        if all_dfs is None:
            all_dfs = df
            # print "len: {}".format(len(all_dfs))
            continue
        else:
            size_df = len(df)
            size_all_df = len(all_dfs)
            print "df:", size_df
            print "all_dfs:", size_all_df
            all_dfs = all_dfs.append(df)
            print "mereg: {}".format(len(all_dfs))
            all_dfs = all_dfs.fillna(0.0)
    if out_path[-1] == '/':
        all_dfs.to_csv("{}by_package_{}.csv".format(out_path, name),
                       index=False)
    else:
        all_dfs.to_csv("{}/by_package_{}.csv".format(out_path, name),
                       index=False)
    exit()
Пример #12
0
def xml_replace(project_path='/home/ise/eran/xml/02_26_13_27_45_t=30_/pit_test/ALL_U_t=30_it=0_/commons-math3-3.5-src'):
    '''
    If PIT output is xml, work on the csvs dir of *project_path*.

    NOTE(review): this looks like an unfinished debugging stub -- it
    prints the column list of the first csv it finds and then calls
    exit(), killing the whole process; the `err` dict is never used.
    '''
    class_csv = '{}/csvs/class'.format(project_path)
    list_csvs = pit_render_test.walk(class_csv,'.csv')
    err={}
    for item in list_csvs:
        # base file name without the .csv extension
        name = str(item).split('/')[-1][:-4]
        df = pd.read_csv(item)
        print list(df)
        exit()
Пример #13
0
def add_all_big_df(root_p):
    list_p = pit_render_test.walk(root_p, 'big_df')
    if len(list_p) > 0:
        big_df_all = pd.read_csv(list_p[0], index_col=0)
    else:
        print "didnt find any big_df Dataframe in path:{}".format(root_p)
        return
    for p in list_p[1:]:
        df = pd.read_csv(p, index_col=0)
        big_df_all = pd.merge(big_df_all, df, on=['ID'], how='outer')
        print "all_df: {}".format(len(big_df_all))
    return big_df_all
Пример #14
0
def miss_target_pit(path_PIT, dico):
    d = {}
    list_class_dir = pt.walk(path_PIT, '', False)
    for dir in list_class_dir:
        empty = 0
        name = str(dir).split('/')[-1]
        files = pt.walk(dir, 'mutations')
        if len(files) == 1:
            if os.stat(files[0]).st_size < 1:
                empty = 1
            suffix = files[0][-3:]
            if suffix == 'xml':
                d[name] = {'xml': 1, 'csv': 0, 'empty': empty}
            elif suffix == 'csv':
                d[name] = {'xml': 0, 'csv': 1, 'empty': empty}
            else:
                raise Exception('unreconzie suffix: (path)= {}'.format(
                    files[0]))
        elif len(files) > 1:
            #for p_item in files:
            #    print 'p_item:', p_item
            #    if str(p_item).__contains__('201804'):
            #        os.system('rm -r {}'.format(p_item))
            raise Exception(
                'more than on Mutations.xml/Mutations.xml in dir: {}'.format(
                    dir))
        else:
            d[name] = {'xml': 0, 'csv': 0, 'empty': empty}
    for ky in dico.keys():
        if ky in d:
            dico[ky]['pit_xml'] = d[ky]['xml']
            dico[ky]['pit_csv'] = d[ky]['csv']
            dico[ky]['pit_empty_file'] = d[ky]['empty']
        else:
            print 'in'
            dico[ky]['pit_xml'] = 0
            dico[ky]['pit_csv'] = 0
            dico[ky]['pit_empty_file'] = 0
Пример #15
0
def make_big_csv(root_p):
    """Stack all per-class kill csvs of every 'out_xml_all' dir into one
    'big_df_<name>' csv written next to (not inside) that dir.

    The expected column names are prefixed with the run's time budget,
    which is parsed from the parent directory name ('...t=<b>...').
    """
    list_p = pit_render_test.walk(root_p, 'out_xml_all', False)
    for p in list_p:
        print p
        cols = ['ID', 'KILL_Avg_FP', 'KILL_Sum_FP', 'KILL_Avg_U', 'KILL_Sum_U']
        # time budget from the parent dir name, e.g. '..._t=30_...' -> '30_...'
        time_b = str(p).split('/')[-2].split('=')[1]
        # prefix every column except ID with the budget
        for j in range(1, len(cols)):
            cols[j] = "t={}_{}".format(time_b, cols[j])
        # running row count, compared against the concat size as a sanity check
        acc = 0
        name = str(p).split('/')[-2].split('_')[-2]
        csv_lists = pit_render_test.walk(p, '.csv')
        big_df = pd.DataFrame(columns=cols)
        # rebind p to the parent dir so the output lands beside out_xml_all
        p = '/'.join(str(p).split('/')[:-1])
        for csv_item in csv_lists:
            print "csv_item =", csv_item
            df = pd.read_csv(csv_item, index_col=0)
            df = df[cols]
            acc += int(len(df))
            big_df = pd.concat([big_df, df])
            if acc != int(len(big_df)):
                print "acc: {} big: {}".format(acc, int(len(big_df)))
            #print "[Good] big_df size: ", len(big_df)
        flush_csv(p, big_df, 'big_df_{}'.format(name))
        print 'done'
Пример #16
0
def collctor(path, name_file):
    """Average every 't=' column of each package csv under *path* and
    flush the result one directory above *path* as <name_file>.csv."""
    parent_dir = '/'.join(str(path).split('/')[:-1])
    rows = []
    for item_csv in pit_render_test.walk(path, '.csv'):
        package = str(item_csv).split('/')[-1][:-4]
        frame = pd.read_csv(item_csv, index_col=0)
        record = {'package': package}
        budget_cols = [c for c in list(frame) if str(c).__contains__('t=')]
        for col in budget_cols:
            record[col] = frame[col].mean()
        rows.append(record)
    flush_csv(parent_dir, pd.DataFrame(rows), name_file)
Пример #17
0
def time_budget_analysis(path_root, mode):
    """Run missing_class_gen on every project snapshot under *path_root*.

    Recreates a fresh stat_r/ log dir, analyses each 'commons-' snapshot
    (PIT per-class csvs in 'rev' mode, PIT report dir otherwise) and
    merges the per-snapshot results by allocation mode (FP vs U).

    :param path_root: run directory holding the project snapshots
    :param mode: 'rev' reads <snapshot>/csvs/class; anything else reads
        <snapshot>/target/pit-reports
    :raises Exception: when a snapshot name has no known allocation mode
    :return: None
    """
    list_fp = []
    list_u = []
    # the original first called pt.walk(path_root, "commons-math3-3.5-src",
    # False) and immediately overwrote the result with this walk_rec()
    # call -- the dead assignment was dropped (pt.walk is assumed to be a
    # pure scan with no side effects; confirm if that ever changes)
    res_scanner = pt.walk_rec(path_root, [], "commons-", False, -3)
    p_path = path_root
    if p_path[-1] != '/':
        p_path = p_path + '/'
    # start from a clean stat_r/ log directory
    if os.path.isdir("{}stat_r/".format(p_path)):
        os.system("rm -r {}".format(p_path + "stat_r/"))
    os.system("mkdir {}".format(p_path + "stat_r/"))
    log_path = "{}stat_r/".format(p_path)
    for i_path in res_scanner:
        name_i = get_name_path(i_path, -2)
        # snapshot name layout: <x>_<allocation>_t=<budget>...
        # (the original also parsed the time budget here but never used it)
        allocation_mode = str(name_i).split('_')[1]

        javas_path = "{}/src/main/java/org/".format(i_path)
        classes_path = "{}/target/classes/org/".format(i_path)
        tests_path = "{}/src/test/java/org/".format(i_path)
        pit_path2 = None
        pit_path = None
        if mode == 'rev':
            pit_path = "{}/csvs/class".format(i_path)
        else:
            pit_path2 = "{}/target/pit-reports/".format(i_path)
        df_i = missing_class_gen(root_class=classes_path,
                                 root_test=tests_path,
                                 java_src=javas_path,
                                 log=log_path,
                                 name=name_i,
                                 pit=pit_path,
                                 pit2=pit_path2)
        if allocation_mode == 'FP':
            list_fp.append(df_i)
        elif allocation_mode == 'U':
            list_u.append(df_i)
        else:
            raise Exception(
                'No allocation mode is known in name:{} \n path:{}'.format(
                    name_i, i_path))
    if len(list_u) > 0:
        merge_df(list_u, log_path, 'u')
    if len(list_fp) > 0:
        merge_df(list_fp, log_path, 'fp')
    return None
Пример #18
0
def merge_by_packages(dir_root, out_path):
    print ""
    d = []
    name = dir_root.split('/')[-1]
    target_cols = [
        'KILLED', 'all_mutation', 'package', 'package_class_size',
        'package_size_actual_pit', 'package_size_actual_test'
    ]
    list_files = pt.walk(dir_root, '.csv')
    all_dfs = None
    for file_i in list_files:
        name_file = str(file_i).split('/')[-1][:-4]
        if str(name_file).__contains__('sum'):
            continue
        dir_name = str(file_i).split('/')[-2]
        arr = str(name_file).split('_')
        k_num = arr[2]
        criterion = arr[4]
        df = pd.read_csv(file_i, index_col=0)
        col_list = list(df)
        name_col = "K_{}_mode_{}_dir_{}".format(k_num, criterion, dir_name)
        print list(df)
        df = df[target_cols]
        df.rename(columns={'KILLED': '{}_{}'.format(name_col, 'kill')},
                  inplace=True)
        # df.rename(columns={'all_mutation': '{}_{}'.format(name_col,'all_bug')}, inplace=True)
        if all_dfs is None:
            all_dfs = df
            print "len: {}".format(len(all_dfs))
            continue
        else:

            all_dfs = pd.merge(all_dfs,
                               df,
                               on=[
                                   'package', 'package_class_size',
                                   'package_size_actual_pit',
                                   'package_size_actual_test', 'all_mutation'
                               ])
            print "len: {}".format(len(all_dfs))
    if out_path[-1] == '/':
        all_dfs.to_csv("{}by_package_{}.csv".format(out_path, name),
                       index=False)
    else:
        all_dfs.to_csv("{}/by_package_{}.csv".format(out_path, name),
                       index=False)
Пример #19
0
def get_ID_index_table(root_path):
    res = pit_render_test.walk(root_path, 'index_er')
    index_df = pd.DataFrame(columns=['ID', 'mutatedClass'])
    if len(res) > 0:
        index_df = pd.read_csv(res[0], index_col=0)
        print "size:{}".format(len(index_df))
    for csv_p in res[1:]:
        df = pd.read_csv(csv_p, index_col=0)
        index_df = pd.merge(index_df,
                            df,
                            on=['ID', 'mutatedClass'],
                            how='outer')
        print "size:{}".format(len(index_df))
    index_df.rename(columns={'mutatedClass': '{}'.format('class')},
                    inplace=True)
    flush_csv(root_path, index_df, 'indexer')
    print "done"
    return True
Пример #20
0
 def get_outputs_test(self, clean=True):
     """Run the maven test suite and return its xml report paths.

     :param clean: when True, `mvn clean test` is run first from
         self.mvn_path (output appended to out_test_start.txt)
     :return: list of .xml paths under self.mvn_path + self.test_dir,
         or None when the dir is missing or holds no xml files
     """
     if clean:
         os.chdir(self.mvn_path)
         os.system("mvn clean test >> out_test_start.txt  2>&1")
     if (os.path.isdir(self.mvn_path + self.test_dir)):
         all_xml = pit_render_test.walk(
             self.mvn_path + self.test_dir,
             ".xml",
         )
         if len(all_xml) == 0:
             print "[Error] No XML files found in {}".format(self.mvn_path +
                                                             self.test_dir)
             return None
         return all_xml
     else:
         print "[Error] No directory {0} in {1}".format(
             self.test_dir, os.getcwd())
         return None
Пример #21
0
def miss_PIT(path_PIT, dico):
    """Mark in *dico* which classes have a non-empty PIT csv.

    Scans *path_PIT* for per-class csv files and sets dico[class]['pit']
    to 1 when the csv has content, 0 when it is empty or missing.

    :param path_PIT: directory of PIT per-class csvs (no-op when absent)
    :param dico: dict class-name -> stats dict, mutated in place
    """
    d = {}
    if os.path.isdir(path_PIT) is False:
        return
    list_class_csv = pt.walk(path_PIT, '.csv')

    # BUG FIX: the original removed empty-file entries from list_class_csv
    # while iterating over it, which made the loop skip the element right
    # after every empty csv -- those classes were never classified and
    # silently got pit=0 below. The list is not used after this loop, so
    # just record the flag without mutating it.
    for csv in list_class_csv:
        name = str(csv).split('/')[-1][:-4]
        if os.stat(csv).st_size == 0:
            d[name] = 0
        else:
            d[name] = 1
    for ky in dico.keys():
        if ky in d:
            dico[ky]['pit'] = d[ky]
        else:
            dico[ky]['pit'] = 0
Пример #22
0
def get_all_xml(path, root_path_project, mod):
    """Convert every PIT mutations.xml under *root_path_project* to csv.

    Each xml is parsed via pars_xml_to_csv and flushed as a per-class csv
    under <path>/class; an ID/mutatedClass index and a log of anomalies
    (empty xmls, class/test name mismatches, overwrites) are flushed
    under <path>/index.

    :param path: output root; 'class' and 'index' sub-dirs are recreated
    :param root_path_project: tree scanned for mutations.xml files
    :param mod: parse mode, passed through to pars_xml_to_csv
    :return: dict class-name -> parsed DataFrame ({} when no xml found)
    """
    d_class = {}
    print "-" * 30
    print path
    cols = ['ID', 'mutatedClass']
    err = {}
    index_df = pd.DataFrame(columns=cols)
    err_name = {}
    out_path_dir = mkdir_system(path, 'class', is_del=True)
    out_path_index = mkdir_system(path, 'index', is_del=True)
    list_xml = pit_render_test.walk(root_path_project, 'mutations.xml')
    if list_xml is None:
        print "[Error] no mutations xmls found in the following path --> {}".format(
            root_path_project)
        return {}
    # remaining-files counter, printed as crude progress output
    all = len(list_xml)
    #x_list =[]
    #for x in list_xml:
    #    if str(x).__contains__('SphericalCoordinat'):
    #        x_list.append(x)
    #list_xml= x_list
    df_dico_log = []
    for x_xml in list_xml:
        print all
        all = all - 1
        if len(x_xml) < 1:
            continue
        # class name is taken from the xml's parent directory
        name_file = str(x_xml).split('/')[-2]
        #print "name: ",name_file
        xml_df, test_name = pars_xml_to_csv(x_xml, mod)

        #bulid the index data-frame

        if xml_df is None:
            #d_class[name_file] = None
            df_dico_log.append({'class': name_file, 'info': 'empty xml'})
            print "empty xml file in class: {}".format(name_file)
            continue
        index_df = pd.concat([xml_df[cols], index_df])
        # xml declares a different class than its directory: stash the
        # frame in `err` and resolve it after the main pass
        if test_name is not None and test_name != name_file:
            print "[Error] {} != {}".format(name_file, test_name)
            df_dico_log.append({
                'class': name_file,
                'info': 'file contain {}'.format(test_name)
            })
            err_name[test_name] = name_file
            err[test_name] = xml_df
            continue
        flush_csv(out_path_dir, xml_df, name_file)
        d_class[name_file] = xml_df
    print err_name
    del_klass = []
    # second pass: flush each mismatched frame under the name the xml
    # itself declared, overwriting any frame already stored for that class
    for key in err.keys():
        xml_df = err[key]
        name_file = key
        flush_csv(out_path_dir, xml_df, name_file)
        if key in d_class:
            df_dico_log.append({
                'class': name_file,
                'info': 'overwrite'.format(name_file)
            })
        d_class[name_file] = xml_df
    flush_csv(out_path_index, index_df, 'index_er')
    if len(df_dico_log) > 0:
        log_df = pd.DataFrame(df_dico_log)
        print "len_log", len(df_dico_log)
        flush_csv(out_path_index, log_df, 'log')
    return d_class
Пример #23
0
def get_class_size(root_path):
    walker_obj = pt.walk(root_path, "")