# Example #1
# 0
def draw_sample_step_main(db_path, table_name):
    """Plot a bar chart of sample-step frequencies for successfully compiled records.

    Args:
        db_path: path to the sqlite database.
        table_name: experiment-result table to read.
    """
    result_df = read_experiment_result_df(db_path, table_name)
    # Keep only records that compiled successfully (compile_res == 1).
    compiled_df = result_df[result_df['compile_res'] == 1]
    print(len(compiled_df))
    step_counter = Counter(compiled_df['sample_step'].tolist())
    print(step_counter)
    step_bar_figure(step_counter)
# Example #2
# 0
def calculate_error_msg_count(db_path, table_name, key):
    """Count occurrences of each identifier-normalized error message.

    Args:
        db_path: path to the sqlite database.
        table_name: experiment-result table to read.
        key: column holding a JSON-encoded list of error messages.

    Returns:
        Result of stat_error_type_count over the normalized messages
        (a message -> count mapping).
    """
    df = read_experiment_result_df(db_path, table_name)
    # BUG FIX: the original used `x is not ''` — an identity check that only
    # works by accident of string interning (and is a SyntaxWarning on 3.8+).
    # Use equality to reliably drop rows with an empty message list.
    df = df[df[key].map(lambda x: x != '')]
    df[key] = df[key].map(json.loads)
    error_list = list(more_itertools.collapse(df[key].tolist()))
    standard_error_list = [replace_itentifier(e) for e in error_list]
    c = stat_error_type_count(standard_error_list)
    return c
# Example #3
# 0
def filter_program_id_main(db_path, table_name, new_table_name):
    """Keep the best record per program id and persist the survivors to a new table.

    Args:
        db_path: path to the sqlite database.
        table_name: source experiment-result table.
        new_table_name: destination table to create and fill.
    """
    df = read_experiment_result_df(db_path, table_name)
    grouped = df.groupby('id')
    print('group length: ', len(grouped))
    # One best record per id group.
    save_list = [select_best_records(group) for _, group in grouped]
    print('save list length: ', len(save_list))
    create_table(db_path, DATA_RECORDS_DEEPFIX, replace_table_name=new_table_name)
    run_sql_statment(db_path, DATA_RECORDS_DEEPFIX, 'insert_ignore', save_list,
                     replace_table_name=new_table_name)
# Example #4
# 0
def length_statistics(db_path, table_name):
    """Compute code length plus compile / partial-fix flags for each record.

    Args:
        db_path: path to the sqlite database.
        table_name: experiment-result table to read.

    Returns:
        Three parallel lists: (code_length, compile_res, part_res).
    """
    df = read_experiment_result_df(db_path, table_name)
    df = df[df['code_list'].map(lambda x: x != '')]
    df['code_list'] = df['code_list'].map(json.loads)
    df['code_length'] = df['code_list'].map(len)
    # BUG FIX: the original `df.apply(lambda one: ..., raw=True, axis=1)`
    # passes each row as a bare ndarray, so label access like
    # one['compile_res'] raises. A vectorized boolean expression is both
    # correct and much faster. "Partial" = did not fully compile but reduced
    # (without eliminating) the error count.
    df['part_result'] = ((df['compile_res'] != 1)
                         & (df['error_count'] < df['original_error_count'])
                         & (df['error_count'] > 0)).astype(int)
    code_length = df['code_length'].tolist()
    compile_res = df['compile_res'].tolist()
    part_res = df['part_result'].tolist()
    print('min length: {}, max_length: {}'.format(min(code_length), max(code_length)))
    return code_length, compile_res, part_res
# Example #5
# 0
def stat_main(db_path,
              table_name,
              compile_result=True,
              part_correct=True,
              error_solver=True,
              max_sample_step=None):
    """Run the selected statistics over one experiment-result table.

    Args:
        db_path: path to the sqlite database.
        table_name: experiment-result table to read.
        compile_result: run the compile-result statistic.
        part_correct: run the partial-correctness statistic.
        error_solver: run the error-solver statistic.
        max_sample_step: if given, only keep records whose sample_step
            does not exceed this value.
    """
    df = read_experiment_result_df(db_path, table_name)
    if max_sample_step is not None:
        df = df[df['sample_step'] <= max_sample_step]
    # Dispatch each enabled statistic over the (possibly filtered) frame.
    stat_switches = ((compile_result, calculate_compile_result),
                     (part_correct, calculate_part_correct),
                     (error_solver, calculate_error_solver))
    for enabled, stat_fn in stat_switches:
        if enabled:
            stat_fn(df)
# Example #6
# 0
def check_error_count_main(db_path, table_name):
    """Print how many ids disagree on error count between the deepfix records
    and the experiment-result table.

    Args:
        db_path: path to the sqlite database.
        table_name: experiment-result table to compare against.
    """
    deepfix_df = read_deepfix_error_records()
    ids = deepfix_df['id'].tolist()
    # id -> error count as recorded by deepfix.
    expected_counts = dict(zip(ids, deepfix_df['errorcount'].tolist()))

    df = read_experiment_result_df(db_path, table_name)
    # id -> original error count as stored in the experiment table.
    actual_counts = dict(zip(df['id'].tolist(),
                             df['original_error_count'].tolist()))

    mismatched = sum(1 for i in ids
                     if expected_counts[i] != actual_counts[i])
    print(mismatched)
# Example #7
# 0
def statistics_score(db_path, table_name):
    """Print and plot EM / PM / error-resolved ratios binned by code length.

    EM = exact match (compiled), PM = partial match (error count reduced but
    not to zero), EMR = fraction of original error messages resolved.

    Args:
        db_path: path to the sqlite database.
        table_name: experiment-result table to read.
    """
    df = read_experiment_result_df(db_path, table_name)
    df = df[df['code_list'].map(lambda x: x != '')]
    df['code_list'] = df['code_list'].map(json.loads)
    df['code_length'] = df['code_list'].map(len)
    # BUG FIX: the original df.apply(..., raw=True, axis=1) hands each row to
    # the lambda as a bare ndarray, so one['compile_res'] raises on label
    # access. Vectorized boolean arithmetic is correct and faster.
    df['part_result'] = ((df['compile_res'] != 1)
                         & (df['error_count'] < df['original_error_count'])
                         & (df['error_count'] > 0)).astype(int)

    em_df = df[df['compile_res'] == 1]
    pm_df = df[df['part_result'] == 1]

    total_bin = calculate_bin_count([1] * len(df), df['code_length'].tolist())
    em_bin = calculate_bin_count(em_df['compile_res'].tolist(),
                                 em_df['code_length'].tolist())
    pm_bin = calculate_bin_count(pm_df['part_result'].tolist(),
                                 pm_df['code_length'].tolist())
    print(total_bin)
    print(em_bin)
    print(pm_bin)

    # BUG FIX: guard em_bin lookups with .get(k, 0), matching pm_bin —
    # a length bin containing no exact matches would otherwise KeyError.
    em_ratio_bin = {k: em_bin.get(k, 0) / v for k, v in total_bin.items()}
    pm_ratio_bin = {k: pm_bin.get(k, 0) / v for k, v in total_bin.items()}
    print(em_ratio_bin)
    print(pm_ratio_bin)

    error_msg_bin = calculate_bin_count(df['error_count'].tolist(),
                                        df['code_length'].tolist())
    total_error_msg_bin = calculate_bin_count(
        df['original_error_count'].tolist(), df['code_length'].tolist())
    print(error_msg_bin)
    print(total_error_msg_bin)

    # Fraction of original errors no longer present, per length bin.
    resolved_ratio_bin = {
        k: (v - error_msg_bin.get(k, 0)) / v
        for k, v in total_error_msg_bin.items()
    }
    print(resolved_ratio_bin)

    draw_line_figure([em_ratio_bin, pm_ratio_bin, resolved_ratio_bin],
                     ['EM', 'PM', 'EMR'])
# Example #8
# 0
def calculate_top_error_type(db_path, table_name, key):
    """Count, per normalized error message, the number of records containing it.

    Each record contributes at most 1 to a message's count (set semantics),
    so the result ranks error types by how many programs exhibit them.

    Args:
        db_path: path to the sqlite database.
        table_name: experiment-result table to read.
        key: column holding a JSON-encoded list of error messages.

    Returns:
        dict mapping normalized error message -> number of records containing it.
    """
    df = read_experiment_result_df(db_path, table_name)
    # BUG FIX: the original used `x is not ''` — an identity check that only
    # works by accident of string interning (and is a SyntaxWarning on 3.8+).
    # Use equality to reliably drop empty rows.
    df = df[df[key].map(lambda x: x != '')]
    df[key] = df[key].map(json.loads)
    standard_key = 'standard_' + key
    df[standard_key] = df[key].map(
        lambda x: [replace_itentifier(i) for i in x])

    # Single pass: no need to pre-collect the full error universe first,
    # since every observed message appears in at least one record's list.
    standard_dict = {}
    for e_list in df[standard_key]:
        for e in set(e_list):
            standard_dict[e] = standard_dict.get(e, 0) + 1
    return standard_dict
# Example #9
# 0
def main_compile_code_and_read_error_info(db_path,
                                          table_name,
                                          do_compile_original=False):
    """Compile sampled (and optionally original) code and persist compiler output.

    Args:
        db_path: path to the sqlite database.
        table_name: experiment-result table read from and written back to.
        do_compile_original: when True, also compile each record's original
            code first and store that compile info separately.
    """
    df = read_experiment_result_df(db_path, table_name)
    df['includes'] = df['includes'].map(json.loads)
    if do_compile_original:
        # NOTE(review): raw=True hands each row to consist_full_code as a bare
        # ndarray rather than a Series — confirm consist_full_code is written
        # to accept that (label access would fail on an ndarray).
        df['full_code'] = df.apply(consist_full_code,
                                   raw=True,
                                   axis=1,
                                   code_key='code')
        ids = df['id'].tolist()
        full_original_code = df['full_code'].tolist()
        # Compile all original programs in parallel (presumably a process pool
        # — verify against pool_compile_and_save's definition).
        compile_res_list, compile_info_list, error_list = pool_compile_and_save(
            full_original_code)
        save_records = create_compile_info_save_records(
            ids, compile_info_list, error_list)
        save_compile_result(save_records,
                            db_path,
                            table_name,
                            command_key='update_original_compile_info')

    # sample_step >= 0 marks records that actually produced a sample.
    effect_df = df[df['sample_step'].map(lambda x: x >= 0)]
    # NOTE(review): assigning a new column on a filtered slice can trigger
    # pandas' SettingWithCopyWarning; consider effect_df = ...copy() upstream.
    effect_df['full_sample_code'] = effect_df.apply(consist_full_code,
                                                    raw=True,
                                                    axis=1,
                                                    code_key='sample_code')
    effect_ids = effect_df['id'].tolist()
    full_sample_code = effect_df['full_sample_code'].tolist()
    compile_res_list, compile_info_list, error_list = pool_compile_and_save(
        full_sample_code)
    save_records = create_compile_info_save_records(effect_ids,
                                                    compile_info_list,
                                                    error_list)
    save_compile_result(save_records,
                        db_path,
                        table_name,
                        command_key='update_sample_compile_info')