Exemplo n.º 1
0
def phase1(pso_runs, output_path, func_index, parameters, inputcases_range,
           const_range, coeff_range):
    """Run the PSO search repeatedly and checkpoint every result to disk.

    After each run the accumulated candidates are re-written to
    ``{output_path}/phase1/{func_index}_{p0}_{p1}_{p2}_{p3}_{p4}.npz`` (the
    ``p*`` being the integer-parsed fields of ``parameters``), so an
    interrupted search keeps the runs that already finished.

    Args:
        pso_runs: Number of PSO runs requested. At least one run is always
            performed, even when this is zero or negative.
        output_path: Root output directory; it and its ``phase1``
            sub-directory are created when missing.
        func_index: Index handed to ``settings.getNEI`` / ``settings.getNEO``
            and to ``PSO``.
        parameters: Underscore-separated string of at least five integers:
            number of inputs, input-relation mode, output-relation mode,
            input-relation degree, output-relation degree.
        inputcases_range: Forwarded unchanged to ``PSO``.
        const_range: Forwarded unchanged to ``PSO``.
        coeff_range: Forwarded unchanged to ``PSO``.

    Raises:
        ValueError: If a field of ``parameters`` is not an integer.
        IndexError: If ``parameters`` has fewer than five fields.
    """
    # makedirs also creates missing parents and does not fail if another
    # process created the directory between a check and the call.
    phase1_dir = f"{output_path}/phase1"
    os.makedirs(phase1_dir, exist_ok=True)

    no_of_elements_input = settings.getNEI(func_index)
    no_of_elements_output = settings.getNEO(func_index)

    no_of_particles = 30
    no_of_inputcases = 100

    parameters_int = [int(e) for e in parameters.split("_")]
    no_of_inputs = parameters_int[0]
    mode_input_relation = parameters_int[1]
    mode_output_relation = parameters_int[2]
    degree_of_input_relation = parameters_int[3]
    degree_of_output_relation = parameters_int[4]

    # The file name is built from the parsed integers, not the raw string.
    checkpoint_path = (
        f"{phase1_dir}/{func_index}_{no_of_inputs}_{mode_input_relation}_"
        f"{mode_output_relation}_{degree_of_input_relation}_"
        f"{degree_of_output_relation}.npz")

    min_cost_candidates = []
    A_candidates = []
    B_candidates = []

    pso_run = 0
    # Test-at-the-bottom loop: one search always happens, matching the
    # long-standing behaviour for pso_runs <= 0.
    while True:
        AutoMR = PSO(settings.program, func_index, no_of_inputs,
                     mode_input_relation, mode_output_relation,
                     degree_of_input_relation, degree_of_output_relation,
                     no_of_elements_input, no_of_elements_output,
                     no_of_particles, no_of_inputcases, inputcases_range,
                     const_range, coeff_range)
        min_cost, A, B = AutoMR.run()

        min_cost_candidates.append(np.round(min_cost, decimals=3))
        A_candidates.append(A)
        B_candidates.append(B)

        # Re-write the whole checkpoint so the file always holds every
        # completed run.
        np.savez(checkpoint_path,
                 min_cost_candidates=min_cost_candidates,
                 A_candidates=np.array(A_candidates),
                 B_candidates=np.array(B_candidates))

        pso_run += 1
        if pso_run >= pso_runs:
            break
Exemplo n.º 2
0
def load_npz_to_pandas(result_path):
    """Load a candidate ``.npz`` file into labelled pandas tables.

    The file name supplies the metadata: the text before the first ``_`` is
    the function index, and ``file_name[-26:-17]`` is the parameter string.
    NOTE(review): that slice presumes a 17-character suffix such as
    ``_after_filter.npz`` and a 9-character parameter string (five
    single-digit fields) - confirm against the callers.

    Input matrices that agree within ``atol=0.05, rtol=0.1`` are treated as
    the same follow-up input and share one name ``i<k>``; the outputs of
    that input are named ``o<k>_<element>`` and ``o0_<element>`` denotes the
    source output.

    Args:
        result_path: Path of an ``.npz`` file holding ``A_candidates`` and
            ``B_candidates``.

    Returns:
        A tuple ``(parameters, df_x_all, MR_all_df)``:
        ``parameters`` is the parameter string from the file name;
        ``df_x_all`` has one row ``i<k>_<element>`` per element of every
        distinct input matrix; ``MR_all_df`` has one row ``MR<n>`` per
        candidate, with the candidate's ``B`` coefficients summed per
        output-term column and absent terms filled with 0.

    Raises:
        ValueError: If the file name fields are not integers, or (from
            ``pd.concat``) if the file holds no candidates.
    """
    file_name = ntpath.basename(result_path)
    func_index = int(file_name[0:file_name.find('_')])
    parameters = file_name[-26:-17]
    parameters_int = [int(e) for e in parameters.split("_")]
    DIR = parameters_int[3]
    DOR = parameters_int[4]

    # Indexing an NpzFile reads the array into memory, so the handle can be
    # closed as soon as both arrays are fetched.
    with np.load(result_path) as candidates_all:
        As = candidates_all["A_candidates"]
        Bs = candidates_all["B_candidates"]

    NEI = settings.getNEI(func_index)
    NEO = settings.getNEO(func_index)

    # Column labels of the input-term basis.
    u = str_comb([f"i0_{i+1}" for i in range(NEI)], DIR)
    element_labels = [f"e{i_EOI+1}" for i_EOI in range(NEI)]

    # All distinct input matrices found so far: "i<k>" -> (NEI x len(u)) values.
    x_all = {}

    MR_all = []
    for i_A in range(As.shape[0]):
        A = As[i_A]

        # Names of the outputs this MR refers to; "o0" is the source output.
        o_orig = ["o0"]

        for i_NOI in range(A.shape[0]):
            A_iNOI = A[i_NOI]

            # Build the labelled matrix in one step (DataFrame.append was
            # removed in pandas 2.0); NaN coefficients count as 0.
            x_values = pd.DataFrame(
                [A_iNOI[i_EOI] for i_EOI in range(NEI)],
                columns=u,
                index=element_labels).fillna(0).values

            matched_name = None
            for x_name, A_x in x_all.items():
                if np.allclose(A_x, x_values, atol=0.05, rtol=0.1,
                               equal_nan=True):
                    matched_name = x_name
                    break
            if matched_name is None:
                matched_name = f"i{len(x_all) + 1}"
                x_all[matched_name] = x_values

            # Input "i<k>" always maps to output "o<k>". The old code emitted
            # "oi<k>" for an already-known input, which split one output
            # across two differently named columns.
            o_orig.append(f"o{matched_name[1:]}")

        # One label per element of every referenced output.
        o = [f"{o_name}_{i_ele + 1}"
             for o_name in o_orig
             for i_ele in range(NEO)]

        v = str_comb(o, DOR)

        MR = pd.DataFrame([Bs[i_A]], columns=v)
        # Sum coefficients of identically labelled terms. Grouping the
        # transpose is the replacement for the deprecated groupby(axis=1).
        MR = MR.T.groupby(MR.columns).sum().T
        MR_all.append(MR)

    MR_all_df = pd.concat(MR_all, ignore_index=True, sort=True)
    MR_all_df = MR_all_df.fillna(0)

    df_x_all = pd.DataFrame(columns=u)
    for x_name, x_values in x_all.items():
        for idx_e in range(NEI):
            df_x_all.loc[f'{x_name}_{idx_e+1}'] = x_values[idx_e]

    MR_all_df.index = [f'MR{i+1}' for i in MR_all_df.index.values]
    return parameters, df_x_all, MR_all_df
Exemplo n.º 3
0
def checkAfterSVD(folder_path, func_indices):
    """Re-test the pickled phase-3 MRs on fresh inputs and drop failing ones.

    For every ``func_index`` the pickles in ``{folder_path}/phase3`` whose
    name starts with ``{func_index}_`` and ends with
    ``group_after_cs_svd.pkl`` or ``other_types_after_cs_svd.pkl`` are
    processed; ``.npz`` files are skipped. Each MR (a row of coefficients
    over output terms) is evaluated on generated test inputs; it survives
    when fewer than 5% of 100 evaluations violate ``|B . v| < 0.1``.
    Surviving rows are written back to the pickle and the survivor count is
    stored in column ``phase3`` of ``{folder_path}/counts.csv``.

    Args:
        folder_path: Directory holding ``counts.csv`` and ``phase3``.
        func_indices: Iterable of function indices to process.

    Raises:
        FileNotFoundError: If ``counts.csv`` or the ``phase3`` directory is
            missing.
    """
    counts_path = f"{folder_path}/counts.csv"
    filer_phase3_df = pd.read_csv(counts_path, index_col=0)

    no_of_testcases = 100
    # (input suffix, output file suffix, degree stored in MRs[2]?)
    pkl_kinds = (
        ("group_after_cs_svd.pkl", "MRs_group_after_cs_svd.pkl", True),
        ("other_types_after_cs_svd.pkl", "MRs_other_types_after_cs_svd.pkl",
         False),
    )

    for func_index in func_indices:
        no_of_elements_output = settings.getNEO(func_index)
        prefix = f"{func_index}_"

        for MRs_type in os.listdir(f"{folder_path}/phase3"):
            # MRs stored in npz format need no re-check.
            if not MRs_type.startswith(prefix) or MRs_type.endswith(".npz"):
                continue
            kind = next(
                (k for k in pkl_kinds if MRs_type.endswith(k[0])), None)
            if kind is None:
                continue
            _, output_suffix, degree_is_stored = kind

            i0_all = Phase1_PSOSearch.generate_i0_all(
                settings.get_input_datatype(func_index),
                settings.get_input_range(func_index), no_of_testcases)

            # NOTE: pickle.load executes arbitrary code from the file; only
            # use this on pickles produced by this pipeline.
            with open(f"{folder_path}/phase3/{MRs_type}", "rb") as f:
                MRs_dict = pickle.load(f)

            for parameters, MRs in MRs_dict.items():
                x_all_dict = MRs[0]
                y_all_df = MRs[1]
                if degree_is_stored:
                    # Grouped MRs carry the input-relation degree with them.
                    degree = MRs[2]
                else:
                    degree = [int(e) for e in parameters.split("_")][3]

                kill_rates = _mr_kill_rates(func_index, x_all_dict, y_all_df,
                                            degree, i0_all,
                                            no_of_elements_output,
                                            no_of_testcases)
                surviving = [position
                             for position, rate in enumerate(kill_rates)
                             if rate < 0.05]
                if len(surviving) < len(kill_rates):
                    # Rows were judged by position, so select by position too
                    # (label-based drop breaks on a non-default index).
                    MRs[1] = y_all_df.iloc[surviving]

                filer_phase3_df.loc[f"{func_index}_{parameters}",
                                    "phase3"] = len(surviving)

            with open(f"{folder_path}/phase3/{func_index}_{output_suffix}",
                      "wb") as f2:
                pickle.dump(MRs_dict, f2, pickle.HIGHEST_PROTOCOL)

        # Persist the updated counts; previously they were computed and then
        # discarded when the function returned.
        filer_phase3_df.to_csv(counts_path)


def _factor_value(factor, output_values):
    """Return a term factor as a number: a numeric literal or a named output."""
    try:
        return float(factor)
    except (TypeError, ValueError):
        return output_values[factor]


def _mr_kill_rates(func_index, x_all_dict, y_all_df, degree, i0_all,
                   no_of_elements_output, no_of_testcases):
    """Return, per row of ``y_all_df``, violations divided by ``no_of_testcases``.

    An empty array is returned when there are no rows or no test inputs.
    """
    term_names = y_all_df.columns.values
    coefficient_rows = [
        y_all_df.iloc[index_MR, :].values
        for index_MR in range(y_all_df.shape[0])
    ]
    no_of_cases = i0_all.shape[0]
    if not coefficient_rows or no_of_cases == 0:
        return np.zeros(0)

    is_killed = np.zeros((len(coefficient_rows), no_of_cases))
    for index_i0 in range(no_of_cases):
        i0 = i0_all[index_i0]
        u = Phase1_PSOSearch.comb(i0, degree)

        # Output element values keyed by the names used in the term labels.
        output_values = {}
        for x_name, A in x_all_dict.items():
            y = settings.program(np.dot(A, u), func_index)
            for index_eo in range(no_of_elements_output):
                output_values[f"f{x_name}_{index_eo + 1}"] = y[index_eo]
        y0 = settings.program(i0, func_index)
        for index_eo in range(no_of_elements_output):
            output_values[f"fx0_{index_eo + 1}"] = y0[index_eo]

        # Each column label is a sequence of factors whose product is the
        # value of that term (np.prod replaces the removed np.product).
        term_values = np.zeros(term_names.shape)
        for index_y in range(term_names.shape[0]):
            term_values[index_y] = np.prod([
                _factor_value(factor, output_values)
                for factor in list(term_names[index_y])
            ])

        for index_MR, B in enumerate(coefficient_rows):
            Bv = np.dot(B, term_values)
            is_satisfied = (np.isreal(Bv) and not np.isnan(Bv)
                            and np.abs(Bv) < 0.1)
            is_killed[index_MR, index_i0] = 0 if is_satisfied else 1

    return is_killed.sum(axis=1) / no_of_testcases
Exemplo n.º 4
0
def phase2(output_path, parameters, func_index, output_name):
    """Filter phase-1 candidates by re-testing them on fresh random inputs.

    A candidate first has to pass the phase-1 cost threshold (``< 5`` when
    the output-relation mode is 1, otherwise ``< 0.05``). It is then
    evaluated on 100 independently generated input sets and survives only if
    every evaluation costs less than 0.05. Survivors are saved to
    ``{output_path}/phase2/{func_index}_{parameters}_after_filter.npz`` and
    the counts go to ``{output_path}/counts.csv`` (columns ``pso``,
    ``phase1``, ``phase2``).

    Args:
        output_path: Root output directory containing ``phase1``.
        parameters: Underscore-separated string of at least five integers:
            number of inputs, input-relation mode, output-relation mode,
            input-relation degree, output-relation degree.
        func_index: Index of the program under test.
        output_name: File name of the phase-1 ``.npz`` inside
            ``{output_path}/phase1``.
    """
    no_of_inputcases = 100
    no_of_validation_rounds = 100

    # The old check tested f"{output_path}/", which is never a regular file,
    # so existing statistics were silently replaced by an empty frame.
    counts_path = f"{output_path}/counts.csv"
    if os.path.isfile(counts_path):
        file_statistics = pd.read_csv(counts_path, index_col=0)
    else:
        file_statistics = pd.DataFrame()

    parameters_int = [int(e) for e in parameters.split("_")]
    mode_input_relation = parameters_int[1]
    mode_output_relation = parameters_int[2]
    degree_of_input_relation = parameters_int[3]
    degree_of_output_relation = parameters_int[4]

    no_of_elements_output = settings.getNEO(func_index)

    # Indexing an NpzFile reads the array into memory; the context manager
    # closes the handle even if loading fails.
    with np.load('{}/phase1/{}'.format(output_path,
                                       output_name)) as results_all:
        min_cost_candidates = results_all['min_cost_candidates']
        A_candidates = results_all['A_candidates']
        B_candidates = results_all['B_candidates']
    all_count = min_cost_candidates.shape[0]

    phase1_threshold = 5 if mode_output_relation == 1 else 0.05

    A_candidates_after_filter = []
    B_candidates_after_filter = []
    ini_count = 0
    survive_count = 0

    for min_cost, A, B in zip(min_cost_candidates, A_candidates,
                              B_candidates):
        if not min_cost < phase1_threshold:
            continue
        ini_count += 1

        for _ in range(no_of_validation_rounds):
            i0_all = Phase1_PSOSearch.generate_i0_all(
                settings.get_input_datatype(func_index),
                settings.get_input_range(func_index), no_of_inputcases)
            survive_cost = get_cost_of_AB(settings.program, func_index, A,
                                          B, i0_all, mode_input_relation,
                                          mode_output_relation,
                                          degree_of_input_relation,
                                          degree_of_output_relation,
                                          no_of_elements_output)
            if survive_cost >= 0.05:
                break
        else:
            # No round failed: the candidate survives.
            survive_count += 1
            A_candidates_after_filter.append(A)
            B_candidates_after_filter.append(B)

    os.makedirs(f"{output_path}/phase2", exist_ok=True)

    np.savez(
        f'{output_path}/phase2/{func_index}_{parameters}_after_filter.npz',
        A_candidates=np.array(A_candidates_after_filter),
        B_candidates=np.array(B_candidates_after_filter))

    row = f"{func_index}_{parameters}"
    file_statistics.loc[row, "pso"] = all_count
    file_statistics.loc[row, "phase1"] = ini_count
    file_statistics.loc[row, "phase2"] = survive_count
    # Persist the counts; they were previously dropped on return.
    file_statistics.to_csv(counts_path)
Exemplo n.º 5
0
def phase3(folder_path, func_indices, coeff_range, const_range):
    """Reduce the phase-2 survivors of each function via z3_check / svd_check.

    For every ``func_index`` the ``*after_filter.npz`` files in
    ``{folder_path}/phase2`` are loaded. Parameter sets with more than one
    candidate go through ``z3_check``; those whose third parameter field is
    1 and that still have more than one candidate afterwards additionally go
    through ``svd_check``. Results are written into ``{folder_path}/phase3``
    as ``.npz`` files, or as one pickle per function for the ``svd_check``
    output. The number of remaining MRs per parameter set is stored in
    column ``phase3`` of ``{folder_path}/counts.csv``.

    Args:
        folder_path: Directory holding ``phase2`` and, optionally,
            ``counts.csv`` and ``performance.csv``.
        func_indices: Iterable of function indices to process.
        coeff_range: Forwarded to ``z3_check``.
        const_range: Forwarded to ``z3_check``.
    """
    if not os.path.isdir(f"{folder_path}/phase3"):
        os.mkdir(f"{folder_path}/phase3")

    # Continue from existing statistics when present, else start empty.
    if os.path.isfile(f"{folder_path}/counts.csv"):
        results = pd.read_csv(f"{folder_path}/counts.csv", index_col=0)
    else:
        results = pd.DataFrame()

    if os.path.isfile(f"{folder_path}/performance.csv"):
        times = pd.read_csv(f"{folder_path}/performance.csv", index_col=0)
    else:
        times = pd.DataFrame()

    for func_index in func_indices:
        # parameters -> number of MRs left after this phase
        stats_after_cs_svd_df = {}
        # parameters -> seconds spent in the z3_check / svd_check step
        time_cs_svd = {}

        # get NEI NEO
        NEI = settings.getNEI(func_index)
        NEO = settings.getNEO(func_index)

        # select the filtered MRs of the func_name
        AB_all = {}
        for filename in os.listdir(f"{folder_path}/phase2"):
            if filename.startswith(f"{func_index}_") and filename.endswith(
                    "after_filter.npz"):
                # example: np_log1p_2_1_1_1_1_after_filter.npz
                # NOTE(review): the slice presumes a 9-character parameter
                # string (five single-digit fields) - confirm.
                parameters = filename[-26:-17]
                time_cs_svd[parameters] = 0
                MRs = np.load(f"{folder_path}/phase2/{filename}")
                As = MRs["A_candidates"]
                Bs = MRs["B_candidates"]
                if As.shape[0] > 1:
                    # several candidates: queue them for z3_check below
                    AB_all[parameters] = [As, Bs]
                elif As.shape[0] == 1:
                    # a single candidate is copied to phase3 unchanged
                    stats_after_cs_svd_df[parameters] = 1
                    np.savez(
                        f'{folder_path}/phase3/{func_index}_{parameters}_after_cs_svd.npz',
                        A_candidates=As,
                        B_candidates=Bs)
                else:
                    stats_after_cs_svd_df[parameters] = 0

        # filter using CS
        AB_all_after_CS = {}
        # parameters -> list(svd_check(...)); pickled after the loop
        MRs_each_type_after_cs_svd = {}
        for parameters, ABs in AB_all.items():
            t1 = datetime.datetime.now()

            parameters_int = [int(e) for e in parameters.split("_")]
            NOI = parameters_int[0]
            MIR = parameters_int[1]
            MOR = parameters_int[2]
            DIR = parameters_int[3]
            DOR = parameters_int[4]

            A_candidates = ABs[0]
            B_candidates = ABs[1]

            # before_CS / after_CS are returned by z3_check but not used here
            A_candidates_after_CS, B_candidates_after_CS, before_CS, after_CS = z3_check(
                NEI, NEO, NOI, MIR, MOR, DIR, DOR, A_candidates, B_candidates,
                coeff_range, const_range)
            AB_all_after_CS[parameters] = [
                A_candidates_after_CS, B_candidates_after_CS
            ]

            # for output inequality MRs, can't use svd so just save cs results
            if parameters_int[2] != 1:
                stats_after_cs_svd_df[
                    parameters] = A_candidates_after_CS.shape[0]
                np.savez(
                    f'{folder_path}/phase3/{func_index}_{parameters}_after_cs_svd.npz',
                    A_candidates=A_candidates_after_CS,
                    B_candidates=B_candidates_after_CS)

            # do svd for output equality MRs
            else:
                # do svd for the type which has more than 1 MRs
                if A_candidates_after_CS.shape[0] > 1:
                    MRs_each_type_after_cs_svd[parameters] = list(
                        svd_check({parameters: AB_all_after_CS[parameters]},
                                  NEI, NEO))
                    # element [1] of the svd_check result supplies the count
                    # via its .shape[0]
                    stats_after_cs_svd_df[
                        parameters] = MRs_each_type_after_cs_svd[parameters][
                            1].shape[0]
                else:
                    stats_after_cs_svd_df[
                        parameters] = A_candidates_after_CS.shape[0]
                    np.savez(
                        f'{folder_path}/phase3/{func_index}_{parameters}_after_cs_svd.npz',
                        A_candidates=A_candidates_after_CS,
                        B_candidates=B_candidates_after_CS)

            t2 = datetime.datetime.now()
            cost_time = np.round((t2 - t1).total_seconds(), 2)
            time_cs_svd[parameters] = time_cs_svd[parameters] + cost_time
        # all svd_check results of this function share one pickle file
        if len(MRs_each_type_after_cs_svd) > 0:
            with open(
                    f"{folder_path}/phase3/{func_index}_MRs_other_types_after_cs_svd.pkl",
                    "wb") as f1:
                pickle.dump(MRs_each_type_after_cs_svd, f1,
                            pickle.HIGHEST_PROTOCOL)

        # NOTE(review): the block below is disabled code kept for reference.
        # Before re-enabling it, check the "less" branch, which passes
        # MRs_greater_equal to svd_check, and the timing keys, which always
        # write to "x_1_1_x_x".
        # # for group of {equal input, equal output}, {greater, equal}, {less, equal}, use svd to simplify them
        # MRs_equal_equal = {}
        # MRs_greater_equal = {}
        # MRs_less_equal = {}
        # for parameters, candidates_after_CS in AB_all_after_CS.items():
        #     parameters_int = [int(e) for e in parameters.split("_")]
        #     if parameters_int[1] == 1 and parameters_int[2] == 1:
        #         MRs_equal_equal[parameters] = candidates_after_CS
        #     elif parameters_int[1] == 2 and parameters_int[2] == 1:
        #         MRs_greater_equal[parameters] = candidates_after_CS
        #     elif parameters_int[1] == 3 and parameters_int[2] == 1:
        #         MRs_less_equal[parameters] = candidates_after_CS
        #     else:
        #         pass

        # MRs_group_after_svd = {}
        # if len(MRs_equal_equal) > 0:
        #     t1 = datetime.datetime.now()
        #     MRs_group_after_svd["x_1_1_x_x"] = list(svd_check(MRs_equal_equal, NEI, NEO))
        #     t2 = datetime.datetime.now()
        #     cost_time = np.round((t2-t1).total_seconds(), 2)
        #     time_cs_svd["x_1_1_x_x"] = cost_time
        #     stats_after_cs_svd_df["x_1_1_x_x"] = MRs_group_after_svd["x_1_1_x_x"][1].shape[0]
        # if len(MRs_greater_equal) > 0:
        #     t1 = datetime.datetime.now()
        #     MRs_group_after_svd["x_2_1_x_x"] = list(svd_check(MRs_greater_equal, NEI, NEO))
        #     t2 = datetime.datetime.now()
        #     cost_time = np.round((t2-t1).total_seconds(), 2)
        #     time_cs_svd["x_1_1_x_x"] = cost_time
        #     stats_after_cs_svd_df["x_2_1_x_x"] = MRs_group_after_svd["x_2_1_x_x"][1].shape[0]
        # if len(MRs_less_equal) > 0:
        #     t1 = datetime.datetime.now()
        #     MRs_group_after_svd["x_3_1_x_x"] = list(svd_check(MRs_greater_equal, NEI, NEO))
        #     t2 = datetime.datetime.now()
        #     cost_time = np.round((t2-t1).total_seconds(), 2)
        #     time_cs_svd["x_1_1_x_x"] = cost_time
        #     stats_after_cs_svd_df["x_3_1_x_x"] = MRs_group_after_svd["x_3_1_x_x"][1].shape[0]
        #
        # if len(MRs_group_after_svd) > 0:
        #     with open(f"{folder_path}/phase3/{func_index}_MRs_group_after_cs_svd.pkl", "wb") as f2:
        #         pickle.dump(MRs_group_after_svd, f2, pickle.HIGHEST_PROTOCOL)

        # save number of MRs after svd
        # print(stats_after_cs_svd_df)
        for parameters, number in stats_after_cs_svd_df.items():
            results.loc[f"{func_index}_{parameters}", "phase3"] = number

        # counts.csv is rewritten after every function
        results.to_csv(f"{folder_path}/counts.csv")

        # NOTE(review): `times` is only updated in memory here; no write back
        # to performance.csv is visible in this block - confirm it is saved.
        for parameters, time in time_cs_svd.items():
            times.loc[f"{func_index}_{parameters}", "phase3"] = time