def phase1(pso_runs, output_path, func_index, parameters, inputcases_range, const_range, coeff_range):
    if not os.path.isdir(output_path):
        os.mkdir(output_path)
    if not os.path.isdir(f"{output_path}/phase1"):
        os.mkdir(f"{output_path}/phase1")

    no_of_elements_input = settings.getNEI(func_index)
    no_of_elements_output = settings.getNEO(func_index)
    no_of_particles = 30
    no_of_inputcases = 100

    min_cost_candidates = []
    A_candidates = []
    B_candidates = []

    parameters_int = [int(e) for e in parameters.split("_")]
    no_of_inputs = parameters_int[0]
    mode_input_relation = parameters_int[1]
    mode_output_relation = parameters_int[2]
    degree_of_input_relation = parameters_int[3]
    degree_of_output_relation = parameters_int[4]

    for pso_run in range(pso_runs):
        # print(f"searching: func_index is {func_index}, parameters is {parameters}, pso_run is {pso_run+1}.")
        AutoMR = PSO(settings.program, func_index, no_of_inputs, mode_input_relation, mode_output_relation,
                     degree_of_input_relation, degree_of_output_relation, no_of_elements_input,
                     no_of_elements_output, no_of_particles, no_of_inputcases, inputcases_range,
                     const_range, coeff_range)
        min_cost, A, B = AutoMR.run()

        min_cost_candidates.append(np.round(min_cost, decimals=3))
        A_candidates.append(A)
        B_candidates.append(B)

        np.savez('{}/phase1/{}_{}_{}_{}_{}_{}.npz'.format(
            output_path, func_index, no_of_inputs, mode_input_relation, mode_output_relation,
            degree_of_input_relation, degree_of_output_relation),
            min_cost_candidates=min_cost_candidates,
            A_candidates=np.array(A_candidates),
            B_candidates=np.array(B_candidates))
        # print("A:")
        # print(A)
        # print("B:")
        # print(B)
        # print(f"Corresponding cost is {min_cost}")
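
# Example usage (sketch): run phase 1 for a single MR parameter setting. The concrete
# values below (func_index=1, the "2_1_1_1_1" NOI_MIR_MOR_DIR_DOR string, pso_runs=10 and
# the const/coeff bounds) are illustrative assumptions, not project defaults; inputcases_range
# is taken from settings.get_input_range here, mirroring how phase2 samples inputs.
def example_run_phase1():
    func_index = 1                              # assumed index of the program under test in settings
    parameters = "2_1_1_1_1"                    # NOI_MIR_MOR_DIR_DOR, parsed via parameters.split("_")
    inputcases_range = settings.get_input_range(func_index)
    const_range = [-10, 10]                     # assumed search bounds for constants
    coeff_range = [-5, 5]                       # assumed search bounds for coefficients
    phase1(pso_runs=10, output_path="output", func_index=func_index,
           parameters=parameters, inputcases_range=inputcases_range,
           const_range=const_range, coeff_range=coeff_range)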
def load_phase3_results(result_path):
    file_name = ntpath.basename(result_path)
    if result_path.endswith(".npz"):
        parameters, df_x_all, df_y_all = load_npz_to_pandas(result_path)
        print(df_x_all.to_string())
        print(df_y_all.to_string())
        return {parameters: [df_x_all, df_y_all]}
    # for the MRs stored in pkl format
    elif result_path.endswith(".pkl"):
        dict_MRs = {}
        with open(result_path, "rb") as f:
            MRs_dict = pickle.load(f)
        func_index = int(file_name[0:file_name.find('_')])
        NEI = settings.getNEI(func_index)
        for parameters, MRs in MRs_dict.items():
            print('-' * 10)
            print(f"func_index is {func_index}, NOI_MIR_MOR_DIR_DOR is {parameters}:")
            parameters_int = [int(e) for e in parameters.split("_")]
            NOI = parameters_int[0]
            MIR = parameters_int[1]
            MOR = parameters_int[2]
            DIR = parameters_int[3]
            DOR = parameters_int[4]
            x_all_dict = MRs[0]
            u = str_comb([f"i0_{i+1}" for i in range(NEI)], DIR)
            df_x_all = pd.DataFrame(columns=u)
            for k, v in x_all_dict.items():
                for idx_e in range(NEI):
                    df_x_all.loc[f'{k}_{idx_e+1}'] = v[idx_e]
            y_all_df = MRs[1]
            y_all_df.index = [f'MR{i+1}' for i in y_all_df.index.values]
            print(df_x_all.to_string())
            print(y_all_df.to_string())
            dict_MRs[parameters] = [df_x_all, y_all_df]
        return dict_MRs
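
# Example usage (sketch): inspect the MRs that survive phase 3. The result path below is
# an illustrative assumption following the phase-3 naming pattern; load_phase3_results
# accepts either a per-parameter-setting .npz file or a .pkl file holding a dict of MRs
# keyed by the NOI_MIR_MOR_DIR_DOR string.
def example_inspect_phase3_results():
    result_path = "output/phase3/1_2_1_1_1_1_after_cs_svd.npz"  # assumed file name
    mrs = load_phase3_results(result_path)
    for parameters, (df_x_all, df_y_all) in mrs.items():
        # df_x_all holds the input-relation coefficients, df_y_all one row of output coefficients per MR
        print(parameters, df_x_all.shape, df_y_all.shape)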
def load_npz_to_pandas(result_path):
    file_name = ntpath.basename(result_path)
    func_index = int(file_name[0:file_name.find('_')])
    parameters = file_name[-26:-17]
    parameters_int = [int(e) for e in parameters.split("_")]
    candidates_all = np.load(result_path)
    A_candidates = candidates_all["A_candidates"]
    B_candidates = candidates_all["B_candidates"]
    MRs = {parameters: [A_candidates, B_candidates]}
    hNOI = parameters_int[0]
    hDIR = parameters_int[3]
    hDOR = parameters_int[4]
    # to store all distinct As
    x_all = {}
    NEI = settings.getNEI(func_index)
    NEO = settings.getNEO(func_index)
    hu = str_comb([f"i0_{i+1}" for i in range(NEI)], hDIR)
    MR_all = []
    for parameters, AB_after_CS in MRs.items():
        parameters_int = [int(e) for e in parameters.split("_")]
        NOI = parameters_int[0]
        MIR = parameters_int[1]
        MOR = parameters_int[2]
        DIR = parameters_int[3]
        DOR = parameters_int[4]
        As = AB_after_CS[0]
        Bs = AB_after_CS[1]
        u = str_comb([f"i0_{i+1}" for i in range(NEI)], DIR)
        for i_A in range(As.shape[0]):
            A = As[i_A]
            # store the fx
            o_orig = ["o0"]
            for i_NOI in range(A.shape[0]):
                A_iNOI = A[i_NOI]
                # check whether to add a new x or not
                x_temp = pd.DataFrame(columns=hu)
                for i_EOI in range(NEI):
                    x_temp_iEOI = pd.DataFrame([A_iNOI[i_EOI]], columns=u, index=[f"e{i_EOI+1}"])
                    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call
                    x_temp = pd.concat([x_temp, x_temp_iEOI], sort=False)
                x_temp = x_temp.fillna(0)
                isNew = True
                for x, A_x in x_all.items():
                    isExist = np.allclose(A_x, x_temp.values, atol=0.05, rtol=0.1, equal_nan=True)
                    if isExist:
                        o_orig.append(f"o{x}")
                        isNew = False
                        break
                if isNew:
                    number_of_x = len(x_all)
                    x_all[f"i{number_of_x + 1}"] = x_temp.values
                    o_orig.append(f"o{number_of_x + 1}")
            # create the corresponding output elements
            o = []
            for i in range(len(o_orig)):
                for i_ele in range(NEO):
                    o.append(f"{o_orig[i]}_{i_ele + 1}")
            # create v
            v = str_comb(o, DOR)
            MR = pd.DataFrame([Bs[i_A]], columns=v)
            MR = MR.groupby(MR.columns, axis=1).sum()
            MR_all.append(MR)
    MR_all_df = pd.concat((df for df in MR_all), ignore_index=True, sort=True)
    y_all = MR_all_df.columns
    MR_all_df = MR_all_df.fillna(0)
    df_x_all = pd.DataFrame(columns=hu)
    for k, v in x_all.items():
        for idx_e in range(NEI):
            df_x_all.loc[f'{k}_{idx_e+1}'] = v[idx_e]
    MR_all_df.index = [f'MR{i+1}' for i in MR_all_df.index.values]
    return parameters, df_x_all, MR_all_df
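
# Worked example of the fixed-width name parsing used above (the file name is an
# illustrative assumption following the phase-3 output pattern):
#   file_name = "1_2_1_1_1_1_after_cs_svd.npz"
#   file_name[-26:-17]                      ->  "2_1_1_1_1"  (the NOI_MIR_MOR_DIR_DOR string)
#   int(file_name[0:file_name.find('_')])   ->  1            (the func_index)
# The suffix "_after_cs_svd.npz" is 17 characters and the parameter string is 9, so the
# slice only stays aligned while every parameter value is a single digit.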
def phase2(output_path, parameters, func_index, output_name):
    no_of_inputcases = 100

    if os.path.isfile(f"{output_path}/counts.csv"):
        file_statistics = pd.read_csv(f"{output_path}/counts.csv", index_col=0)
    else:
        file_statistics = pd.DataFrame()

    parameters_int = [int(e) for e in parameters.split("_")]
    no_of_inputs = parameters_int[0]
    mode_input_relation = parameters_int[1]
    mode_output_relation = parameters_int[2]
    degree_of_input_relation = parameters_int[3]
    degree_of_output_relation = parameters_int[4]
    no_of_elements_input = settings.getNEI(func_index)
    no_of_elements_output = settings.getNEO(func_index)

    A_candidates_after_filter = []
    B_candidates_after_filter = []
    ini_count = 0
    survive_count = 0

    results_all = np.load('{}/phase1/{}'.format(output_path, output_name))
    min_cost_candidates = results_all['min_cost_candidates']
    A_candidates = results_all['A_candidates']
    B_candidates = results_all['B_candidates']
    all_count = min_cost_candidates.shape[0]

    for index_candidate in range(all_count):
        min_cost = min_cost_candidates[index_candidate]
        A = A_candidates[index_candidate]
        B = B_candidates[index_candidate]

        isPass = True
        isPassPhase1 = False
        if mode_output_relation == 1:
            if min_cost < 5:
                ini_count += 1
                isPassPhase1 = True
        else:
            if min_cost < 0.05:
                ini_count += 1
                isPassPhase1 = True

        if isPassPhase1:
            for index_test in range(100):
                i0_all = Phase1_PSOSearch.generate_i0_all(settings.get_input_datatype(func_index),
                                                          settings.get_input_range(func_index),
                                                          no_of_inputcases)
                survive_cost = get_cost_of_AB(settings.program, func_index, A, B, i0_all,
                                              mode_input_relation, mode_output_relation,
                                              degree_of_input_relation, degree_of_output_relation,
                                              no_of_elements_output)
                if survive_cost >= 0.05:
                    isPass = False
                    break
            if isPass:
                survive_count += 1
                A_candidates_after_filter.append(A)
                B_candidates_after_filter.append(B)

    results_all.close()

    A_candidates_after_filter = np.array(A_candidates_after_filter)
    B_candidates_after_filter = np.array(B_candidates_after_filter)
    if not os.path.isdir("{}/phase2".format(output_path)):
        os.mkdir("{}/phase2".format(output_path))
    np.savez(f'{output_path}/phase2/{func_index}_{parameters}_after_filter.npz',
             A_candidates=A_candidates_after_filter,
             B_candidates=B_candidates_after_filter)

    file_statistics.loc[f"{func_index}_{parameters}", "pso"] = all_count
    file_statistics.loc[f"{func_index}_{parameters}", "phase1"] = ini_count
    file_statistics.loc[f"{func_index}_{parameters}", "phase2"] = survive_count
    # assumption: persist the counts so that phase3 and checkAfterSVD can read counts.csv later
    file_statistics.to_csv(f"{output_path}/counts.csv")
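
# Example usage (sketch): filter the candidates produced by one phase-1 run. The output
# file name mirrors the pattern phase1 uses when saving
# ("{func_index}_{NOI}_{MIR}_{MOR}_{DIR}_{DOR}.npz"); the concrete values are assumptions.
def example_run_phase2():
    func_index = 1
    parameters = "2_1_1_1_1"
    output_name = f"{func_index}_{parameters}.npz"   # file written by phase1 into output/phase1/
    phase2(output_path="output", parameters=parameters,
           func_index=func_index, output_name=output_name)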
def checkAfterSVD(folder_path, func_indices):
    filer_phase3_df = pd.read_csv(f"{folder_path}/counts.csv", index_col=0)
    for func_index in func_indices:
        no_of_elements_output = settings.getNEO(func_index)
        no_of_elements_input = settings.getNEI(func_index)
        inputcases_range = settings.get_input_range(func_index)
        no_of_testcases = 100
        MRs_types = os.listdir(f"{folder_path}/phase3")
        for MRs_type in MRs_types:
            # for the MRs stored in npz format
            if MRs_type.startswith(f"{func_index}_") and MRs_type.endswith(".npz"):
                pass
            # for the MRs stored in pkl format
            elif MRs_type.startswith(f"{func_index}_") and MRs_type.endswith("group_after_cs_svd.pkl"):
                i0_all = Phase1_PSOSearch.generate_i0_all(settings.get_input_datatype(func_index),
                                                          settings.get_input_range(func_index),
                                                          no_of_testcases)
                with open(f"{folder_path}/phase3/{MRs_type}", "rb") as f:
                    MRs_dict = pickle.load(f)
                for parameters, MRs in MRs_dict.items():
                    filer_phase3 = 0
                    x_all_dict = MRs[0]
                    y_all_df = MRs[1]
                    hDIR = MRs[2]
                    y_o_isKill_df = pd.DataFrame()
                    for index_i0 in range(i0_all.shape[0]):
                        i0 = i0_all[index_i0]
                        u = Phase1_PSOSearch.comb(i0, hDIR)
                        x_value_dict = {}
                        y_element_value_dict = {}
                        for x_name, A in x_all_dict.items():
                            x = np.dot(A, u)
                            x_value_dict[x_name] = x
                            y = settings.program(x, func_index)
                            for index_eo in range(no_of_elements_output):
                                y_element_value_dict[f"f{x_name}_{index_eo + 1}"] = y[index_eo]
                        y0 = settings.program(i0, func_index)
                        for index_eo in range(no_of_elements_output):
                            y_element_value_dict[f"fx0_{index_eo + 1}"] = y0[index_eo]
                        y_all_names = y_all_df.columns.values
                        y_all_values = np.zeros(y_all_names.shape)
                        for index_y in range(y_all_names.shape[0]):
                            y_names = list(y_all_names[index_y])
                            y_elements = []
                            for ii in range(len(y_names)):
                                try:
                                    y_elements.append(float(y_names[ii]))
                                except (TypeError, ValueError):
                                    y_elements.append(y_element_value_dict[y_names[ii]])
                            y_all_values[index_y] = np.prod(y_elements)
                        for index_MR in range(y_all_df.shape[0]):
                            B = y_all_df.iloc[index_MR, :].values
                            Bv = np.dot(B, y_all_values)
                            if np.isreal(Bv) and not np.isnan(Bv):
                                if np.abs(Bv) < 0.1:
                                    y_o_isKill_df.loc[index_MR, index_i0] = 0
                                else:
                                    y_o_isKill_df.loc[index_MR, index_i0] = 1
                            else:
                                y_o_isKill_df.loc[index_MR, index_i0] = 1
                    for index_MR in range(y_o_isKill_df.shape[0]):
                        kill_o_number = np.sum(y_o_isKill_df.iloc[index_MR, :].values)
                        cost_o = np.divide(kill_o_number, no_of_testcases)
                        if cost_o < 0.05:
                            filer_phase3 += 1
                        else:
                            MRs_dict[parameters][1] = MRs_dict[parameters][1].drop([index_MR])
                    # print(f"before filter {len(y_all_df)}")
                    # print(f"after filter left {filer_phase3}")
                    filer_phase3_df.loc[f"{func_index}_{parameters}", "phase3"] = filer_phase3
                with open(f"{folder_path}/phase3/{func_index}_MRs_group_after_cs_svd.pkl", "wb") as f2:
                    pickle.dump(MRs_dict, f2, pickle.HIGHEST_PROTOCOL)
            elif MRs_type.startswith(f"{func_index}_") and MRs_type.endswith("other_types_after_cs_svd.pkl"):
                i0_all = Phase1_PSOSearch.generate_i0_all(settings.get_input_datatype(func_index),
                                                          settings.get_input_range(func_index),
                                                          no_of_testcases)
                with open(f"{folder_path}/phase3/{MRs_type}", "rb") as f:
                    MRs_dict = pickle.load(f)
                for parameters, MRs in MRs_dict.items():
                    filer_phase3 = 0
                    parameters_int = [int(e) for e in parameters.split("_")]
                    no_of_inputs = parameters_int[0]
                    mode_input_relation = parameters_int[1]
                    mode_output_relation = parameters_int[2]
                    degree_of_input_relation = parameters_int[3]
                    degree_of_output_relation = parameters_int[4]
                    x_all_dict = MRs[0]
                    y_all_df = MRs[1]
                    y_o_isKill_df = pd.DataFrame()
                    for index_i0 in range(i0_all.shape[0]):
                        i0 = i0_all[index_i0]
                        u = Phase1_PSOSearch.comb(i0, degree_of_input_relation)
                        x_value_dict = {}
                        y_element_value_dict = {}
                        for x_name, A in x_all_dict.items():
                            x = np.dot(A, u)
                            x_value_dict[x_name] = x
                            y = settings.program(x, func_index)
                            for index_eo in range(no_of_elements_output):
                                y_element_value_dict[f"f{x_name}_{index_eo + 1}"] = y[index_eo]
                        y0 = settings.program(i0, func_index)
                        for index_eo in range(no_of_elements_output):
                            y_element_value_dict[f"fx0_{index_eo + 1}"] = y0[index_eo]
                        y_all_names = y_all_df.columns.values
                        y_all_values = np.zeros(y_all_names.shape)
                        for index_y in range(y_all_names.shape[0]):
                            y_names = list(y_all_names[index_y])
                            y_elements = []
                            for ii in range(len(y_names)):
                                try:
                                    y_elements.append(float(y_names[ii]))
                                except (TypeError, ValueError):
                                    y_elements.append(y_element_value_dict[y_names[ii]])
                            y_all_values[index_y] = np.prod(y_elements)
                        for index_MR in range(y_all_df.shape[0]):
                            B = y_all_df.iloc[index_MR, :].values
                            Bv = np.dot(B, y_all_values)
                            if np.isreal(Bv) and not np.isnan(Bv):
                                if np.abs(Bv) < 0.1:
                                    y_o_isKill_df.loc[index_MR, index_i0] = 0
                                else:
                                    y_o_isKill_df.loc[index_MR, index_i0] = 1
                            else:
                                y_o_isKill_df.loc[index_MR, index_i0] = 1
                    for index_MR in range(y_o_isKill_df.shape[0]):
                        kill_o_number = np.sum(y_o_isKill_df.iloc[index_MR, :].values)
                        cost_o = np.divide(kill_o_number, no_of_testcases)
                        if cost_o < 0.05:
                            filer_phase3 += 1
                        else:
                            MRs_dict[parameters][1] = MRs_dict[parameters][1].drop([index_MR])
                    # print(f"before filter {len(y_all_df)}")
                    # print(f"after filter left {filer_phase3}")
                    filer_phase3_df.loc[f"{func_index}_{parameters}", "phase3"] = filer_phase3
                with open(f"{folder_path}/phase3/{func_index}_MRs_other_types_after_cs_svd.pkl", "wb") as f2:
                    pickle.dump(MRs_dict, f2, pickle.HIGHEST_PROTOCOL)
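
# Example usage (sketch): re-evaluate the MRs kept after CS/SVD on fresh random inputs.
# It expects "{folder_path}/counts.csv" and the phase3/ pkl files to exist already; the
# folder name and function index below are illustrative assumptions.
def example_check_after_svd():
    checkAfterSVD(folder_path="output", func_indices=[1])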
def phase3(folder_path, func_indices, coeff_range, const_range):
    if not os.path.isdir(f"{folder_path}/phase3"):
        os.mkdir(f"{folder_path}/phase3")

    if os.path.isfile(f"{folder_path}/counts.csv"):
        results = pd.read_csv(f"{folder_path}/counts.csv", index_col=0)
    else:
        results = pd.DataFrame()
    if os.path.isfile(f"{folder_path}/performance.csv"):
        times = pd.read_csv(f"{folder_path}/performance.csv", index_col=0)
    else:
        times = pd.DataFrame()

    for func_index in func_indices:
        stats_after_cs_svd_df = {}
        time_cs_svd = {}

        # get NEI NEO
        NEI = settings.getNEI(func_index)
        NEO = settings.getNEO(func_index)

        # select the filtered MRs of the func_index
        AB_all = {}
        for filename in os.listdir(f"{folder_path}/phase2"):
            if filename.startswith(f"{func_index}_") and filename.endswith("after_filter.npz"):
                # example: np_log1p_2_1_1_1_1_after_filter.npz
                parameters = filename[-26:-17]
                time_cs_svd[parameters] = 0
                MRs = np.load(f"{folder_path}/phase2/{filename}")
                As = MRs["A_candidates"]
                Bs = MRs["B_candidates"]
                if As.shape[0] > 1:
                    AB_all[parameters] = [As, Bs]
                elif As.shape[0] == 1:
                    stats_after_cs_svd_df[parameters] = 1
                    np.savez(f'{folder_path}/phase3/{func_index}_{parameters}_after_cs_svd.npz',
                             A_candidates=As, B_candidates=Bs)
                else:
                    stats_after_cs_svd_df[parameters] = 0

        # filter using CS
        AB_all_after_CS = {}
        MRs_each_type_after_cs_svd = {}
        for parameters, ABs in AB_all.items():
            t1 = datetime.datetime.now()
            parameters_int = [int(e) for e in parameters.split("_")]
            NOI = parameters_int[0]
            MIR = parameters_int[1]
            MOR = parameters_int[2]
            DIR = parameters_int[3]
            DOR = parameters_int[4]
            A_candidates = ABs[0]
            B_candidates = ABs[1]
            A_candidates_after_CS, B_candidates_after_CS, before_CS, after_CS = z3_check(
                NEI, NEO, NOI, MIR, MOR, DIR, DOR, A_candidates, B_candidates, coeff_range, const_range)
            AB_all_after_CS[parameters] = [A_candidates_after_CS, B_candidates_after_CS]

            # for output inequality MRs, can't use svd, so just save the cs results
            if parameters_int[2] != 1:
                stats_after_cs_svd_df[parameters] = A_candidates_after_CS.shape[0]
                np.savez(f'{folder_path}/phase3/{func_index}_{parameters}_after_cs_svd.npz',
                         A_candidates=A_candidates_after_CS, B_candidates=B_candidates_after_CS)
            # do svd for output equality MRs
            else:
                # do svd for the types which have more than 1 MR
                if A_candidates_after_CS.shape[0] > 1:
                    MRs_each_type_after_cs_svd[parameters] = list(
                        svd_check({parameters: AB_all_after_CS[parameters]}, NEI, NEO))
                    stats_after_cs_svd_df[parameters] = MRs_each_type_after_cs_svd[parameters][1].shape[0]
                else:
                    stats_after_cs_svd_df[parameters] = A_candidates_after_CS.shape[0]
                    np.savez(f'{folder_path}/phase3/{func_index}_{parameters}_after_cs_svd.npz',
                             A_candidates=A_candidates_after_CS, B_candidates=B_candidates_after_CS)
            t2 = datetime.datetime.now()
            cost_time = np.round((t2 - t1).total_seconds(), 2)
            time_cs_svd[parameters] = time_cs_svd[parameters] + cost_time

        if len(MRs_each_type_after_cs_svd) > 0:
            with open(f"{folder_path}/phase3/{func_index}_MRs_other_types_after_cs_svd.pkl", "wb") as f1:
                pickle.dump(MRs_each_type_after_cs_svd, f1, pickle.HIGHEST_PROTOCOL)

        # # for groups of {equal input, equal output}, {greater, equal}, {less, equal}, use svd to simplify them
        # MRs_equal_equal = {}
        # MRs_greater_equal = {}
        # MRs_less_equal = {}
        # for parameters, candidates_after_CS in AB_all_after_CS.items():
        #     parameters_int = [int(e) for e in parameters.split("_")]
        #     if parameters_int[1] == 1 and parameters_int[2] == 1:
        #         MRs_equal_equal[parameters] = candidates_after_CS
        #     elif parameters_int[1] == 2 and parameters_int[2] == 1:
        #         MRs_greater_equal[parameters] = candidates_after_CS
        #     elif parameters_int[1] == 3 and parameters_int[2] == 1:
        #         MRs_less_equal[parameters] = candidates_after_CS
        #     else:
        #         pass
        # MRs_group_after_svd = {}
        # if len(MRs_equal_equal) > 0:
        #     t1 = datetime.datetime.now()
        #     MRs_group_after_svd["x_1_1_x_x"] = list(svd_check(MRs_equal_equal, NEI, NEO))
        #     t2 = datetime.datetime.now()
        #     cost_time = np.round((t2 - t1).total_seconds(), 2)
        #     time_cs_svd["x_1_1_x_x"] = cost_time
        #     stats_after_cs_svd_df["x_1_1_x_x"] = MRs_group_after_svd["x_1_1_x_x"][1].shape[0]
        # if len(MRs_greater_equal) > 0:
        #     t1 = datetime.datetime.now()
        #     MRs_group_after_svd["x_2_1_x_x"] = list(svd_check(MRs_greater_equal, NEI, NEO))
        #     t2 = datetime.datetime.now()
        #     cost_time = np.round((t2 - t1).total_seconds(), 2)
        #     time_cs_svd["x_2_1_x_x"] = cost_time
        #     stats_after_cs_svd_df["x_2_1_x_x"] = MRs_group_after_svd["x_2_1_x_x"][1].shape[0]
        # if len(MRs_less_equal) > 0:
        #     t1 = datetime.datetime.now()
        #     MRs_group_after_svd["x_3_1_x_x"] = list(svd_check(MRs_less_equal, NEI, NEO))
        #     t2 = datetime.datetime.now()
        #     cost_time = np.round((t2 - t1).total_seconds(), 2)
        #     time_cs_svd["x_3_1_x_x"] = cost_time
        #     stats_after_cs_svd_df["x_3_1_x_x"] = MRs_group_after_svd["x_3_1_x_x"][1].shape[0]
        #
        # if len(MRs_group_after_svd) > 0:
        #     with open(f"{folder_path}/phase3/{func_index}_MRs_group_after_cs_svd.pkl", "wb") as f2:
        #         pickle.dump(MRs_group_after_svd, f2, pickle.HIGHEST_PROTOCOL)

        # save the number of MRs left after cs/svd
        for parameters, number in stats_after_cs_svd_df.items():
            results.loc[f"{func_index}_{parameters}", "phase3"] = number
        results.to_csv(f"{folder_path}/counts.csv")
        for parameters, time in time_cs_svd.items():
            times.loc[f"{func_index}_{parameters}", "phase3"] = time
        # assumption: persist the timing data alongside the counts
        times.to_csv(f"{folder_path}/performance.csv")
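
# End-to-end driver (sketch): the three phases plus the post-SVD check chained for one
# program and one MR parameter setting. All concrete values below (index 1, "2_1_1_1_1",
# pso_runs=10 and the const/coeff bounds) are illustrative assumptions, not project defaults.
def example_pipeline():
    output_path = "output"
    func_index = 1
    parameters = "2_1_1_1_1"                     # NOI_MIR_MOR_DIR_DOR
    inputcases_range = settings.get_input_range(func_index)
    const_range = [-10, 10]                      # assumed constant bounds
    coeff_range = [-5, 5]                        # assumed coefficient bounds
    # 1. PSO search for candidate MRs
    phase1(10, output_path, func_index, parameters, inputcases_range, const_range, coeff_range)
    # 2. filter the candidates on fresh random inputs
    phase2(output_path, parameters, func_index, f"{func_index}_{parameters}.npz")
    # 3. remove redundancy with constraint solving (CS) and SVD
    phase3(output_path, [func_index], coeff_range, const_range)
    # 4. re-check the surviving MRs on new test cases
    checkAfterSVD(output_path, [func_index])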