APPROACHES = ['KNN'] TRAINING_SET_SIZE = [10000, 30000, 50000] # TRAINING_SET_SIZE = [int(sys.argv[1])] TEST_SET_SIZE = [50000] # TEST_SET_SIZE = [int(sys.argv[3])] VALIDATION_SET_SIZE = [1000, 3000, 5000] # VALIDATION_SET_SIZE = [int(sys.argv[2])] # TEST_ITERATIONS = 1 # 100 TEST_ITERATIONS_BEG = 0 TEST_ITERATIONS_END = 50 TRAIN_ITERATIONS = 3 ENHANCED = False RESULT_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_PSW/RESULT_FOLDER_REMAPPING/" utilities.createFolder(RESULT_FOLDER) RESULT_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_PSW/RESULT_FOLDER_REMAPPING/KNN/" utilities.createFolder(RESULT_FOLDER) DATA_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_PSW/DATA_FOLDER_AFTER_OUR_PREPROCESSING/" DATA_FOLDER_TEST = "/home/comete/mromanel/MILES_EXP/EXP_PSW/DATA_FOLDER/" G_MATRIX_PATH = '/home/comete/mromanel/MILES_EXP/EXP_PSW/G_MAT_FOLDER/G_MAT' def read_command_line_options(): thismodule = sys.modules[__name__] for idx, key_val in enumerate(sys.argv, 0):
def main_EXP_G_VULN_GEO_LOCATION_create_data():
    """Sample and pickle training, validation and test sets from the channel.

    The channel matrix and the per-secret occurrence counts are loaded from
    CHANNEL_MATRIX_FILE and ORIGINAL_SECRETS_OCCURRENCES_FILE. For every
    factor in MULTIPLICATIVE_FACTOR_FOR_SETS_CARDINALITY the counts are
    rescaled and, for each training iteration, one training set, one
    validation set and TEST_ITERATIONS test sets are drawn with
    sample_from_channel and written below DATA_FOLDER.
    """
    channel_matrix = pn.read_pickle(path=CHANNEL_MATRIX_FILE)
    occurrences = pn.read_pickle(path=ORIGINAL_SECRETS_OCCURRENCES_FILE)

    # Total and largest occurrence count. Only the total is used below; the
    # maxima are kept for the (currently disabled) debug output.
    tot_occurr = 0
    maxx_occurr = 0
    for secret in occurrences:
        count = occurrences[secret]
        maxx_occurr = max(maxx_occurr, count)
        tot_occurr += count

    # Empirical prior over the secrets and its largest entry.
    secrets_prior_dictionary = {}
    maxx_freq = 0
    for secret in occurrences:
        prior = occurrences[secret] / float(tot_occurr)
        secrets_prior_dictionary[secret] = prior
        maxx_freq = max(maxx_freq, prior)

    for mult_card in MULTIPLICATIVE_FACTOR_FOR_SETS_CARDINALITY:
        # Samples to draw per secret: training/test sets share the same
        # counts, the validation set uses a fraction of them.
        samples_tr_ts = {
            secret: int(round(occurrences[secret] * float(mult_card), 0))
            for secret in occurrences
        }
        samples_val = {
            secret: int(
                round(
                    occurrences[secret] * float(mult_card) *
                    float(VALIDATION_CARD_AS_FRACTION_OF_TR_CARD), 0))
            for secret in occurrences
        }

        training_set_size = sum(samples_tr_ts[key] for key in samples_tr_ts)
        test_set_size = training_set_size
        validation_set_size = sum(samples_val[key] for key in samples_val)

        for train_iteration in tqdm(range(TRAINING_ITERATIONS)):
            store_folder = "".join([
                DATA_FOLDER,
                str(training_set_size), "_training_and_",
                str(validation_set_size), "_validation_and_",
                str(test_set_size), "_test_store_folder_train_iteration_",
                str(train_iteration), "/",
            ])
            utilities.createFolder(store_folder)

            # Training set.
            training_set_mat = sample_from_channel(
                channel_matrix=channel_matrix,
                rndmstt=utilities.create_new_rndm_state(),
                samples_per_secret_dictionary=samples_tr_ts)
            pn.DataFrame(
                data=training_set_mat,
                columns=["O_train", "S_train"],
            ).to_pickle(path=store_folder + "/training_set.pkl")
            print(training_set_mat.shape)

            # Validation set.
            validation_set_mat = sample_from_channel(
                channel_matrix=channel_matrix,
                rndmstt=utilities.create_new_rndm_state(),
                samples_per_secret_dictionary=samples_val)
            pn.DataFrame(
                data=validation_set_mat,
                columns=["O_val", "S_val"],
            ).to_pickle(path=store_folder + "/validation_set.pkl")
            print(validation_set_mat.shape)

            # Test sets, each drawn with a fresh random state.
            for test_iteration in range(TEST_ITERATIONS):
                test_set_mat = sample_from_channel(
                    channel_matrix=channel_matrix,
                    rndmstt=utilities.create_new_rndm_state(),
                    samples_per_secret_dictionary=samples_tr_ts)
                test_set_store_folder = (
                    store_folder + str(test_set_size) + "_size_test_sets/")
                utilities.createFolder(path=test_set_store_folder)
                pn.DataFrame(
                    data=test_set_mat,
                    columns=["O_test", "S_test"],
                ).to_pickle(path=test_set_store_folder + "/test_set_" +
                            str(test_iteration) + ".pkl")
def main_EXP_G_VULN_PSW_train_single_ANN_remapping():
    """Train a single classifier network on the remapped PSW data.

    Steps: read the command-line options, create the result folders, load
    the pickled training and validation sets, min-max scale the observables
    (scaler fitted on the training data only) and train the network through
    secrets_classifier.ClassifierNetworkManager.

    The run is configured through module-level names (MODEL_NAME,
    TRAINING_SIZE, VALIDATION_SIZE, TRAINING_ITERATION, NUM_CLASSES, ...);
    presumably these are populated by read_command_line_options -- confirm
    against that function.
    """
    read_command_line_options()
    thismodule = sys.modules[__name__]

    exp_folder = "/home/comete/mromanel/MILES_EXP/EXP_PSW/"
    utilities.createFolder(exp_folder)

    results_root = exp_folder + "RESULT_FOLDER_REMAPPING/"
    utilities.createFolder(results_root)

    result_folder = results_root + MODEL_NAME + "/"
    utilities.createFolder(result_folder)

    result_folder = result_folder + str(
        TRAINING_SIZE) + "_training_size_and_" + str(
            VALIDATION_SIZE) + "_validation_size_iteration_" + str(
                TRAINING_ITERATION) + "/"
    utilities.createFolder(result_folder)

    data_folder = exp_folder + "DATA_FOLDER_AFTER_OUR_PREPROCESSING/"
    ANN_data_folder = data_folder + str(TRAINING_SIZE) + "_training_and_" + str(
        VALIDATION_SIZE) + "_validation_store_folder_train_iteration_" + str(
            TRAINING_ITERATION)

    # ------------------------------ load datasets ---------------------------
    # Joining with a single space reproduces the output of the former
    # Python 2 print statement and also works under Python 3.
    print(" ".join(["\n\n\nDATA ARE LOADED FROM ", ANN_data_folder, "\n\n\n"]))

    # "wa" is not a valid mode (Python 3 rejects it); plain "w" is what it
    # amounted to. The context manager closes the file even on error.
    with open(result_folder + "/log_file.txt", "w") as log_file:
        log_file.write("\n\n\nDATA ARE LOADED FROM " + ANN_data_folder +
                       "\n\n\n")

    # Column 0 holds the observable, column 1 the remapped class label.
    training_set = pn.read_pickle(path=ANN_data_folder + "/training_set.pkl")
    O_train = training_set[:, 0]
    Z_train = training_set[:, 1]
    Z_train_enc = to_categorical(y=Z_train, num_classes=NUM_CLASSES)

    # Fixed: the validation data used to be read from training_set.pkl, so
    # the network was validated on its own training data.
    val_set = pn.read_pickle(path=ANN_data_folder + "/validation_set.pkl")
    O_val = val_set[:, 0]
    Z_val = val_set[:, 1]
    Z_val_enc = to_categorical(y=Z_val, num_classes=NUM_CLASSES)

    # Fit the scaler on the training observables only and reuse it for the
    # validation observables.
    min_max_scaler = preprocessing.MinMaxScaler()
    O_train = min_max_scaler.fit_transform(O_train.reshape(-1, 1))
    O_val = min_max_scaler.transform(O_val.reshape(-1, 1))

    # ------------------- ANN: instantiate, train, evaluate ------------------
    input_x_dimension = 1 if len(O_train.shape) == 1 else O_train.shape[1]

    # No batch size given: use the whole training set as one batch.
    if thismodule.BATCH_SIZE is None:
        thismodule.BATCH_SIZE = O_train.shape[0]

    secrets_classifier_manager = secrets_classifier.ClassifierNetworkManager(
        number_of_classes=Z_train_enc.shape[1],
        learning_rate=LEARNING_RATE,
        hidden_layers_card=HIDDEN_LAYERS_CARD,
        hidden_neurons_card=HIDDEN_NEAURONS_CARD,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        id_gpu=ID_GPU,
        perc_gpu=PERC_GPU,
        input_x_dimension=input_x_dimension)

    secrets_classifier_manager.train_classifier_net(
        training_set=O_train,
        training_supervision=Z_train_enc,
        validation_set=O_val,
        validation_supervision=Z_val_enc,
        results_folder=result_folder)
SMALL_SQUARE_SIZE = 5000 # BIG_SQUARE_SIZE = 6000 SMALL_SQUARE_CELL_SIDE_LENGTH = 250 CELLS_PER_SIDE = SMALL_SQUARE_SIZE // SMALL_SQUARE_CELL_SIDE_LENGTH DATABASE_PATH = "/home/comete/mromanel/MILES_EXP/gowalla/loc-gowalla_totalCheckins.txt" COLNAMES = [ "user", "check_in_timestamp", "latitude", "longitude", "location_id" ] KEEP_COLS = ["latitude", "longitude"] STORE_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/" utilities.createFolder(STORE_FOLDER) def main_EXP_GEO_LOCATION_QIF_LIB_SETTING_retrieve_data_from_DB(): ######################################################################################################################## ############################################# SQUARES OF INTEREST #################################################### ######################################################################################################################## center = position_class.position(lat=CENTER_LAT, lon=CENTER_LON) small_square_min_lat, small_square_max_lat, small_square_min_lon, small_square_max_lon = \ SOI_utilities.create_square_limit( central_position=center, side_length=SMALL_SQUARE_SIZE) SMALL_SQUARE = { "square_min_lat": small_square_min_lat,
from tqdm import tqdm from utilities_pckg import utilities from qif import channel, measure, probab, metric, point, mechanism, lp import math from tabulate import tabulate tqdm.monitor_interval = 0 TRAINING_SET_SIZE = [100, 1000, 10000, 30000, 50000] # [90000, 270000, 450000] # TEST_SET_SIZE = [90000] # [90000, 270000, 450000] VALIDATION_SET_SIZE = [10, 100, 1000, 3000, 5000] # [9000, 27000, 45000] TEST_ITERATIONS = 50 TRAIN_ITERATIONS = 5 BIS_EXP_GEO_LOCATION_FOLDER = "/home/comete/mromanel/MILES_EXP/BIS_EXP_GEO_LOCATION_QIF_LIB_SETTING/" utilities.createFolder(BIS_EXP_GEO_LOCATION_FOLDER) WIDTH = 20 # in cells HEIGHT = 20 # in cells CELL_SIZE = 250. # in length units (meters) EUCLID = euclid = metric.euclidean(point) # Euclidean distance on qif.point MAX_GAIN = 4 ALPHA = 0.95 DATA_FOLDER = BIS_EXP_GEO_LOCATION_FOLDER + "DATA_FOLDER/" utilities.createFolder(DATA_FOLDER) G_MAT_PATH = BIS_EXP_GEO_LOCATION_FOLDER + "G_OBJ/g_mat.pkl" # set solver lp.defaults.solver = "GLOP"
def main_BIS_EXP_GEO_LOCATION_create_channel_and_data():
    """Build the geo-location channel and pickle training/validation sets.

    Loads the prior over the grid cells and the gain matrix, solves for the
    channel with mechanism.g_vuln.min_loss_given_max_vuln, converts the
    g-vulnerability problem with measure.g_vuln.g_to_bayes and then draws
    the data sets with create_single_dataset, storing them below
    DATA_FOLDER.
    """
    # Diagonal of the grid.
    diag = euclid(point(0, 0), point(CELL_SIZE * WIDTH, CELL_SIZE * HEIGHT))

    # Loss function: euclidean distance between cells.
    loss = euclid_cell

    # Some sanity checks.
    sanity_checks(considered_cell=132)

    max_vuln = f(CELL_SIZE)  # maximum allowed posterior g-vulnerability
    hard_max_loss = 2 * CELL_SIZE  # loss(x,y) > hard_max_loss => C[x,y] = 0

    n_cells = WIDTH * HEIGHT
    n_secrets = n_cells
    n_outputs = n_cells
    n_guesses = n_cells

    pi_dic = pn.read_pickle(
        path="/home/comete/mromanel/MILES_EXP/BIS_EXP_GEO_LOCATION_QIF_LIB_SETTING/file_prior_distr.pkl"
    )
    print(pi_dic)

    # Arrange the per-cell prior on the grid.
    pi_mat = np.zeros((WIDTH, HEIGHT))
    for row in range(WIDTH):
        for col in range(HEIGHT):
            pi_mat[row, col] = pi_dic[WIDTH * row + col]

    print(
        "\n\n\n Table for pi where up is south, down is north, left is west, right is east."
    )
    headers = [str(col) for col in range(20)]
    print(tabulate(pi_mat, headers, tablefmt="fancy_grid"))

    G = pn.read_pickle(G_MAT_PATH)

    # pi[i] is the prior probability of cell i.
    pi = pi_mat.flatten()
    print(pi)

    # Sanity check: the flattened prior must match the dictionary order.
    list_of_cells_probs = [pi_dic[cell] for cell in range(len(pi_dic))]
    for cell, prob in enumerate(pi):
        if prob != list_of_cells_probs[cell]:
            sys.exit("ERROR in prior")

    print(euclid_cell(13, 20))
    print(euclid_cell(20, 13))

    # Solve for the channel.
    C = mechanism.g_vuln.min_loss_given_max_vuln(pi, n_outputs, n_guesses,
                                                 max_vuln, gain, loss,
                                                 hard_max_loss)

    # Get rho, R, a, b.
    (rho, R, a, b) = measure.g_vuln.g_to_bayes(G, pi)
    print("a --->" + str(a))
    print("b --->" + str(b))

    # For any C we have Vg[pi, C] = a * V[rho, RC] + b.
    print("         Vg[pi, C]: ", measure.g_vuln.posterior(G, pi, C))
    print("a * V[rho, RC] + b:",
          a * measure.bayes_vuln.posterior(rho, R.dot(C)) + b)

    probe = create_single_dataset(size=10000, R=R, rho=rho, C=C)
    print(len(np.unique(probe[:, 0])))

    # So Vg can be estimated in a black-box manner by generating samples
    # according to rho and RC.
    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit("ERROR! Different size lists' lengths.")

    for training_set_size, validation_set_size in zip(TRAINING_SET_SIZE,
                                                      VALIDATION_SET_SIZE):
        for train_iteration in range(TRAIN_ITERATIONS):
            store_folder = "".join([
                DATA_FOLDER,
                str(training_set_size), "_training_and_",
                str(validation_set_size),
                "_validation_store_folder_train_iteration_",
                str(train_iteration), "/",
            ])
            utilities.createFolder(path=store_folder)

            tr = create_single_dataset(size=training_set_size,
                                       R=R,
                                       rho=rho,
                                       C=C)
            val = create_single_dataset(size=validation_set_size,
                                        R=R,
                                        rho=rho,
                                        C=C)

            pn.to_pickle(obj=tr,
                         path=store_folder + "training_set.pkl",
                         protocol=2)
            pn.to_pickle(obj=val,
                         path=store_folder + "validation_set.pkl",
                         protocol=2)

            print("\n\n\nSize " + str(training_set_size) +
                  ", train iteration " + str(train_iteration))
def main_EXP_G_VULN_MULTIPLE_GUESSES_create_data():
    """Create and pickle the data sets for the multiple-guesses experiment.

    Loads the channel from CHANNEL_PATH, checks that its row and column
    labels are strictly increasing, prints the posterior g-vulnerability
    and draws training/validation sets (and, when CREATE_TEST_SET is set,
    the test sets) with create_single_dataset. Everything is stored below
    DATA_FOLDER.

    The process exits through sys.exit when the channel labels are not
    strictly increasing.
    """
    import sys

    print(
        "\n####################################################################################"
    )
    print(
        "#########################  geometric distribution loading  #########################"
    )
    print(
        "####################################################################################\n"
    )

    utilities.createFolder(DATA_FOLDER)

    channel_matrix_df = pn.read_pickle(path=CHANNEL_PATH)

    # Sanity check: labels must be strictly increasing on both axes.
    # Fixed: the column check used to compare the *index* labels (a copy of
    # the row check), so the columns were never actually validated.
    col_labels = channel_matrix_df.columns.values
    for i in range(len(col_labels) - 1):
        if col_labels[i + 1] <= col_labels[i]:
            sys.exit("BAD CHANNEL FORMAT: cols")

    row_labels = channel_matrix_df.index.values
    for i in range(len(row_labels) - 1):
        if row_labels[i + 1] <= row_labels[i]:
            sys.exit("BAD CHANNEL FORMAT: rows")

    # The data frame is transposed before being used as the channel.
    channel_matrix = np.transpose(channel_matrix_df.values)
    print(channel_matrix.shape)

    print("Vg(pi, C)", measure.g_vuln.posterior(gain, pi, C=channel_matrix))

    # --------------------------- create training sets ----------------------
    # Column 0 holds the observables and column 1 the secrets.
    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        err_hndl(str_="array_sizes_not_matching", add=inspect.stack()[0][3])

    for size_list_iterator in range(len(TRAINING_SET_SIZE)):
        training_set_size = TRAINING_SET_SIZE[size_list_iterator]
        validation_set_size = VALIDATION_SET_SIZE[size_list_iterator]

        for train_iteration in range(TRAIN_ITERATIONS):
            training_set_mat = create_single_dataset(size=training_set_size,
                                                     C=channel_matrix)

            store_folder = DATA_FOLDER + str(
                training_set_size) + "_training_and_" + str(
                    validation_set_size
                ) + "_validation_store_folder_train_iteration_" + str(
                    train_iteration) + "/"
            utilities.createFolder(path=store_folder)

            training_df = pn.DataFrame(data=training_set_mat,
                                       columns=["O_train", "S_train"])
            pn.to_pickle(obj=training_df.values,
                         path=store_folder + "/training_set.pkl",
                         protocol=2)

            validation_set_mat = create_single_dataset(
                size=validation_set_size, C=channel_matrix)
            validation_df = pn.DataFrame(data=validation_set_mat,
                                         columns=["O_val", "S_val"])
            pn.to_pickle(obj=validation_df.values,
                         path=store_folder + "/validation_set.pkl",
                         protocol=2)

    # ----------------------------- create test sets ------------------------
    if CREATE_TEST_SET:
        # Number of distinct observables seen in each test set.
        list_unq = []

        print(
            "\n####################################################################################"
        )
        print(
            "#################################  create test sets ################################"
        )
        print(
            "####################################################################################\n"
        )

        test_set_size = TEST_SET_SIZE[0]
        test_set_store_folder = DATA_FOLDER + str(
            test_set_size) + "_size_test_sets/"

        for test_iteration in range(TEST_ITERATIONS):
            test_set_mat = create_single_dataset(size=test_set_size,
                                                 C=channel_matrix)
            list_unq.append(len(np.unique(test_set_mat[:, 0])))

            utilities.createFolder(path=test_set_store_folder)

            test_df = pn.DataFrame(data=test_set_mat,
                                   columns=["O_test", "S_test"])
            pn.to_pickle(obj=test_df.values,
                         path=test_set_store_folder + "/test_set_" +
                         str(test_iteration) + ".pkl",
                         protocol=2)

        print(list_unq)
TRAINING_SET_SIZE = [100, 1000, 10000, 30000, 50000] TEST_SET_SIZE = [50000] VALIDATION_SET_SIZE = [10, 100, 1000, 3000, 5000] TEST_ITERATIONS = 50 TRAIN_ITERATIONS = 5 WIDTH = 20 # in cells HEIGHT = 20 # in cells CELL_SIZE = 250. # in length units (meters) EUCLID = euclid = metric.euclidean(point) # Euclidean distance on qif.point MAX_GAIN = 4 ALPHA = 0.95 DATA_FOLDER = "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/DATA_FOLDER/" utilities.createFolder(path=DATA_FOLDER) CHANNEL_PATH = "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/channel.pkl" G_OBJ_PATH = "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/G_OBJ/" utilities.createFolder(path=G_OBJ_PATH) G_MAT_PATH = G_OBJ_PATH + "g_mat.pkl" G_MAT_ROWS_PATH = G_OBJ_PATH + "g_mat_rows.pkl" G_MAT_COLS_PATH = G_OBJ_PATH + "g_mat_cols.pkl" # set solver lp.defaults.solver = "GLOP" # euclidean distance on cell ids def euclid_cell(a, b):
def main_BIS_EXP_G_VULN_MULTIPLE_GUESSES_create_channel_and_data():
    """Pickle training/validation sets for the multiple-guesses experiment.

    Uses a uniform prior over 10 secrets, the gain matrix from G_MAT_FILE
    and the (transposed) channel from CHANNEL_FILE. The g-vulnerability
    problem is converted with measure.g_vuln.g_to_bayes and the data sets
    are drawn with create_single_dataset and stored below DATA_FOLDER.
    """
    # Prior distribution.
    prior = probab.uniform(10)
    print(prior.shape)

    # Gain matrix.
    gain_matrix = pn.read_pickle(path=G_MAT_FILE)
    print(gain_matrix.shape)

    # Channel matrix, transposed with respect to the stored data frame.
    channel_matrix = np.transpose(pn.read_pickle(path=CHANNEL_FILE).values)
    print(channel_matrix.shape)

    # Get rho, R, a, b.
    rho, remap, a, b = measure.g_vuln.g_to_bayes(gain_matrix, prior)
    print("a --->" + str(a))
    print("b --->" + str(b))

    # For any C we have Vg[pi, C] = a * V[rho, RC] + b.
    print("         Vg[pi, C]: ",
          measure.g_vuln.posterior(gain_matrix, prior, channel_matrix))
    print("a * V[rho, RC] + b:",
          a * measure.bayes_vuln.posterior(rho, remap.dot(channel_matrix)) + b)

    probe = create_single_dataset(size=10000,
                                  R=remap,
                                  rho=rho,
                                  C=channel_matrix)
    print(len(np.unique(probe[:, 0])))

    # So Vg can be estimated in a black-box manner by generating samples
    # according to rho and RC.
    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit("ERROR! Different size lists' lengths.")

    for training_set_size, validation_set_size in zip(TRAINING_SET_SIZE,
                                                      VALIDATION_SET_SIZE):
        for train_iteration in range(TRAIN_ITERATIONS):
            store_folder = "".join([
                DATA_FOLDER,
                str(training_set_size), "_training_and_",
                str(validation_set_size),
                "_validation_store_folder_train_iteration_",
                str(train_iteration), "/",
            ])
            utilities.createFolder(path=store_folder)

            tr = create_single_dataset(size=training_set_size,
                                       R=remap,
                                       rho=rho,
                                       C=channel_matrix)
            val = create_single_dataset(size=validation_set_size,
                                        R=remap,
                                        rho=rho,
                                        C=channel_matrix)

            pn.to_pickle(obj=tr,
                         path=store_folder + "training_set.pkl",
                         protocol=2)
            pn.to_pickle(obj=val,
                         path=store_folder + "validation_set.pkl",
                         protocol=2)

            print("\n\n\nSize " + str(training_set_size) +
                  ", train iteration " + str(train_iteration))
def main_EXP_G_VULN_GEO_LOCATION_train_single_ANN_remapping():
    """Train a single classifier network on the remapped geo-location data.

    Steps: read the command-line options, create the result folders, load
    the pickled training and validation data frames, min-max scale the
    observables (scaler fitted on the training data only), one-hot encode
    the remapped labels and train the network through
    secrets_classifier.ClassifierNetworkManager.

    The run is configured through module-level names (MODEL_NAME,
    TRAINING_SIZE, VALIDATION_SIZE, TRAINING_ITERATION, N_CLASSES, ...);
    presumably these are populated by read_command_line_options -- confirm
    against that function.
    """
    read_command_line_options()
    thismodule = sys.modules[__name__]

    exp_folder = "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/"
    utilities.createFolder(exp_folder)

    results_root = exp_folder + "RESULT_FOLDER_REMAPPING/"
    utilities.createFolder(results_root)

    result_folder = results_root + MODEL_NAME + "/"
    utilities.createFolder(result_folder)

    result_folder = result_folder + str(
        TRAINING_SIZE) + "_training_size_and_" + str(
            VALIDATION_SIZE) + "_validation_size_iteration_" + str(
                TRAINING_ITERATION) + "/"
    utilities.createFolder(result_folder)

    data_folder = exp_folder + "DATA_FOLDER_AFTER_OUR_PREPROCESSING/"
    ANN_data_folder = data_folder + str(TRAINING_SIZE) + "_training_and_" + str(
        VALIDATION_SIZE) + "_validation_store_folder_train_iteration_" + str(
            TRAINING_ITERATION)

    # ------------------------------ load datasets ---------------------------
    # Joining with a single space reproduces the output of the former
    # Python 2 print statement and also works under Python 3.
    print(" ".join(["\n\n\nDATA ARE LOADED FROM ", ANN_data_folder, "\n\n\n"]))

    # "wa" is not a valid mode (Python 3 rejects it); plain "w" is what it
    # amounted to. The context manager closes the file even on error.
    with open(result_folder + "/log_file.txt", "w") as log_file:
        log_file.write("\n\n\nDATA ARE LOADED FROM " + ANN_data_folder +
                       "\n\n\n")

    # Column 0 holds the observable and column 2 the remapped label used as
    # supervision; column 1 is not needed for training.
    training_set = pn.read_pickle(path=ANN_data_folder + "/training_set.pkl")
    O_train = training_set.values[:, 0]
    Z_train = training_set.values[:, 2]

    val_set = pn.read_pickle(path=ANN_data_folder + "/validation_set.pkl")
    O_val = val_set.values[:, 0]
    Z_val = val_set.values[:, 2]

    # Fit the scaler on the training observables only and reuse it for the
    # validation observables.
    min_max_scaler = preprocessing.MinMaxScaler()
    O_train = min_max_scaler.fit_transform(O_train.reshape(-1, 1))
    O_val = min_max_scaler.transform(O_val.reshape(-1, 1))

    Z_train_enc = one_hot_enc(y=Z_train, num_classes=N_CLASSES)
    Z_val_enc = one_hot_enc(y=Z_val, num_classes=N_CLASSES)

    # ------------------- ANN: instantiate, train, evaluate ------------------
    input_x_dimension = 1 if len(O_train.shape) == 1 else O_train.shape[1]

    # No batch size given: use the whole training set as one batch.
    if thismodule.BATCH_SIZE is None:
        thismodule.BATCH_SIZE = O_train.shape[0]

    secrets_classifier_manager = secrets_classifier.ClassifierNetworkManager(
        number_of_classes=Z_train_enc.shape[1],
        learning_rate=LEARNING_RATE,
        hidden_layers_card=HIDDEN_LAYERS_CARD,
        hidden_neurons_card=HIDDEN_NEAURONS_CARD,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        id_gpu=ID_GPU,
        perc_gpu=PERC_GPU,
        input_x_dimension=input_x_dimension)

    secrets_classifier_manager.train_classifier_net(
        training_set=O_train,
        training_supervision=Z_train_enc,
        validation_set=O_val,
        validation_supervision=Z_val_enc,
        results_folder=result_folder)
import numpy as np
import pandas as pn
from tqdm import tqdm
from utilities_pckg import utilities
from qif import channel, measure, probab

tqdm.monitor_interval = 0

# Data set sizes; TRAINING_SET_SIZE and VALIDATION_SET_SIZE are paired by
# position.
TRAINING_SET_SIZE = [10000, 30000, 50000]  # [90000, 270000, 450000]
TEST_SET_SIZE = [90000]  # [90000, 270000, 450000]
VALIDATION_SET_SIZE = [1000, 3000, 5000]  # [9000, 27000, 45000]

TEST_ITERATIONS = 50
TRAIN_ITERATIONS = 5

BIS_EXP_G_VULN_MULTIPLE_GUESSES_FOLDER = "/home/comete/mromanel/MILES_EXP/BIS_EXP_G_VULN_MULTIPLE_GUESSES/"
utilities.createFolder(BIS_EXP_G_VULN_MULTIPLE_GUESSES_FOLDER)

CHANNEL_FILE = BIS_EXP_G_VULN_MULTIPLE_GUESSES_FOLDER + "channel_df_norm.pkl"

G_MAT_FILE = BIS_EXP_G_VULN_MULTIPLE_GUESSES_FOLDER + "G_MAT_FOLDER/g_matrix_10_secrets_2_guesses.pkl"

DATA_FOLDER = BIS_EXP_G_VULN_MULTIPLE_GUESSES_FOLDER + "DATA_FOLDER/"
utilities.createFolder(DATA_FOLDER)


##### draw from rho/RC, black box
def execute_C(x, C):
    """Run the channel C under secret x and return one drawn output y.

    C is treated as a black box: only row x is read and handed to
    probab.draw, whose result is returned unchanged.
    """
    row_for_secret = C[x, :]
    return probab.draw(row_for_secret)
def main_BIS_EXP_G_VULN_DP_train_single_ANN_remapping():
    """Train a single classifier network on the remapped DP data.

    Steps: read the command-line options, create the result folders, load
    the pickled training and validation arrays, scale the observables with
    the global minimum/maximum of the training observables and train the
    network through secrets_classifier.ClassifierNetworkManager.

    The run is configured through module-level names (MODEL_NAME,
    TRAINING_SIZE, VALIDATION_SIZE, TRAINING_ITERATION, NUM_CLASSES, ...);
    presumably these are populated by read_command_line_options -- confirm
    against that function.
    """
    read_command_line_options()
    thismodule = sys.modules[__name__]

    exp_folder = "/home/comete/mromanel/MILES_EXP/BIS_EXP_G_VULN_DP_FOLDER/"
    utilities.createFolder(exp_folder)

    results_root = exp_folder + "RESULT_FOLDER_REMAPPING/"
    utilities.createFolder(results_root)

    result_folder = results_root + MODEL_NAME + "/"
    utilities.createFolder(result_folder)

    result_folder = result_folder + str(
        TRAINING_SIZE) + "_training_size_and_" + str(
            VALIDATION_SIZE) + "_validation_size_iteration_" + str(
                TRAINING_ITERATION) + "/"
    utilities.createFolder(result_folder)

    data_folder = exp_folder + "DATA_FOLDER/"
    ANN_data_folder = data_folder + str(TRAINING_SIZE) + "_training_and_" + str(
        VALIDATION_SIZE) + "_validation_store_folder_train_iteration_" + str(
            TRAINING_ITERATION)

    # ------------------------------ load datasets ---------------------------
    # Joining with a single space reproduces the output of the former
    # Python 2 print statement and also works under Python 3.
    print(" ".join(["\n\n\nDATA ARE LOADED FROM ", ANN_data_folder, "\n\n\n"]))

    # "wa" is not a valid mode (Python 3 rejects it); plain "w" is what it
    # amounted to. The context manager closes the file even on error.
    with open(result_folder + "/log_file.txt", "w") as log_file:
        log_file.write("\n\n\nDATA ARE LOADED FROM " + ANN_data_folder +
                       "\n\n\n")

    # All columns but the last two are observables; the last column is the
    # remapped label used as supervision (the second to last is not needed
    # for training).
    training_set = pn.read_pickle(path=ANN_data_folder + "/training_set.pkl")
    O_train = training_set[:, 0:training_set.shape[1] - 2]
    print(O_train.shape)
    Z_train = training_set[:, -1]
    Z_train_enc = to_categorical(y=Z_train, num_classes=NUM_CLASSES)

    val_set = pn.read_pickle(path=ANN_data_folder + "/validation_set.pkl")
    O_val = val_set[:, 0:val_set.shape[1] - 2]
    Z_val = val_set[:, -1]
    Z_val_enc = to_categorical(y=Z_val, num_classes=NUM_CLASSES)

    # Scaling bounds come from the training observables only and are reused
    # for the validation observables.
    min_ = np.min(O_train)
    max_ = np.max(O_train)

    O_train = preprocess.scaler_zero_one_all_cols(data=O_train,
                                                  min_=min_,
                                                  max_=max_)
    O_val = preprocess.scaler_zero_one_all_cols(data=O_val,
                                                min_=min_,
                                                max_=max_)

    # ------------------- ANN: instantiate, train, evaluate ------------------
    input_x_dimension = 1 if len(O_train.shape) == 1 else O_train.shape[1]

    # No batch size given: use the whole training set as one batch.
    if thismodule.BATCH_SIZE is None:
        thismodule.BATCH_SIZE = O_train.shape[0]

    secrets_classifier_manager = secrets_classifier.ClassifierNetworkManager(
        number_of_classes=Z_train_enc.shape[1],
        learning_rate=LEARNING_RATE,
        hidden_layers_card=HIDDEN_LAYERS_CARD,
        hidden_neurons_card=HIDDEN_NEAURONS_CARD,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        id_gpu=ID_GPU,
        perc_gpu=PERC_GPU,
        input_x_dimension=input_x_dimension)

    secrets_classifier_manager.train_classifier_net(
        training_set=O_train,
        training_supervision=Z_train_enc,
        validation_set=O_val,
        validation_supervision=Z_val_enc,
        results_folder=result_folder)
def main_EXP_GEO_LOCATION_QIF_LIB_SETTING_create_channel_and_g_mat_and_data():
    """Solve for the geo-location channel and generate the pickled datasets.

    Steps performed:

    1. Run ``sanity_checks`` and load the prior over grid cells from
       ``file_prior_distr.pkl`` (a mapping ``cell_id -> probability``).
    2. Arrange the prior on the grid, print it, flatten it to the vector
       ``pi`` and verify that ``pi[k] == pi_dic[k]`` for every cell.
    3. Verify that ``create_gain_matrix()`` and ``create_gain_matrix2()``
       agree element by element.
    4. Compute the channel ``C`` with
       ``mechanism.g_vuln.min_loss_given_max_vuln``.
    5. For every (training size, validation size) pair and every training
       iteration, sample a training and a validation set with
       ``create_single_dataset`` and pickle them (protocol 2).
    6. If ``CREATE_TEST_SET`` is true, sample and pickle
       ``TEST_ITERATIONS`` test sets of size ``TEST_SET_SIZE[0]``.

    Despite the function name, nothing here writes the channel or the gain
    matrix to disk: that code exists only as commented-out lines in the
    original (see the note in the body).

    Any failed consistency check terminates the process via ``sys.exit``.

    Returns:
        None.
    """
    # some sanity checks
    sanity_checks(considered_cell=132)

    # loss function, just euclidean distance
    loss = euclid_cell

    max_vuln = f(CELL_SIZE)  # maximum allowed posterior g-vulnerability
    hard_max_loss = 2 * CELL_SIZE  # loss(x,y) > hard_max_loss => C[x,y] = 0

    n_outputs = n_guesses = WIDTH * HEIGHT

    # ------------------------------------------------------------------
    # prior
    # ------------------------------------------------------------------
    # Path passed positionally: the keyword was renamed from ``path`` to
    # ``filepath_or_buffer`` in pandas 1.0.
    pi_dic = pn.read_pickle(
        "/home/comete/mromanel/MILES_EXP/EXP_GEO_LOCATION_QIF_LIB_SETTING/file_prior_distr.pkl"
    )
    print(pi_dic)

    # Cell ids are laid out row-major with stride WIDTH, so the matrix needs
    # WIDTH columns for flatten() to return the prior in cell-id order.  The
    # original allocated (WIDTH, HEIGHT), which is only equivalent on square
    # grids; on any other grid the sanity check below could not pass.
    pi_mat = np.zeros((HEIGHT, WIDTH))
    for row in range(HEIGHT):
        for col in range(WIDTH):
            pi_mat[row, col] = pi_dic[WIDTH * row + col]

    print(
        "\n\n\n Table for pi where up is south, down is north, left is west, right is east."
    )
    # Column labels "0" .. "19", exactly as in the original.
    headers = [str(label) for label in range(20)]
    table_pi_mat = tabulate(pi_mat, headers, tablefmt="fancy_grid")
    print(table_pi_mat)

    pi = pi_mat.flatten()
    print(pi)

    # sanity check: the flattened prior must list the cells in id order
    list_of_cells_probs = [pi_dic[cell] for cell in range(len(pi_dic))]
    for cell, prob in enumerate(pi):
        if prob != list_of_cells_probs[cell]:
            sys.exit("ERROR in prior")

    # ------------------------------------------------------------------
    # gain matrix cross-check
    # ------------------------------------------------------------------
    gmat1 = create_gain_matrix()
    gmat2 = create_gain_matrix2()
    for i in range(gmat1.shape[0]):
        for j in range(gmat1.shape[1]):
            if gmat1[i, j] != gmat2[i, j]:
                sys.exit("BAZINGAAAAAA")

    # Printed in both argument orders so symmetry can be checked by eye.
    print(euclid_cell(13, 20))
    print(euclid_cell(20, 13))

    # ------------------------------------------------------------------
    # solve
    # ------------------------------------------------------------------
    C = mechanism.g_vuln.min_loss_given_max_vuln(pi, n_outputs, n_guesses,
                                                 max_vuln, gain, loss,
                                                 hard_max_loss)

    # NOTE: the original carried a long commented-out section here that
    # printed diagnostics for C, solved the inverse problem
    # (min_vuln_given_max_loss with max_loss = 300), row-normalised a copy of
    # C, and pickled its transpose together with the gain matrix and its
    # row/column index arrays (CHANNEL_PATH, G_MAT_PATH, G_MAT_ROWS_PATH,
    # G_MAT_COLS_PATH).  None of that code was active.

    # ------------------------------------------------------------------
    # create training and validation sets
    # ------------------------------------------------------------------
    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        err_hndl(str_="array_sizes_not_matching", add=inspect.stack()[0][3])

    for size_idx, training_set_size in enumerate(TRAINING_SET_SIZE):
        validation_set_size = VALIDATION_SET_SIZE[size_idx]

        for train_iteration in range(TRAIN_ITERATIONS):
            training_set_mat = create_single_dataset(size=training_set_size,
                                                     C=C,
                                                     pi=pi)

            store_folder = (DATA_FOLDER + str(training_set_size) +
                            "_training_and_" + str(validation_set_size) +
                            "_validation_store_folder_train_iteration_" +
                            str(train_iteration) + "/")
            utilities.createFolder(path=store_folder)

            training_df = pn.DataFrame(data=training_set_mat,
                                       columns=["O_train", "S_train"])
            # obj/path positional: pandas 1.0 renamed ``path``.
            pn.to_pickle(training_df.values,
                         store_folder + "/training_set.pkl",
                         protocol=2)

            validation_set_mat = create_single_dataset(
                size=validation_set_size, C=C, pi=pi)
            validation_df = pn.DataFrame(data=validation_set_mat,
                                         columns=["O_val", "S_val"])
            pn.to_pickle(validation_df.values,
                         store_folder + "/validation_set.pkl",
                         protocol=2)

    # ------------------------------------------------------------------
    # create test sets
    # ------------------------------------------------------------------
    # NOTE(review): the whitespace-collapsed source does not show whether
    # this section sat inside the training loops.  It uses none of their
    # variables and always writes to the same folder, so it runs once here;
    # confirm against the original layout.
    if CREATE_TEST_SET:
        list_unq = []
        print(
            "\n####################################################################################"
        )
        print(
            "################################# create test sets ################################"
        )
        print(
            "####################################################################################\n"
        )

        test_set_size = TEST_SET_SIZE[0]
        # Same folder for every test iteration, so it is created once.
        test_set_store_folder = DATA_FOLDER + str(
            test_set_size) + "_size_test_sets/"
        utilities.createFolder(path=test_set_store_folder)

        for test_iteration in range(TEST_ITERATIONS):
            test_set_mat = create_single_dataset(size=test_set_size,
                                                 C=C,
                                                 pi=pi)
            # Number of distinct values in column 0 of this test set.
            list_unq.append(len(np.unique(test_set_mat[:, 0])))

            test_df = pn.DataFrame(data=test_set_mat,
                                   columns=["O_test", "S_test"])
            pn.to_pickle(test_df.values,
                         test_set_store_folder + "/test_set_" +
                         str(test_iteration) + ".pkl",
                         protocol=2)

        print(list_unq)
def main_EMPIRICAL_ESTIMATES_create_data():
    """Sample training, validation and test sets from the pickled channel.

    The channel DataFrame at ``CHANNEL_PATH`` is loaded once to obtain the
    number of its columns; each dataset is then drawn with
    ``linear_geometric_mechanism.sample_from_distribution`` using
    ``set_size / number_of_columns`` (truncated to int) samples per secret
    and a fresh random state from ``utilities.create_new_rndm_state()``.

    ``TRAINING_SET_SIZE``, ``VALIDATION_SET_SIZE`` and ``TEST_SET_SIZE`` are
    read index by index; ``err_hndl`` is invoked when their lengths differ.
    For every size triple and every training iteration a folder is created
    under ``DATA_FOLDER`` holding ``training_set.pkl``,
    ``validation_set.pkl`` and, when ``CREATE_TEST_SET`` is true, a
    sub-folder with ``TEST_ITERATIONS`` pickled test sets.

    Returns:
        None.
    """
    # ------------------------------------------------------------------
    # geometric distribution loading
    # ------------------------------------------------------------------
    print("\n####################################################################################")
    print("######################### geometric distribution loading #########################")
    print("####################################################################################\n")

    utilities.createFolder(DATA_FOLDER)
    # Path passed positionally: the keyword was renamed from ``path`` to
    # ``filepath_or_buffer`` in pandas 1.0.
    channel_matrix_df = pn.read_pickle(CHANNEL_PATH)
    # Parenthesised single argument: same output under the Python 2 print
    # statement, and valid on Python 3 like the other prints here.
    print(channel_matrix_df.values.shape)

    # One column of the channel DataFrame per secret (see samples_per_secret).
    n_secrets = len(channel_matrix_df.columns.values)

    def _sample(set_size):
        """Draw one dataset holding int(set_size / n_secrets) rows per secret."""
        return linear_geometric_mechanism.sample_from_distribution(
            channel_matrix_df_path=CHANNEL_PATH,
            rndmstt=utilities.create_new_rndm_state(),
            samples_per_secret=int(set_size / n_secrets))

    # ------------------------------------------------------------------
    # create training sets
    # ------------------------------------------------------------------
    # X are the observables and y are the secrets (respectively col 0 and 1), stratify wrt to secret
    # split training and test data
    if len(TEST_SET_SIZE) != len(TRAINING_SET_SIZE) or len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        err_hndl(str_="array_sizes_not_matching", add=inspect.stack()[0][3])

    for size_idx, training_set_size in enumerate(TRAINING_SET_SIZE):
        validation_set_size = VALIDATION_SET_SIZE[size_idx]
        test_set_size = TEST_SET_SIZE[size_idx]

        for train_iteration in range(TRAIN_ITERATIONS):
            training_set_mat = _sample(training_set_size)

            store_folder = DATA_FOLDER + str(training_set_size) + "_training_and_" + str(
                validation_set_size) + "_validation_and_" + str(
                test_set_size) + "_test_store_folder_train_iteration_" + str(train_iteration) + "/"
            utilities.createFolder(path=store_folder)

            training_df = pn.DataFrame(data=training_set_mat, columns=["O_train", "S_train"])
            training_df.to_pickle(path=store_folder + "/training_set.pkl")

            validation_set_mat = _sample(validation_set_size)
            validation_df = pn.DataFrame(data=validation_set_mat, columns=["O_val", "S_val"])
            validation_df.to_pickle(path=store_folder + "/validation_set.pkl")

            # ----------------------------------------------------------
            # create test sets (stored inside this iteration's folder)
            # ----------------------------------------------------------
            if CREATE_TEST_SET:
                print("\n####################################################################################")
                print("################################# create test sets ################################")
                print("####################################################################################\n")

                # Same folder for every test iteration, so it is created once.
                test_set_store_folder = store_folder + str(test_set_size) + "_size_test_sets/"
                utilities.createFolder(path=test_set_store_folder)

                for test_iteration in range(TEST_ITERATIONS):
                    test_set_mat = _sample(test_set_size)
                    test_df = pn.DataFrame(data=test_set_mat, columns=["O_test", "S_test"])
                    test_df.to_pickle(
                        path=test_set_store_folder + "/test_set_" + str(test_iteration) + ".pkl")
import time
import numpy as np
import pandas as pn
from tqdm import tqdm
from scipy import stats
from utilities_pckg import utilities
from qif import channel, measure, mechanism, probab

# Dataset sizes and repetition counts for this experiment.
# NOTE(review): TRAINING_SET_SIZE and VALIDATION_SET_SIZE have the same
# length and are presumably paired index by index -- confirm in the code
# that consumes them.
TRAINING_SET_SIZE = [10000, 30000, 50000]
TEST_SET_SIZE = [50000]
VALIDATION_SET_SIZE = [1000, 3000, 5000]
TEST_ITERATIONS = 50
TRAIN_ITERATIONS = 5

# Folder layout.  Every folder is created at import time, parents first, so
# the order of the statements below matters.
BIS_EXP_G_VULN_DP_FOLDER = "/home/comete/mromanel/MILES_EXP/BIS_EXP_G_VULN_DP_FOLDER/"
utilities.createFolder(BIS_EXP_G_VULN_DP_FOLDER)
DATA_FOLDER = BIS_EXP_G_VULN_DP_FOLDER + "DATA_FOLDER/"
utilities.createFolder(DATA_FOLDER)
# Test folder name is derived from the first (here: only) test set size.
DATA_FOLDER_TEST = DATA_FOLDER + str(TEST_SET_SIZE[0]) + "_size_test_set/"
utilities.createFolder(DATA_FOLDER_TEST)
G_OBJ = BIS_EXP_G_VULN_DP_FOLDER + "G_OBJ/"
utilities.createFolder(G_OBJ)

# real counts, replace with those from the real db
# order: 0, 1, 2, 3, 4 ---> 164 55 36 35 13
real_counts = np.array([164, 55, 36, 35, 13])  # true counts
# real_counts = np.array([40, 55, 36, 35, 13])  # fake counts for safety check
from geometric_mechanisms import linear_geometric_mechanism from utilities_pckg import g_vuln_computation, utilities from utilities_pckg.runtime_error_handler import runtime_error_handler as err_hndl import pandas as pn import inspect EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH = "/home/comete/mromanel/MILES_EXP/EXP_G_VULN_MULTIPLE_GUESSES/" CHANNEL_PATH = EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH + "channel_df_norm.pkl" DATA_FOLDER = EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH + "DATA_FOLDER/" utilities.createFolder(DATA_FOLDER) TRAINING_SET_SIZE = [10000, 30000, 50000] TEST_SET_SIZE = [10000, 30000, 50000] VALIDATION_SET_SIZE = [1000, 3000, 5000] TEST_ITERATIONS = 100 TRAIN_ITERATIONS = 10 CREATE_TEST_SET = True def main_EXP_G_VULN_MULTIPLE_GUESSES_create_data(): # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% geometric distribution loading %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% print( "\n####################################################################################" ) print( "######################### geometric distribution loading #########################"
from utilities_pckg import g_function_manager, utilities
import pandas as pn

EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH = "/home/comete/mromanel/MILES_EXP/EXP_G_VULN_MULTIPLE_GUESSES/"
CHANNEL_PATH = EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH + "channel_df_norm.pkl"
G_MAT_FOLDER = EXP_G_VULN_MULTIPLE_GUESSES_FOLDER_PATH + "G_MAT_FOLDER/"
# Created at import time so the g-matrix can be saved into it later.
utilities.createFolder(G_MAT_FOLDER)

N_GUESSES = 2


def main_EXP_G_VULN_MULTIPLE_GUESSES_create_g_matrix():
    """Build the ``N_GUESSES``-guess g-function matrix for the channel's secrets.

    The column labels of the channel DataFrame pickled at ``CHANNEL_PATH``
    are passed as the list of unique secrets to
    ``g_function_manager.create_g_function_matrix_n_guesses``, which is given
    ``G_MAT_FOLDER`` as its save location.

    Returns:
        None.
    """
    # Path passed positionally: the keyword was renamed from ``path`` to
    # ``filepath_or_buffer`` in pandas 1.0, positional works on both.
    channel_df = pn.read_pickle(CHANNEL_PATH)
    channel_colnames = channel_df.columns.values

    g_function_manager.create_g_function_matrix_n_guesses(
        list_unique_secrets=channel_colnames,
        n_guesses=N_GUESSES,
        save_g_path=G_MAT_FOLDER)