import copy
import random
import time

import networkx as nx
import numpy as np
import torch
import torch.optim as optim
import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Project-local helpers (phi2prob, prob2unbiased, getMarkovianWalk, getDefUtility,
# GCNPredictionNet2, GCNDataGenerationNet, featureGenerationNet2, returnGraph,
# generatePhi, get_optimal_coverage_prob, normalize_matrix_positive, ...) are
# assumed to be importable from the surrounding package.


def attackerOracle(G, coverage_probs, phi, omega=4, num_paths=100):
    N = nx.number_of_nodes(G)
    coverage_prob_matrix = torch.zeros((N, N))
    for i, e in enumerate(list(G.edges())):
        coverage_prob_matrix[e[0]][e[1]] = coverage_probs[i]
        coverage_prob_matrix[e[1]][e[0]] = coverage_probs[i]  # for undirected graphs only

    # EXACT EDGE PROBS
    unbiased_probs = phi2prob(G, phi)
    biased_probs = prob2unbiased(G, -coverage_probs, unbiased_probs, omega=omega)  # feeding negative coverage yields the biased probabilities

    # EMPIRICAL EDGE PROBS
    path_list = []
    simulated_defender_utility_list = []
    for _ in range(num_paths):
        path = getMarkovianWalk(G, biased_probs)
        # path = getSimplePath(G, path)  # TODO
        path_list.append(path)

        defender_utility = -G.nodes[path[-1][1]]['utility']
        for e in path:
            defender_utility *= (1 - coverage_prob_matrix[e[0]][e[1]])
        simulated_defender_utility_list.append(defender_utility)

    simulated_defender_utility = np.mean(simulated_defender_utility_list)

    return path_list, simulated_defender_utility
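# --- Illustrative sketch (not part of the original module) -----------------
# attackerOracle() relies on the project helper getMarkovianWalk(G, biased_probs)
# to sample attacker paths. The sketch below shows one plausible implementation,
# assuming biased_probs is an N x N tensor of transition probabilities and that
# G.graph['sources'] / G.graph['targets'] list the entry and target nodes; the
# actual helper in this repository may differ.
def _example_markovian_walk(G, transition_probs):
    current = random.choice(G.graph['sources'])
    targets = set(G.graph['targets'])
    path = []
    while current not in targets:
        neighbors = list(G.neighbors(current))
        weights = [float(transition_probs[current][v]) for v in neighbors]
        if sum(weights) == 0:
            next_node = random.choice(neighbors)  # dead end: fall back to a uniform choice
        else:
            next_node = random.choices(neighbors, weights=weights, k=1)[0]
        path.append((current, next_node))
        current = next_node
    return path  # list of (u, v) edges, matching how attackerOracle consumes paths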
def train_model(train_data, validate_data, test_data, lr=0.1,
                learning_model='random_walk_distribution', block_selection='coverage',
                n_epochs=150, batch_size=100, optimizer='adam', omega=4,
                training_method='surrogate-decision-focused', max_norm=0.1,
                block_cut_size=0.5, T_size=10):
    net2 = GCNPredictionNet2(feature_size)
    net2.train()

    sample_graph = train_data[0][0]
    init_T = torch.rand(sample_graph.number_of_edges(), T_size)
    init_s = torch.zeros(sample_graph.number_of_edges())
    # The bias term s can cause infeasibility; it is not yet known how to resolve this, so s is kept fixed.
    T = torch.tensor(normalize_matrix_positive(init_T), requires_grad=True)
    s = torch.tensor(init_s, requires_grad=False)
    full_T = torch.eye(sample_graph.number_of_edges(), requires_grad=False)
    full_s = torch.zeros(sample_graph.number_of_edges(), requires_grad=False)
    T_lr = lr

    # ================ Optimizer ================
    if optimizer == 'adam':
        optimizer = optim.Adam(net2.parameters(), lr=lr)
        T_optimizer = optim.Adam([T, s], lr=T_lr)
        # optimizer = optim.Adam(list(net2.parameters()) + [T], lr=lr)
    elif optimizer == 'sgd':
        optimizer = optim.SGD(net2.parameters(), lr=lr)
        T_optimizer = optim.SGD([T, s], lr=T_lr)
    elif optimizer == 'adamax':
        optimizer = optim.Adamax(net2.parameters(), lr=lr)
        T_optimizer = optim.Adamax([T, s], lr=T_lr)

    scheduler = ReduceLROnPlateau(optimizer, 'min')
    T_scheduler = ReduceLROnPlateau(T_optimizer, 'min')

    training_loss_list, validating_loss_list, testing_loss_list = [], [], []
    training_defender_utility_list, validating_defender_utility_list, testing_defender_utility_list = [], [], []

    print("Training...")
    forward_time, qp_time, backward_time = 0, 0, 0

    pretrain_epochs = 0
    decay_rate = 0.95
    for epoch in range(-1, n_epochs):
        epoch_forward_time, epoch_qp_time, epoch_backward_time = 0, 0, 0
        if epoch <= pretrain_epochs:
            ts_weight = 1
            df_weight = 0
        else:
            ts_weight = decay_rate ** (epoch - pretrain_epochs)
            df_weight = 1 - ts_weight

        for mode in ["training", "validating", "testing"]:
            if mode == "training":
                dataset = train_data
                epoch_loss_list = training_loss_list
                epoch_def_list = training_defender_utility_list
                if epoch > 0:
                    net2.train()
                else:
                    net2.eval()
            elif mode == "validating":
                dataset = validate_data
                epoch_loss_list = validating_loss_list
                epoch_def_list = validating_defender_utility_list
                net2.eval()
            elif mode == "testing":
                dataset = test_data
                epoch_loss_list = testing_loss_list
                epoch_def_list = testing_defender_utility_list
                net2.eval()
            else:
                raise TypeError("Not a valid mode: {}".format(mode))

            loss_list, def_obj_list = [], []
            for iter_n in tqdm.trange(len(dataset)):
                G, Fv, coverage_prob, phi_true, path_list, cut, log_prob, unbiased_probs_true, previous_gradient = dataset[iter_n]
                n, m = G.number_of_nodes(), G.number_of_edges()
                budget = G.graph['budget']

                # ==================== Visualization ====================
                # if iter_n == 0 and mode == 'training':
                #     from plot_utils import plot_graph, reduce_dimension
                #     T_reduced = T.detach().numpy()
                #     reduce_dimension(T.detach().numpy())
                #     plot_graph(G, T_reduced, epoch)

                # =============== Compute edge probabilities ============
                Fv_torch = torch.as_tensor(Fv, dtype=torch.float)
                edge_index = torch.Tensor(list(nx.DiGraph(G).edges())).long().t()
                # When epoch < 0, evaluate the optimal loss and defender utility using the ground truth.
                phi_pred = net2(Fv_torch, edge_index).view(-1) if epoch >= 0 else phi_true
                # phi_pred.require_grad = True

                unbiased_probs_pred = phi2prob(G, phi_pred) if epoch >= 0 else unbiased_probs_true
                biased_probs_pred = prob2unbiased(G, -coverage_prob, unbiased_probs_pred, omega=omega)  # feeding negative coverage yields the biased probabilities

                # =================== Compute loss ======================
                log_prob_pred = torch.zeros(1)
                for path in path_list:
                    for e in path:
                        log_prob_pred -= torch.log(biased_probs_pred[e[0]][e[1]])
                log_prob_pred /= len(path_list)
                loss = (log_prob_pred - log_prob)[0]

                # ============== COMPUTE DEFENDER UTILITY ===============
                single_data = dataset[iter_n]

                if epoch == -1:  # optimal solution
                    cut_size = m
                    def_obj, def_coverage, (single_forward_time, single_qp_time) = getDefUtility(
                        single_data, full_T, full_s, unbiased_probs_pred, learning_model,
                        cut_size=cut_size, omega=omega, verbose=False, training_mode=False,
                        training_method=training_method, block_selection=block_selection)  # feed forward only
                    single_forward_time, single_qp_time = 0, 0  # evaluation pass, so the computation time is not counted
                elif mode == 'testing' or mode == "validating" or epoch <= 0:
                    cut_size = m
                    def_obj, def_coverage, (single_forward_time, single_qp_time) = getDefUtility(
                        single_data, T, s, unbiased_probs_pred, learning_model,
                        cut_size=cut_size, omega=omega, verbose=False, training_mode=False,
                        training_method=training_method, block_selection=block_selection)  # feed forward only
                    single_forward_time, single_qp_time = 0, 0  # evaluation pass, so the computation time is not counted
                else:
                    if training_method == 'decision-focused' or training_method == 'surrogate-decision-focused':
                        cut_size = m
                    else:
                        raise TypeError('Method not defined')
                    def_obj, def_coverage, (single_forward_time, single_qp_time) = getDefUtility(
                        single_data, T, s, unbiased_probs_pred, learning_model,
                        cut_size=cut_size, omega=omega, verbose=False, training_mode=True,
                        training_method=training_method, block_selection=block_selection)  # most time-consuming part

                epoch_forward_time += single_forward_time
                epoch_qp_time += single_qp_time

                def_obj_list.append(def_obj.item())
                loss_list.append(loss.item())

                if (iter_n % batch_size == (batch_size - 1)) and (epoch > 0) and (mode == "training"):
                    backward_start_time = time.time()
                    optimizer.zero_grad()
                    T_optimizer.zero_grad()
                    try:
                        if training_method == "decision-focused" or training_method == "surrogate-decision-focused":
                            (-def_obj).backward()
                            # (-def_obj * df_weight + loss * ts_weight).backward()
                        else:
                            raise TypeError("Method not implemented")
                        # torch.nn.utils.clip_grad_norm_(net2.parameters(), max_norm=max_norm)  # gradient clipping
                        for parameter in net2.parameters():
                            parameter.grad = torch.clamp(parameter.grad, min=-max_norm, max=max_norm)
                        T.grad = torch.clamp(T.grad, min=-max_norm, max=max_norm)
                        optimizer.step()
                        T_optimizer.step()
                    except Exception:
                        print("no grad is backpropagated...")
                    epoch_backward_time += time.time() - backward_start_time

                    # ============== normalize T matrix =================
                    T.data = normalize_matrix_positive(T.data)
                    # T.data = normalize_matrix_qr(T.data)
                    # s.data = normalize_vector(s.data, max_value=budget)
                    # print(s.data)

            # ========= scheduler using validation set ==========
            if (epoch > 0) and (mode == "validating"):
                if training_method == "decision-focused" or training_method == "surrogate-decision-focused":
                    scheduler.step(-np.mean(def_obj_list))
                    T_scheduler.step(-np.mean(def_obj_list))
                else:
                    raise TypeError("Method not implemented")

            # ======= Storing loss and defender utility =========
            epoch_loss_list.append(np.mean(loss_list))
            epoch_def_list.append(np.mean(def_obj_list))

            # ========== Print stuff after every epoch ==========
            np.random.shuffle(dataset)
            print("Mode: {}/ Epoch number: {}/ Loss: {}/ DefU: {}".format(
                mode, epoch, np.mean(loss_list), np.mean(def_obj_list)))

        print('Forward time for this epoch: {}'.format(epoch_forward_time))
        print('QP time for this epoch: {}'.format(epoch_qp_time))
        print('Backward time for this epoch: {}'.format(epoch_backward_time))
        if epoch >= 0:
            forward_time += epoch_forward_time
            qp_time += epoch_qp_time
            backward_time += epoch_backward_time

        # ============= early stopping criteria =============
        kk = 3
        if epoch >= kk * 2 - 1:
            GE_counts = np.sum(
                np.array(validating_defender_utility_list[1:][-kk:]) <=
                np.array(validating_defender_utility_list[1:][-2 * kk:-kk]) + 1e-4)
            print('Generalization error increase count: {}'.format(GE_counts))
            if GE_counts == kk:
                break

    average_nodes = np.mean([x[0].number_of_nodes() for x in train_data] +
                            [x[0].number_of_nodes() for x in validate_data] +
                            [x[0].number_of_nodes() for x in test_data])
    average_edges = np.mean([x[0].number_of_edges() for x in train_data] +
                            [x[0].number_of_edges() for x in validate_data] +
                            [x[0].number_of_edges() for x in test_data])

    print('Total forward time: {}'.format(forward_time))
    print('Total qp time: {}'.format(qp_time))
    print('Total backward time: {}'.format(backward_time))

    return (net2,
            training_loss_list, validating_loss_list, testing_loss_list,
            training_defender_utility_list, validating_defender_utility_list, testing_defender_utility_list,
            (forward_time, qp_time, backward_time), epoch)
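# --- Illustrative sketch (not part of the original module) -----------------
# The surrogate reparameterization above keeps T entrywise nonnegative via the
# project helper normalize_matrix_positive(). One plausible form of such a
# projection is sketched below, assuming each column of T should stay
# nonnegative and sum to one; the helper in this repository may normalize
# differently (e.g. by rows, or via a QR-based scheme, cf. normalize_matrix_qr).
def _example_normalize_matrix_positive(T):
    T = torch.clamp(T, min=0)                          # project onto the nonnegative orthant
    column_sums = torch.clamp(T.sum(dim=0, keepdim=True), min=1e-8)  # avoid division by zero
    return T / column_sums                             # each column sums to 1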
U = [G.nodes[t]['utility'] for t in targets]
U.append(-20)
print('U:', U)
U = np.array(U)
budget = 0.5 * E

# CODE BLOCK FOR GENERATING PHI (GROUND-TRUTH PHI GENERATED FOR NOW)
node_feature_size = 25
net1 = GCNDataGenerationNet(node_feature_size)

# Define node features for each of the n nodes
for node in list(G.nodes()):
    node_features = np.random.randn(node_feature_size)
    G.nodes[node]['node_features'] = node_features
Fv = np.zeros((N, node_feature_size))
for node in list(G.nodes()):
    Fv[node] = G.nodes[node]['node_features']
Fv_torch = torch.as_tensor(Fv, dtype=torch.float)

# Generate attractiveness values for nodes
A = nx.to_numpy_array(G)
A_torch = torch.as_tensor(A, dtype=torch.float)
phi = (net1.forward(Fv_torch, A_torch).view(-1)).detach()

unbiased_probs = phi2prob(G, phi)
optimal_coverage_probs = get_optimal_coverage_prob(G, unbiased_probs, U, initial_distribution, budget)
print("Optimal coverage:\n", optimal_coverage_probs)
print("Budget: ", budget)
print("Sum of coverage probabilities: ", sum(optimal_coverage_probs['x']))
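# --- Illustrative sketch (not part of the original module) -----------------
# phi2prob(G, phi) converts node attractiveness values into unbiased random-walk
# transition probabilities. A minimal sketch is given below, under the
# assumption that the attacker moves from u to a neighbor v with probability
# proportional to exp(phi_v); the actual helper in this repository may differ.
def _example_phi2prob(G, phi):
    N = nx.number_of_nodes(G)
    probs = torch.zeros((N, N))
    for u in G.nodes():
        neighbors = list(G.neighbors(u))
        if not neighbors:
            continue
        weights = torch.exp(torch.stack([phi[v] for v in neighbors]))
        weights = weights / weights.sum()              # softmax over u's neighbors
        for v, w in zip(neighbors, weights):
            probs[u][v] = w
    return probs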
def train_model(train_data, validate_data, test_data, lr=0.1,
                learning_model='random_walk_distribution', block_selection='coverage',
                n_epochs=150, batch_size=100, optimizer='adam', omega=4,
                training_method='two-stage', max_norm=0.1, block_cut_size=0.5):
    net2 = GCNPredictionNet2(feature_size)
    net2.train()

    if optimizer == 'adam':
        optimizer = optim.Adam(net2.parameters(), lr=lr)
    elif optimizer == 'sgd':
        optimizer = optim.SGD(net2.parameters(), lr=lr)
    elif optimizer == 'adamax':
        optimizer = optim.Adamax(net2.parameters(), lr=lr)

    # scheduler = ReduceLROnPlateau(optimizer, 'min')
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5)

    training_loss_list, validating_loss_list, testing_loss_list = [], [], []
    training_defender_utility_list, validating_defender_utility_list, testing_defender_utility_list = [], [], []

    print("Training...")
    forward_time, qp_time, backward_time = 0, 0, 0

    evaluate = False
    pretrain_epochs = 0
    decay_rate = 0.95
    for epoch in range(-1, n_epochs):
        if epoch == n_epochs - 1:
            evaluate = True

        epoch_forward_time, epoch_qp_time, epoch_backward_time = 0, 0, 0
        if epoch <= pretrain_epochs:
            ts_weight = 1
            df_weight = 0
        else:
            ts_weight = decay_rate ** (epoch - pretrain_epochs)
            df_weight = 1 - ts_weight

        for mode in ["training", "validating", "testing"]:
            if mode == "training":
                dataset = train_data
                epoch_loss_list = training_loss_list
                epoch_def_list = training_defender_utility_list
                if epoch > 0:
                    net2.train()
                else:
                    net2.eval()
            elif mode == "validating":
                dataset = validate_data
                epoch_loss_list = validating_loss_list
                epoch_def_list = validating_defender_utility_list
                net2.eval()
            elif mode == "testing":
                dataset = test_data
                epoch_loss_list = testing_loss_list
                epoch_def_list = testing_defender_utility_list
                net2.eval()
            else:
                raise TypeError("Not a valid mode: {}".format(mode))

            loss_list, def_obj_list = [], []
            for iter_n in tqdm.trange(len(dataset)):
                G, Fv, coverage_prob, phi_true, path_list, cut, log_prob, unbiased_probs_true, previous_gradient = dataset[iter_n]
                n, m = G.number_of_nodes(), G.number_of_edges()

                # =============== Compute edge probabilities ============
                Fv_torch = torch.as_tensor(Fv, dtype=torch.float)
                edge_index = torch.Tensor(list(nx.DiGraph(G).edges())).long().t()
                # When epoch < 0, evaluate the optimal loss and defender utility using the ground truth.
                phi_pred = net2(Fv_torch, edge_index).view(-1) if epoch >= 0 else phi_true
                # phi_pred.require_grad = True

                unbiased_probs_pred = phi2prob(G, phi_pred) if epoch >= 0 else unbiased_probs_true
                biased_probs_pred = prob2unbiased(G, -coverage_prob, unbiased_probs_pred, omega=omega)  # feeding negative coverage yields the biased probabilities

                # =================== Compute loss ======================
                log_prob_pred = torch.zeros(1)
                for path in path_list:
                    for e in path:
                        log_prob_pred -= torch.log(biased_probs_pred[e[0]][e[1]])
                log_prob_pred /= len(path_list)
                loss = (log_prob_pred - log_prob)[0]

                # ============== COMPUTE DEFENDER UTILITY ===============
                single_data = dataset[iter_n]

                if mode == 'testing' or mode == "validating" or epoch <= 0:  # or training_method == "two-stage" or epoch <= 0:
                    cut_size = m
                    if training_method == "two-stage" and not evaluate:
                        def_obj, def_coverage, single_forward_time, single_qp_time = torch.Tensor([-float('Inf')]), None, 0, 0
                    else:
                        def_obj, def_coverage, (single_forward_time, single_qp_time) = getDefUtility(
                            single_data, unbiased_probs_pred, learning_model,
                            cut_size=cut_size, omega=omega, verbose=False, training_mode=False,
                            training_method=training_method, block_selection=block_selection)  # feed forward only
                        single_forward_time, single_qp_time = 0, 0  # evaluation pass, so the computation time is not counted
                else:
                    if training_method == "two-stage" or epoch <= pretrain_epochs:
                        cut_size = m
                        if evaluate:
                            def_obj, def_coverage, (single_forward_time, single_qp_time) = getDefUtility(
                                single_data, unbiased_probs_pred, learning_model,
                                cut_size=cut_size, omega=omega, verbose=False, training_mode=False,
                                training_method=training_method, block_selection=block_selection)  # most time-consuming part
                        else:
                            def_obj, def_coverage, single_forward_time, single_qp_time = torch.Tensor([-float('Inf')]), None, 0, 0  # skip computing the defender utility
                    else:
                        if training_method == 'decision-focused':
                            cut_size = m
                        elif training_method == 'block-decision-focused' or training_method == 'hybrid' or training_method == 'corrected-block-decision-focused':
                            if type(block_cut_size) == str and block_cut_size[-1] == 'n':
                                cut_size = int(n * float(block_cut_size[:-1]))
                            elif block_cut_size <= 1:
                                cut_size = int(m * block_cut_size)
                            else:
                                cut_size = block_cut_size
                        else:
                            raise TypeError('Method not defined')
                        def_obj, def_coverage, (single_forward_time, single_qp_time) = getDefUtility(
                            single_data, unbiased_probs_pred, learning_model,
                            cut_size=cut_size, omega=omega, verbose=False, training_mode=True,
                            training_method=training_method, block_selection=block_selection)  # most time-consuming part

                epoch_forward_time += single_forward_time
                epoch_qp_time += single_qp_time

                def_obj_list.append(def_obj.item())
                loss_list.append(loss.item())

                if (iter_n % batch_size == (batch_size - 1)) and (epoch > 0) and (mode == "training"):
                    backward_start_time = time.time()
                    optimizer.zero_grad()
                    try:
                        if training_method == "two-stage" or epoch <= pretrain_epochs:
                            loss.backward()
                        elif training_method == "decision-focused" or training_method == "block-decision-focused" or training_method == 'corrected-block-decision-focused':
                            # (-def_obj).backward()
                            (-def_obj * m / cut_size).backward()
                        elif training_method == "hybrid":
                            # ((-def_obj) * df_weight + loss[0] * ts_weight).backward()
                            ((-def_obj * m / cut_size) * df_weight + loss * ts_weight).backward()
                        else:
                            raise TypeError("Method not implemented")
                        torch.nn.utils.clip_grad_norm_(net2.parameters(), max_norm=max_norm)  # gradient clipping
                        # print(torch.norm(net2.gcn1.weight.grad))
                        # print(torch.norm(net2.gcn2.weight.grad))
                        # print(torch.norm(net2.fc1.weight.grad))
                        optimizer.step()
                    except Exception:
                        print("no grad is backpropagated...")
                    epoch_backward_time += time.time() - backward_start_time

            # ========= scheduler using validation set ==========
            if (epoch > 0) and (mode == "validating"):
                if training_method == "two-stage":
                    scheduler.step(np.mean(loss_list))
                elif training_method == "decision-focused" or training_method == "block-decision-focused" or training_method == 'corrected-block-decision-focused' or training_method == 'hybrid':
                    scheduler.step(-np.mean(def_obj_list))
                else:
                    raise TypeError("Method not implemented")

            # ======= Storing loss and defender utility =========
            epoch_loss_list.append(np.mean(loss_list))
            epoch_def_list.append(np.mean(def_obj_list))

            # ========== Print stuff after every epoch ==========
            np.random.shuffle(dataset)
            print("Mode: {}/ Epoch number: {}/ Loss: {}/ DefU: {}".format(
                mode, epoch, np.mean(loss_list), np.mean(def_obj_list)))

        print('Forward time for this epoch: {}'.format(epoch_forward_time))
        print('QP time for this epoch: {}'.format(epoch_qp_time))
        print('Backward time for this epoch: {}'.format(epoch_backward_time))
        if epoch >= 0:
            forward_time += epoch_forward_time
            qp_time += epoch_qp_time
            backward_time += epoch_backward_time

        # ============= early stopping criteria =============
        kk = 3
        if epoch >= kk * 2 - 1:
            if training_method == 'two-stage':
                if evaluate:
                    break
                GE_counts = np.sum(
                    np.array(validating_loss_list[1:][-kk:]) >=
                    np.array(validating_loss_list[1:][-2 * kk:-kk]) - 1e-4)
                print('Generalization error increase count: {}'.format(GE_counts))
                if GE_counts == kk:
                    evaluate = True
            else:  # decision-focused variants
                GE_counts = np.sum(
                    np.array(validating_defender_utility_list[1:][-kk:]) <=
                    np.array(validating_defender_utility_list[1:][-2 * kk:-kk]) + 1e-4)
                print('Generalization error increase count: {}'.format(GE_counts))
                if GE_counts == kk:
                    break

    average_nodes = np.mean([x[0].number_of_nodes() for x in train_data] +
                            [x[0].number_of_nodes() for x in validate_data] +
                            [x[0].number_of_nodes() for x in test_data])
    average_edges = np.mean([x[0].number_of_edges() for x in train_data] +
                            [x[0].number_of_edges() for x in validate_data] +
                            [x[0].number_of_edges() for x in test_data])

    print('Total forward time: {}'.format(forward_time))
    print('Total qp time: {}'.format(qp_time))
    print('Total backward time: {}'.format(backward_time))

    return (net2,
            training_loss_list, validating_loss_list, testing_loss_list,
            training_defender_utility_list, validating_defender_utility_list, testing_defender_utility_list,
            (forward_time, qp_time, backward_time), epoch)
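# --- Illustrative sketch (not part of the original module) -----------------
# The block-decision-focused branches above accept block_cut_size in three
# formats: a string like '0.5n' (a multiple of the node count n), a fraction
# <= 1 of the edge count m, or an absolute block size > 1. The standalone
# helper below mirrors that parsing logic for clarity; it is not used by the
# training code itself.
def _example_resolve_cut_size(block_cut_size, n, m):
    if isinstance(block_cut_size, str) and block_cut_size[-1] == 'n':
        return int(n * float(block_cut_size[:-1]))   # e.g. '0.5n' -> half the nodes
    elif block_cut_size <= 1:
        return int(m * block_cut_size)               # fraction of the edges
    else:
        return block_cut_size                        # absolute block size

# Example: _example_resolve_cut_size('0.5n', n=20, m=60) == 10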
def generateSyntheticData(node_feature_size, omega=4,
                          n_graphs=20, samples_per_graph=100, empirical_samples_per_instance=10,
                          fixed_graph=False, path_type='random_walk',
                          N_low=16, N_high=20, e_low=0.6, e_high=0.7,
                          budget=2, train_test_split_ratio=(0.7, 0.1, 0.2),
                          n_sources=1, n_targets=1, random_seed=0, noise_level=0):
    # Random seed setting
    print("Random seed: {}".format(random_seed))
    if random_seed != 0:
        torch.manual_seed(random_seed)
        np.random.seed(random_seed)
        random.seed(random_seed)

    # initialization
    data = []  # aggregate all the data first, then split into training and testing
    generated_node_feature_size = node_feature_size
    net3 = featureGenerationNet2(generated_node_feature_size)
    # net3 = featureGenerationNet2(node_feature_size)
    n_samples = n_graphs * samples_per_graph
    print("N_samples: ", n_samples)

    for graph_number in range(n_graphs):
        '''
        # Pick the graph in cyclic fashion from the correct list of graphs
        graph_index = 0
        if sample_number < n_training_samples:
            G = training_graphs[sample_number % n_training_graphs]
            graph_index = sample_number % n_training_graphs
        else:
            G = testing_graphs[sample_number % n_testing_graphs]
            graph_index = sample_number % n_testing_graphs
        '''
        while True:
            G = returnGraph(fixed_graph=fixed_graph, n_sources=n_sources, n_targets=n_targets,
                            N_low=N_low, N_high=N_high, e_low=e_low, e_high=e_high, budget=budget)

            # PRECOMPUTE A MIN-CUT
            m = G.number_of_edges()
            edges = G.edges()
            edge2index = {}
            for idx, edge in enumerate(edges):
                edge2index[edge] = idx
                edge2index[(edge[1], edge[0])] = idx

            dummyG = copy.deepcopy(G)
            dummyG.add_nodes_from(['ds', 'dt'])  # dummy source 'ds' and dummy target 'dt'
            for x in dummyG.graph['sources']:
                dummyG.add_edge('ds', x, capacity=100)
            for x in dummyG.graph['targets']:
                dummyG.add_edge(x, 'dt', capacity=100)

            value, partition = nx.minimum_cut(dummyG, 'ds', 'dt')
            print('cut size:', value)
            partition0, partition1 = set(partition[0]), set(partition[1])
            cut = []
            for idx, edge in enumerate(G.edges()):
                if edge[0] in partition0 and edge[1] in partition1:
                    cut.append(idx)
                elif edge[0] in partition1 and edge[1] in partition0:
                    cut.append(idx)
            print('cut:', cut)
            if value > budget:
                break

        # COMPUTE ADJACENCY MATRIX
        edge_index = torch.Tensor(list(nx.DiGraph(G).edges())).long().t()
        N = nx.number_of_nodes(G)
        m = nx.number_of_edges(G)

        # Visualization
        # from plot_utils import plot_graph
        # colors = np.random.random((m, 3))
        # plot_graph(G, colors)

        '''
        # Define node features for each of the n nodes
        for node in list(G.nodes()):
            node_features = np.random.randn(node_feature_size)  # TODO: Use a better feature computation for a given node
            G.nodes[node]['node_features'] = node_features

        # Generate features
        Fv = np.zeros((N, node_feature_size))
        for node in list(G.nodes()):
            Fv[node] = G.nodes[node]['node_features']
        '''

        random_feature_indices = np.random.choice(generated_node_feature_size, node_feature_size, replace=False)

        for _ in range(samples_per_graph):
            # Randomly assign coverage probability
            private_coverage_prob = np.random.rand(m)
            private_coverage_prob = (private_coverage_prob / sum(private_coverage_prob)) * budget
            coverage_prob_matrix = torch.zeros(N, N)
            for i, e in enumerate(list(G.edges())):
                coverage_prob_matrix[e[0]][e[1]] = private_coverage_prob[i]
                coverage_prob_matrix[e[1]][e[0]] = private_coverage_prob[i]

            # Randomly generate attractiveness:
            # phi is the attractiveness function, phi(v, f), for each of the N nodes v
            phi = generatePhi(G, fixed_phi=fixed_graph)
            phi = phi - np.mean(phi)
            phi = torch.as_tensor(phi, dtype=torch.float)

            # Generate features from phi values
            Fv_torch = net3.forward(phi.view(-1, 1), edge_index)
            Fv = Fv_torch.detach().numpy()
            Fv = Fv[:, random_feature_indices]

            # EXACT EDGE PROBS
            unbiased_probs = phi2prob(G, phi)
            biased_probs = prob2unbiased(G, -private_coverage_prob, unbiased_probs, omega=omega)  # feeding negative coverage yields the biased probabilities

            # Call attacker oracle
            path_list, _ = attackerOracle(G, private_coverage_prob, phi, omega=omega,
                                          num_paths=empirical_samples_per_instance)

            # EMPIRICAL EDGE PROBS
            empirical_transition_probs = torch.zeros((N, N))
            for path in path_list:
                for e in path:
                    empirical_transition_probs[e[0]][e[1]] += 1

            # row_sum = torch.sum(empirical_transition_probs, dim=1)
            adj = torch.Tensor(nx.adjacency_matrix(G, nodelist=range(N)).toarray())
            empirical_transition_probs = empirical_transition_probs / torch.sum(empirical_transition_probs, dim=1, keepdim=True)
            # empirical_transition_probs[row_sum == 0] = 0
            empirical_transition_probs[torch.isnan(empirical_transition_probs)] = 0
            # empirical_transition_probs = empirical_transition_probs * adj
            # print('biased:', empirical_transition_probs)
            empirical_unbiased_probs = prob2unbiased(G, private_coverage_prob, empirical_transition_probs, omega)
            # print('unbiased:', empirical_unbiased_probs)

            previous_gradient = torch.zeros(m, m)

            # DATA POINT
            if path_type == 'random_walk_distribution':
                log_prob = torch.zeros(1)
                for path in path_list:
                    for e in path:
                        log_prob -= torch.log(biased_probs[e[0]][e[1]])
                log_prob /= len(path_list)
                data_point = (G, Fv, private_coverage_prob, phi, path_list, cut, log_prob, unbiased_probs, previous_gradient)
            elif path_type == 'empirical_distribution':
                log_prob = torch.zeros(1)
                for path in path_list:
                    for e in path:
                        log_prob -= torch.log(empirical_transition_probs[e[0]][e[1]])
                log_prob /= len(path_list)
                data_point = (G, Fv, private_coverage_prob, phi, path_list, cut, log_prob, empirical_unbiased_probs, previous_gradient)
            else:
                raise TypeError("Not a valid path_type: {}".format(path_type))

            data.append(data_point)

    data = np.array(data, dtype=object)
    np.random.shuffle(data)

    print("average node size:", np.mean([x[0].number_of_nodes() for x in data]))
    print("average edge size:", np.mean([x[0].number_of_edges() for x in data]))

    train_size = int(train_test_split_ratio[0] * len(data))
    validate_size = int(train_test_split_ratio[1] * len(data))

    Fv_training_list = [data[i][1] for i in range(train_size)]
    Fv_training_features = np.concatenate(Fv_training_list, axis=0)  # concatenate all the features
    Fv_training_mean = np.mean(Fv_training_features)
    Fv_training_std = np.std(Fv_training_features)
    print('mean:', Fv_training_mean, 'std', Fv_training_std)

    for i in range(len(data)):  # normalize based on the training set
        data[i][1] = (data[i][1] - Fv_training_mean) / Fv_training_std + \
            np.random.normal(size=data[i][1].shape) * noise_level  # maintain that var = 1

    training_data = data[:train_size]
    validate_data = data[train_size:train_size + validate_size]
    testing_data = data[train_size + validate_size:]

    return np.array(training_data), np.array(validate_data), np.array(testing_data)
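# --- Illustrative usage (not part of the original module) ------------------
# A minimal end-to-end sketch tying the pieces above together: generate a small
# synthetic dataset and feed the splits to train_model. It assumes the
# module-level global feature_size read by GCNPredictionNet2 matches the node
# feature dimension, and it passes path_type explicitly because the branches in
# generateSyntheticData only handle 'random_walk_distribution' and
# 'empirical_distribution'.
if __name__ == '__main__':
    feature_size = 25
    train_data, validate_data, test_data = generateSyntheticData(
        node_feature_size=feature_size, omega=4, n_graphs=5, samples_per_graph=10,
        path_type='random_walk_distribution', random_seed=1)
    (net, train_loss, validate_loss, test_loss,
     train_def, validate_def, test_def, timing, last_epoch) = train_model(
        train_data, validate_data, test_data, lr=0.01, n_epochs=20,
        training_method='two-stage')
    print('final validation loss:', validate_loss[-1])
    print('final validation defender utility:', validate_def[-1])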