def metrics_test_set(path_test_set, Omega_max, Phi_max, Lambda_max, list_experts, path_times=None):
    """Compare the heuristic solver with stored exact values, per test-set key.

    The file at ``path_test_set`` is unpickled into a mapping; each key ``n``
    (reported in the number-of-nodes column) maps to a collection of instances.
    Every instance is solved with ``solve_mcn(..., exact=False)`` and the
    result is compared with the value stored in ``instance.value``.

    Args:
        path_test_set: path of the pickled mapping ``n -> instances``. Each
            instance must expose ``G``, ``Omega``, ``Phi``, ``Lambda``, ``J``
            and ``value``.
        Omega_max, Phi_max, Lambda_max: forwarded unchanged to ``solve_mcn``.
        list_experts: forwarded unchanged to ``solve_mcn``.
        path_times: optional path of a pickled mapping ``n -> exact solving
            times``. When ``None``, the exact-time column is filled with NaN.

    Returns:
        A dict mapping column labels (LaTeX strings) to lists holding one
        entry per key of the test set: the keys themselves, the optimality gap
        in percent, the approximation ratio, the mean exact time and the mean
        heuristic wall-clock time.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only load files
    # coming from a trusted source.
    with open(path_test_set, "rb") as test_file:
        dict_instances = pickle.load(test_file)
    n_nodes = list(dict_instances.keys())

    # Raw strings keep the LaTeX backslashes without relying on invalid
    # escape sequences; the resulting keys are unchanged.
    df_n = dict()
    df_n[r'$\#V$'] = n_nodes
    df_n[r'$\eta(\%)$'] = []
    df_n[r'$\zeta$'] = []
    df_n['$t_{exact}(s)$'] = []
    df_n['$t_{heur}(s)$'] = []

    if path_times is not None:
        with open(path_times, "rb") as times_file:
            exact_times = pickle.load(times_file)
    else:
        # No timing available: one NaN per instance so that the mean is NaN
        exact_times = dict()
        for n in n_nodes:
            exact_times[n] = [np.nan] * len(dict_instances[n])

    print(
        "=========================================================================="
    )
    print("Computing the values using the heuristic... \n")

    for n in tqdm(n_nodes):
        val_exact_n = []
        val_heur_n = []
        time_heur_n = []
        for instance in dict_instances[n]:
            ta = time.time()
            # Only the value is needed; the three other outputs are ignored
            value_heuristic, _, _, _ = solve_mcn(
                instance.G,
                instance.Omega,
                instance.Phi,
                instance.Lambda,
                J=instance.J,
                Omega_max=Omega_max,
                Phi_max=Phi_max,
                Lambda_max=Lambda_max,
                exact=False,
                list_experts=list_experts)
            tb = time.time()
            time_heur_n.append(tb - ta)
            val_heur_n.append(value_heuristic)
            val_exact_n.append(instance.value)

        # Aggregate the metrics of the current key
        df_n[r'$\eta(\%)$'].append(opt_gap(val_exact_n, val_heur_n) * 100)
        df_n[r'$\zeta$'].append(approx_ratio(val_exact_n, val_heur_n))
        df_n['$t_{heur}(s)$'].append(np.mean(np.array(time_heur_n)))
        df_n['$t_{exact}(s)$'].append(np.mean(np.array(exact_times[n])))

    return df_n
def generate_test_set_dqn(n_free_min, n_free_max, d_edge_min, d_edge_max, Omega_max, Phi_max, Lambda_max, weighted, w_max, directed, size_test_set, to_torch=False):
    """Generate, solve exactly and dump a test set of random instances.

    For every budget in [1, Omega_max + Phi_max + Lambda_max],
    ``size_test_set`` random instances are drawn with
    ``generate_random_instance(..., Budget_target=budget)`` and solved with
    ``solve_mcn(..., exact=True)``. The outputs of the solver are stored on
    the instance as ``value``, ``D``, ``I`` and ``P``.

    The resulting list (one sub-list per budget) is pickled to
    ``data/<folder>/test_set.gz`` where ``<folder>`` is ``test_data`` followed
    by ``_w`` if ``weighted`` and ``_dir`` if ``directed``. When ``to_torch``
    is true, the ``instance_to_torch`` version of every instance is also
    pickled, to ``test_set_torch.gz`` in the same folder.

    Args:
        n_free_min, n_free_max, d_edge_min, d_edge_max: forwarded to
            ``generate_random_instance``.
        Omega_max, Phi_max, Lambda_max: maximum budgets; their sum is the
            largest total budget generated.
        weighted, w_max, directed: forwarded to ``generate_random_instance``;
            ``weighted`` and ``directed`` also select the output folder.
        size_test_set: number of instances generated for each budget.
        to_torch: also save the converted instances when true.

    Returns:
        None. The test sets are written to disk.
    """

    # Initialize the variables
    Budget_max = Omega_max + Phi_max + Lambda_max
    test_set = []
    test_set_torch = []  # only filled and saved when to_torch is true

    print(
        "=========================================================================="
    )
    print("Generates the test set... \n")

    # for all budgets
    for budget in tqdm(range(1, Budget_max + 1)):
        # initialize the budget's instances lists
        test_set_budget = []
        test_set_budget_torch = []
        for k in range(size_test_set):
            # generate a random instance
            instance_budget_k = generate_random_instance(
                n_free_min,
                n_free_max,
                d_edge_min,
                d_edge_max,
                Omega_max,
                Phi_max,
                Lambda_max,
                weighted=weighted,
                w_max=w_max,
                Budget_target=budget,
                directed=directed,
            )
            # solve the instance exactly
            value, D, I, P = solve_mcn(
                instance_budget_k.G,
                instance_budget_k.Omega,
                instance_budget_k.Phi,
                instance_budget_k.Lambda,
                J=instance_budget_k.J,
                exact=True,
            )
            # save the outputs of the solver in the Instance object
            instance_budget_k.value = value
            instance_budget_k.D = D
            instance_budget_k.I = I
            instance_budget_k.P = P
            # pushes it to memory
            test_set_budget.append(instance_budget_k)
            # optionally keep the corresponding converted instance
            if to_torch:
                test_set_budget_torch.append(instance_to_torch(instance_budget_k))
        test_set.append(test_set_budget)
        if to_torch:
            test_set_torch.append(test_set_budget_torch)

    # Build the output folder name from the type of graphs generated
    folder_name = 'test_data'
    if weighted:
        folder_name += '_w'
    if directed:
        folder_name += '_dir'
    path_test_data = os.path.join('data', folder_name)
    # Creates 'data' and the sub-folder in one call, tolerating existing ones
    os.makedirs(path_test_data, exist_ok=True)

    # Save the test sets (plain pickle files despite the .gz extension)
    file_path = os.path.join(path_test_data, "test_set.gz")
    with open(file_path, "wb") as test_file:
        pickle.dump(test_set, test_file)
    if to_torch:
        file_path_torch = os.path.join(path_test_data, "test_set_torch.gz")
        with open(file_path_torch, "wb") as torch_file:
            pickle.dump(test_set_torch, torch_file)
def load_create_datasets(size_train_data, size_val_data, batch_size, num_workers, n_free_min, n_free_max, d_edge_min, d_edge_max, Omega_max, Phi_max, Lambda_max, weighted, w_max, directed, Budget, list_experts, path_data, solve_exact=False, exact_protection=False, batch_unroll=None):
    """Create or load the training and validation sets for a given budget.

    Existing datasets are looked up in ``<path_data>/train_data`` and
    ``<path_data>/val_data`` (files ``data_<Budget>.gz``). Missing instances
    are generated, solved, converted with ``instance_to_torch`` and appended.
    The datasets are dumped again whenever something changed.

    Args:
        size_train_data: number of instances of the training set.
        size_val_data: number of instances of the validation set.
        batch_size: batch size of both dataloaders; also used to derive the
            number of instances solved together when ``batch_unroll`` is None.
        num_workers: forwarded to both ``DataLoader`` objects.
        n_free_min, n_free_max, d_edge_min, d_edge_max, weighted, w_max,
            directed: forwarded to the random instance generators.
        Omega_max, Phi_max, Lambda_max: forwarded to the generators and
            solvers.
        Budget: budget of the generated instances; selects the files used.
        list_experts: forwarded to the solvers.
        path_data: root folder of the datasets, or None. When None, nothing is
            loaded and the datasets are saved under ``'data'``.
        solve_exact: passed as ``exact`` to ``solve_mcn`` (only used when
            ``exact_protection`` is true).
        exact_protection: when true, instances are generated and solved one at
            a time with ``solve_mcn``; otherwise they are solved by batches
            with ``solve_mcn_heuristic_batch``.
        batch_unroll: number of instances per solved batch; derived from
            ``batch_size`` when None.

    Returns:
        A tuple ``(train_loader, val_loader)`` of ``DataLoader`` objects.
    """

    print("\n==========================================================================")
    print("Creating or Loading the Training and Validation sets for Budget = %2d \n" % Budget)

    # Initialize the dataset and number of instances to generate
    data = []
    len_data_train = 0
    total_size = size_train_data + size_val_data
    # If there is a data folder
    if path_data is not None:
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # datasets coming from a trusted source.
        # we check whether there is already a training set
        # corresponding to the budget we want
        path_train_data_budget = os.path.join(path_data, 'train_data', 'data_' + str(Budget) + '.gz')
        # if it's the case, we load it
        if os.path.exists(path_train_data_budget):
            with open(path_train_data_budget, "rb") as train_file:
                data += pickle.load(train_file)
            len_data_train = len(data)
        # similarly, we check whether there is a validation set available
        path_val_data_budget = os.path.join(path_data, 'val_data', 'data_' + str(Budget) + '.gz')
        # if it's the case, we load it
        if os.path.exists(path_val_data_budget):
            with open(path_val_data_budget, "rb") as val_file:
                data += pickle.load(val_file)
        # Update the number of instances that needs to be created
        total_size = total_size - len(data)

    # We create the instances that are currently lacking in the datasets
    # If we need the exact protection, we solve one instance at a time
    if exact_protection:
        for k in tqdm(range(total_size)):
            # Sample a random instance
            instance = generate_random_instance(
                n_free_min,
                n_free_max,
                d_edge_min,
                d_edge_max,
                Omega_max,
                Phi_max,
                Lambda_max,
                Budget,
                weighted,
                w_max,
                directed,
            )
            # Solves the mcn problem
            value, _, _, _ = solve_mcn(
                instance.G,
                instance.Omega,
                instance.Phi,
                instance.Lambda,
                J=instance.J,
                Omega_max=Omega_max,
                Phi_max=Phi_max,
                Lambda_max=Lambda_max,
                exact=solve_exact,
                list_experts=list_experts,
                exact_protection=exact_protection,
            )
            instance.value = value
            # Transform the instance to a InstanceTorch object
            instance_torch = instance_to_torch(instance)
            # add the instance to the data
            data.append(instance_torch)
    # Else, we can solve batches of instances together
    else:
        # Compute the number of instances solved together
        if batch_unroll is None:
            min_size_instance = n_free_min + Budget
            max_size_instance = n_free_max + Budget
            mean_size_instance = min_size_instance + (max_size_instance - min_size_instance) // 2
            # At least one instance per batch: a batch_size smaller than the
            # mean instance size would otherwise give 0 and make the division
            # below fail
            batch_instances = max(1, batch_size // mean_size_instance)
        else:
            batch_instances = batch_unroll
        # Ceiling division: a last, complete batch covers the remainder, so
        # slightly more than total_size instances may be generated
        n_iterations = -(-total_size // batch_instances)
        for k in tqdm(range(n_iterations)):
            # Sample a batch of random instance
            list_instances = generate_random_batch_instance(
                batch_instances,
                n_free_min,
                n_free_max,
                d_edge_min,
                d_edge_max,
                Omega_max,
                Phi_max,
                Lambda_max,
                Budget,
                weighted,
                w_max,
                directed,
            )
            # Solves the mcn problem for the batch using the heuristic
            values = solve_mcn_heuristic_batch(
                list_experts,
                list_instances,
                Omega_max,
                Phi_max,
                Lambda_max,
            )
            for i in range(batch_instances):
                list_instances[i].value = values[i]
                # Transform the instance to a InstanceTorch object
                instance_torch = instance_to_torch(list_instances[i])
                # add the instance to the data
                data.append(instance_torch)

    # Save the data if there is a change in the dataset
    if len_data_train != size_train_data or total_size > 0:
        if path_data is None:
            path_data = 'data'
        path_train = os.path.join(path_data, 'train_data')
        path_val = os.path.join(path_data, 'val_data')
        # makedirs also creates path_data itself (even when nested) and
        # tolerates folders that already exist
        os.makedirs(path_train, exist_ok=True)
        os.makedirs(path_val, exist_ok=True)
        path_train_data_budget = os.path.join(path_train, 'data_' + str(Budget) + '.gz')
        path_val_data_budget = os.path.join(path_val, 'data_' + str(Budget) + '.gz')
        with open(path_train_data_budget, "wb") as train_file:
            pickle.dump(data[:size_train_data], train_file)
        with open(path_val_data_budget, "wb") as val_file:
            pickle.dump(data[size_train_data:], val_file)
        print("\nSaved datasets in " + path_data, '\n')

    # Create the datasets used during training and validation
    val_data = MCNDataset(data[size_train_data:size_train_data + size_val_data])
    train_data = MCNDataset(data[:size_train_data])
    train_loader = DataLoader(
        train_data,
        collate_fn=collate_fn,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )
    val_loader = DataLoader(
        val_data,
        collate_fn=collate_fn,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )

    return train_loader, val_loader
def compute_node_values(G, J, Omega, Phi, Lambda, exact=True, Omega_max=None, Phi_max=None, Lambda_max=None, list_experts=None):
    """Score every node of ``G`` for the current player and draw the graph.

    Nodes listed in ``J`` get the value 0. For any other node, the afterstate
    obtained by playing that node is evaluated, either exactly with
    ``solve_mcn`` (``exact=True``) or with the expert network selected by
    ``get_target_net``. The graph is then drawn with ``nx.draw_spring``,
    nodes being colored and labelled by their value.

    Args:
        G: graph to evaluate (networkx API is used on it).
        J: list of already attacked nodes.
        Omega, Phi, Lambda: remaining budgets; ``get_player`` derives the
            current player from them.
        exact: evaluate afterstates exactly when true, with the experts
            otherwise.
        Omega_max, Phi_max, Lambda_max: forwarded to ``get_target_net``
            (only used when ``exact`` is false).
        list_experts: forwarded to ``get_target_net`` (only used when
            ``exact`` is false).

    Returns:
        None. The only output is the drawing.
    """

    node_weights = graph_weights(G)
    has_weights = len(nx.get_node_attributes(G, 'weight')) > 0
    # arrows are drawn as soon as one edge has no reverse counterpart
    draw_arrows = any((v, u) not in G.edges() for (u, v) in G.edges())

    scores = dict()
    for node in G.nodes():
        # an already attacked node is worth nothing
        if node in J:
            scores[node] = 0
            continue

        graph_copy = G.copy()
        # whose turn it is to play
        player = get_player(Omega, Phi, Lambda)
        if player in (0, 2):
            # players 0 and 2 remove the node from the graph
            after_G, relabel = new_graph(graph_copy, node)
            after_J = [relabel[attacked] for attacked in J]
            gain = node_weights[node]
        else:
            # otherwise the node is added to the attacked ones
            after_J = J + [node]
            after_G = graph_copy
            gain = 0

        # the budget of the current player decreases by one
        after_Omega = Omega - 1 if player == 0 else Omega
        after_Phi = Phi - 1 if player == 1 else Phi
        after_Lambda = Lambda - 1 if player == 2 else Lambda

        if exact:
            # node value = immediate reward + exact value of the afterstate
            solved = solve_mcn(after_G,
                               after_Omega,
                               after_Phi,
                               after_Lambda,
                               J=after_J,
                               exact=True)
            scores[node] = int(gain + solved[0])
            continue

        # wrap the afterstate so that the neural network can read it
        afterstate = Instance(after_G, after_Omega, after_Phi, after_Lambda,
                              after_J, 0)
        afterstate_torch = instance_to_torch(afterstate)
        graph_batch = Batch.from_data_list([afterstate_torch.G_torch]).to(device)
        # pick the expert matching the afterstate's budgets
        expert = get_target_net(list_experts, after_Omega, after_Phi,
                                after_Lambda, Omega_max, Phi_max, Lambda_max)
        # approximate value of the afterstate
        prediction = expert(graph_batch,
                            afterstate_torch.n_nodes,
                            afterstate_torch.Omegas,
                            afterstate_torch.Phis,
                            afterstate_torch.Lambdas,
                            afterstate_torch.Omegas_norm,
                            afterstate_torch.Phis_norm,
                            afterstate_torch.Lambdas_norm,
                            afterstate_torch.J)
        approx_value = float(prediction)
        if has_weights:
            scores[node] = round(gain + approx_value, 1)
        else:
            # NOTE(review): the unweighted case adds the current budgets
            # Omega + Lambda instead of the reward - confirm this is intended
            scores[node] = round(Omega + Lambda + approx_value, 1)

    # plot the values
    node_colors = np.array(list(scores.values()))
    nx.draw_spring(G,
                   with_labels=True,
                   node_size=600,
                   node_color=node_colors,
                   cmap='viridis_r',
                   alpha=1.0,
                   edge_color='gray',
                   arrows=draw_arrows,
                   width=3,
                   labels=scores,
                   font_size=12,
                   font_color='white')
def metrics_each_stage_cur(Omega_max, Phi_max, Lambda_max, list_experts, exact_protection=False, path_test_data=None, DQN=False, **kwargs):
    """Compute the optimality gap on test sets of exactly solved instances.

    The test set is a list holding one dataset per total budget (the dataset
    at index k corresponds to budget k + 1). Every instance is solved with
    the heuristic and compared with the exact value stored in
    ``instance.value``. The average approximation ratio and optimality gap
    are printed.

    Args:
        Omega_max, Phi_max, Lambda_max: maximum budgets; used to attribute
            each total budget to a player and forwarded to the solver.
        list_experts: experts used by the heuristic. When ``DQN`` is true,
            only ``list_experts[0]`` is used.
        exact_protection: forwarded to ``solve_mcn``; also changes the first
            dataset taken into account in the printed averages.
        path_test_data: folder containing ``test_set.gz``. When None, a test
            set is first generated with ``generate_test_set`` and read from
            ``data/test_data``.
        DQN: solve with ``solve_greedy_dqn`` instead of ``solve_mcn``.
        **kwargs: forwarded to ``generate_test_set``.

    Returns:
        A tuple ``(df_budget, df_player, index_budget, player_names)``:
        ``df_budget`` and ``df_player`` are dicts of metric lists (one entry
        per budget, respectively per player), ``index_budget`` is the list of
        LaTeX row labels for the budgets and ``player_names`` is
        ``['Vaccinator', 'Attacker', 'Protector']``.
    """

    # if the test set was not given
    if path_test_data is None:
        # generate the test set
        # NOTE(review): the folder read below is always 'data/test_data';
        # confirm generate_test_set writes there for every kwargs combination
        generate_test_set(Omega_max=Omega_max,
                          Phi_max=Phi_max,
                          Lambda_max=Lambda_max,
                          **kwargs)
        path_test_data = os.path.join('data', 'test_data')

    path_test_set = os.path.join(path_test_data, 'test_set.gz')
    # NOTE(review): pickle.load can execute arbitrary code; only load files
    # coming from a trusted source.
    with open(path_test_set, "rb") as test_file:
        test_set = pickle.load(test_file)

    print(
        "=========================================================================="
    )
    print("Computing the values using the heuristic... \n")

    # Initialize the variables
    player_values_true = [[], [], []]
    player_values_heuristic = [[], [], []]
    budget_values_true = []
    budget_values_heuristic = []
    # Raw strings keep the LaTeX backslashes without relying on invalid
    # escape sequences; the resulting keys are unchanged.
    eta_label = r'$\eta(\%)$'
    zeta_label = r'$\zeta$'
    df_player = dict()
    df_budget = dict()
    df_player[eta_label] = []
    df_player[zeta_label] = []
    df_budget[eta_label] = []
    df_budget[zeta_label] = []

    # For each learning stage, there is a corresponding test set
    for k, dataset in enumerate(tqdm(test_set)):
        # Initialize the variables of the learning stage
        budget = k + 1
        budget_values_true.append([])
        budget_values_heuristic.append([])
        # The player bucket only depends on the budget of the stage
        if budget <= Lambda_max:
            player = 2
        elif budget <= Lambda_max + Phi_max:
            player = 1
        else:
            player = 0
        # Iterate over the instances in the dataset
        for instance in dataset:
            if not DQN:
                # Only the value is needed; the other outputs are ignored
                value_heuristic, _, _, _ = solve_mcn(
                    instance.G,
                    instance.Omega,
                    instance.Phi,
                    instance.Lambda,
                    J=instance.J,
                    Omega_max=Omega_max,
                    Phi_max=Phi_max,
                    Lambda_max=Lambda_max,
                    exact=False,
                    list_experts=list_experts,
                    exact_protection=exact_protection)
            else:
                value_heuristic = solve_greedy_dqn(instance, list_experts[0])
            value_exact = instance.value
            # add the values to memory
            budget_values_true[k].append(value_exact)
            budget_values_heuristic[k].append(value_heuristic)
            player_values_true[player].append(value_exact)
            player_values_heuristic[player].append(value_heuristic)
        # compute the budget's metrics
        df_budget[eta_label].append(
            opt_gap(budget_values_true[k], budget_values_heuristic[k]) * 100)
        df_budget[zeta_label].append(
            approx_ratio(budget_values_true[k], budget_values_heuristic[k]))

    # Compute the player's metrics
    for player in [0, 1, 2]:
        df_player[eta_label].append(
            opt_gap(player_values_true[player],
                    player_values_heuristic[player]) * 100)
        df_player[zeta_label].append(
            approx_ratio(player_values_true[player],
                         player_values_heuristic[player]))

    # Compute the average metrics over all datasets
    # doesn't take into account the values already solved exactly in the heuristic method
    # (e.g for Lambda = 1 if exact_protection=False, the instances are solved exactly)
    # NOTE(review): first_budget is used as a 0-based index into test_set
    first_budget = 1 + Lambda_max * exact_protection
    tot_val_approx = [
        value for values in budget_values_heuristic[first_budget:]
        for value in values
    ]
    tot_val_true = [
        value for values in budget_values_true[first_budget:]
        for value in values
    ]
    opt_gap_mean = opt_gap(tot_val_true, tot_val_approx)
    approx_ratio_mean = approx_ratio(tot_val_true, tot_val_approx)
    print('Average Approx Ratio : ', approx_ratio_mean)
    print("Average optimality gap : %f %%" % (opt_gap_mean * 100))

    # Compute the index of the budget's dataframe
    index_budget = []
    for budget in range(1, Omega_max + Phi_max + Lambda_max + 1):
        if budget <= Lambda_max:
            index_budget.append(
                r'$\Omega = 0, \Phi = 0 , \Lambda = %d $' % budget)
        elif budget <= Lambda_max + Phi_max:
            index_budget.append(
                r'$\Omega = 0, \Phi = %d , \Lambda \in [\![0, %d]\!]$' %
                (budget - Lambda_max, Lambda_max))
        else:
            index_budget.append(
                r'$\Omega = %d, \Phi \in [\![1, %d]\!] , \Lambda \in [\![0, %d]\!] $'
                % (budget - Lambda_max - Phi_max, Phi_max, Lambda_max))

    return df_budget, df_player, index_budget, [
        'Vaccinator', 'Attacker', 'Protector'
    ]