def run(save_loc="ER_40spin/eco",
        graph_save_loc="_graphs/validation/ER_40spin_p15_100graphs.pkl",
        batched=True,
        max_batch_size=None):
    """Evaluate the best ECO-DQN checkpoint on a set of validation graphs.

    Loads ``network_best.pth`` from ``<save_loc>/network``, runs
    ``test_network`` over the graphs in ``graph_save_loc`` and pickles the
    summary, raw and per-step history results into ``<save_loc>/data``.

    Args:
        save_loc: Experiment folder expected to contain ``data/`` and ``network/``.
        graph_save_loc: Pickle file of validation graphs (adjacency matrices).
        batched: Whether test_network runs attempts as parallel batches.
        max_batch_size: Cap on the parallel batch size (None = all attempts at once).
    """
    print("\n----- Running {} -----\n".format(os.path.basename(__file__)))

    ####################################################
    # NETWORK LOCATION
    ####################################################

    data_folder = os.path.join(save_loc, 'data')
    network_folder = os.path.join(save_loc, 'network')

    print("data folder :", data_folder)
    print("network folder :", network_folder)

    network_save_path = os.path.join(network_folder, 'network_best.pth')
    print("network params :", network_save_path)

    ####################################################
    # NETWORK SETUP
    ####################################################

    network_fn = MPNN
    network_args = {
        'n_layers': 3,
        'n_features': 64,
        'n_hid_readout': [],
        'tied_weights': False
    }

    ####################################################
    # SET UP ENVIRONMENT AND VARIABLES
    ####################################################

    step_factor = 2  # episode length = n_spins * step_factor

    env_args = {
        'observables': DEFAULT_OBSERVABLES,
        'reward_signal': RewardSignal.BLS,
        'extra_action': ExtraAction.NONE,
        'optimisation_target': OptimisationTarget.CUT,
        'spin_basis': SpinBasis.BINARY,
        'norm_rewards': True,
        'memory_length': None,
        'horizon_length': None,
        'stag_punishment': None,
        # NOTE(review): hard-coded for the default 40-spin validation graphs —
        # confirm if reusing this entry point with a different graph size.
        'basin_reward': 1. / 40,
        'reversible_spins': True
    }

    ####################################################
    # LOAD VALIDATION GRAPHS
    ####################################################

    graphs_test = load_graph_set(graph_save_loc)

    ####################################################
    # SETUP NETWORK TO TEST
    ####################################################

    # An environment on the first graph is only needed to read the
    # observation-space shape when constructing the network.
    test_env = ising_env.make("SpinSystem",
                              SingleGraphGenerator(graphs_test[0]),
                              graphs_test[0].shape[0] * step_factor,
                              **env_args)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    # NOTE(review): torch.device(device) constructs an object and discards it;
    # it does not change any global default. Kept for parity with the original.
    torch.device(device)
    print("Set torch default device to {}.".format(device))

    network = network_fn(n_obs_in=test_env.observation_space.shape[1],
                         **network_args).to(device)
    network.load_state_dict(torch.load(network_save_path, map_location=device))
    # Inference only: freeze all parameters and switch to eval mode.
    for param in network.parameters():
        param.requires_grad = False
    network.eval()

    print("Successfully created agent with pre-trained MPNN.\nMPNN architecture\n\n{}"
          .format(repr(network)))

    ####################################################
    # TEST NETWORK ON VALIDATION GRAPHS
    ####################################################

    results, results_raw, history = test_network(network, env_args, graphs_test,
                                                 device, step_factor,
                                                 return_raw=True,
                                                 return_history=True,
                                                 batched=batched,
                                                 max_batch_size=max_batch_size)

    # Derive the three output filenames from the graph-set filename.
    base = os.path.splitext(os.path.split(graph_save_loc)[-1])[0]
    results_fname = "results_" + base + ".pkl"
    results_raw_fname = "results_" + base + "_raw.pkl"
    history_fname = "results_" + base + "_history.pkl"

    for res, fname, label in zip([results, results_raw, history],
                                 [results_fname, results_raw_fname, history_fname],
                                 ["results", "results_raw", "history"]):
        save_path = os.path.join(data_folder, fname)
        res.to_pickle(save_path)
        print("{} saved to {}".format(label, save_path))
def __test_network_batched(network, env_args, graphs_test, device=None, step_factor=1,
                           n_attempts=50, return_raw=False, return_history=False,
                           max_batch_size=None):
    """Test ``network`` on each graph in ``graphs_test``, running attempts in parallel batches.

    For each graph the agent is run ``n_attempts`` times (once if the MDP is
    irreversible), alongside greedy baselines with +1 and random spin
    initialisations.  Returns a pandas DataFrame of per-graph results, plus
    optionally the raw per-attempt data and the per-step history.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    # NOTE(review): torch.device(device) builds an object and discards it — it
    # does not set a default device.
    torch.device(device)

    # HELPER FUNCTION FOR NETWORK TESTING

    acting_in_reversible_spin_env = env_args['reversible_spins']

    # allowed_action_state is the observation value (column 0) that marks a
    # spin as still actionable; its form depends on the spin basis.
    if env_args['reversible_spins']:
        # If MDP is reversible, both actions are allowed.
        if env_args['spin_basis'] == SpinBasis.BINARY:
            allowed_action_state = (0, 1)
        elif env_args['spin_basis'] == SpinBasis.SIGNED:
            allowed_action_state = (1, -1)
    else:
        # If MDP is irreversible, only return the state of spins that haven't been flipped.
        if env_args['spin_basis'] == SpinBasis.BINARY:
            allowed_action_state = 0
        if env_args['spin_basis'] == SpinBasis.SIGNED:
            allowed_action_state = 1

    def predict(states):
        # Greedy (argmax) action selection from the network's Q-values.
        # ``states`` is either a single observation (dim 1 after the network)
        # or a batch; in the irreversible case already-flipped spins are
        # masked out with a large negative Q-value before the argmax.
        qs = network(states)

        if acting_in_reversible_spin_env:
            if qs.dim() == 1:
                actions = [qs.argmax().item()]
            else:
                actions = qs.argmax(1, True).squeeze(1).cpu().numpy()
            return actions
        else:
            if qs.dim() == 1:
                x = (states.squeeze()[:, 0] == allowed_action_state).nonzero()
                actions = [x[qs[x].argmax().item()].item()]
            else:
                disallowed_actions_mask = (states[:, :, 0] != allowed_action_state)
                qs_allowed = qs.masked_fill(disallowed_actions_mask, -1000)
                actions = qs_allowed.argmax(1, True).squeeze(1).cpu().numpy()
            return actions

    # NETWORK TESTING

    results = []
    results_raw = []
    if return_history:
        history = []

    # With irreversible spins every attempt is deterministic, so one suffices.
    n_attempts = n_attempts if env_args["reversible_spins"] else 1

    for j, test_graph in enumerate(graphs_test):

        i_comp = 0   # attempts completed for this graph
        i_batch = 0  # batches completed for this graph
        t_total = 0  # cumulative agent-acting time for this graph

        n_spins = test_graph.shape[0]
        n_steps = int(n_spins * step_factor)

        test_env = ising_env.make("SpinSystem",
                                  SingleGraphGenerator(test_graph),
                                  n_steps,
                                  **env_args)

        print("Running greedy solver with +1 initialisation of spins...", end="...")
        # Calculate the greedy cut with all spins initialised to +1
        greedy_env = deepcopy(test_env)
        greedy_env.reset(spins=np.array([1] * test_graph.shape[0]))

        greedy_agent = Greedy(greedy_env)
        greedy_agent.solve()

        greedy_single_cut = greedy_env.get_best_cut()
        greedy_single_spins = greedy_env.best_spins
        print("done.")

        if return_history:
            actions_history = []
            rewards_history = []
            scores_history = []

        # Per-graph accumulators across batches.
        best_cuts = []
        init_spins = []
        best_spins = []

        greedy_cuts = []
        greedy_spins = []

        while i_comp < n_attempts:

            if max_batch_size is None:
                batch_size = n_attempts
            else:
                batch_size = min(n_attempts - i_comp, max_batch_size)

            i_comp_batch = 0

            if return_history:
                # First row is a placeholder: no action/reward precedes the
                # initial state (scores get their initial row appended below).
                actions_history_batch = [[None] * batch_size]
                rewards_history_batch = [[None] * batch_size]
                scores_history_batch = []

            test_envs = [None] * batch_size
            best_cuts_batch = [-1e3] * batch_size
            init_spins_batch = [[] for _ in range(batch_size)]
            best_spins_batch = [[] for _ in range(batch_size)]

            greedy_envs = [None] * batch_size
            greedy_cuts_batch = []
            greedy_spins_batch = []

            obs_batch = [None] * batch_size

            print("Preparing batch of {} environments for graph {}.".format(batch_size, j),
                  end="...")
            for i in range(batch_size):
                env = deepcopy(test_env)
                obs_batch[i] = env.reset()
                test_envs[i] = env
                # Each greedy baseline starts from the same random reset state.
                greedy_envs[i] = deepcopy(env)
                init_spins_batch[i] = env.best_spins
            if return_history:
                scores_history_batch.append([env.calculate_score() for env in test_envs])
            print("done.")

            # Calculate the max cut acting w.r.t. the network
            t_start = time.time()

            # pool = mp.Pool(processes=16)

            k = 0
            while i_comp_batch < batch_size:
                t1 = time.time()

                # Note: Do not convert list of np.arrays to FloatTensor, it is very slow!
                # see: https://github.com/pytorch/pytorch/issues/13918
                # Hence, here we convert a list of np arrays to a np array.
                # (obs_batch alternates between a Python list and a tensor here.)
                obs_batch = torch.FloatTensor(np.array(obs_batch)).to(device)
                actions = predict(obs_batch)
                obs_batch = []

                if return_history:
                    scores = []
                    rewards = []

                # NOTE(review): ``actions`` only covers still-active envs, but
                # it is zipped positionally against the full ``test_envs``
                # (including finished/None slots). This aligns correctly only
                # if all episodes terminate on the same step (fixed horizon) —
                # confirm before using with variable-length episodes.
                i = 0
                for env, action in zip(test_envs, actions):
                    if env is not None:
                        obs, rew, done, info = env.step(action)

                        if return_history:
                            scores.append(env.calculate_score())
                            rewards.append(rew)

                        if not done:
                            obs_batch.append(obs)
                        else:
                            best_cuts_batch[i] = env.get_best_cut()
                            best_spins_batch[i] = env.best_spins
                            i_comp_batch += 1
                            i_comp += 1
                            test_envs[i] = None
                    i += 1
                k += 1

                if return_history:
                    actions_history_batch.append(actions)
                    scores_history_batch.append(scores)
                    rewards_history_batch.append(rewards)

                # print("\t",
                #       "Par. steps :", k,
                #       "Env steps : {}/{}".format(k/batch_size,n_steps),
                #       'Time: {0:.3g}s'.format(time.time()-t1))

            t_total += (time.time() - t_start)
            i_batch += 1
            print("Finished agent testing batch {}.".format(i_batch))

            if env_args["reversible_spins"]:
                # Greedy baseline from each attempt's random initial spins.
                print("Running greedy solver with {} random initialisations of spins for batch {}..."
                      .format(batch_size, i_batch), end="...")
                for env in greedy_envs:
                    Greedy(env).solve()
                    cut = env.get_best_cut()
                    greedy_cuts_batch.append(cut)
                    greedy_spins_batch.append(env.best_spins)
                print("done.")

            if return_history:
                actions_history += actions_history_batch
                rewards_history += rewards_history_batch
                scores_history += scores_history_batch

            best_cuts += best_cuts_batch
            init_spins += init_spins_batch
            best_spins += best_spins_batch

            if env_args["reversible_spins"]:
                greedy_cuts += greedy_cuts_batch
                greedy_spins += greedy_spins_batch

        # print("\tGraph {}, par. steps: {}, comp: {}/{}".format(j, k, i_comp, batch_size),
        #       end="\r" if n_spins<100 else "")

        # Summarise this graph: best / mean agent cut across attempts.
        i_best = np.argmax(best_cuts)
        best_cut = best_cuts[i_best]
        sol = best_spins[i_best]
        mean_cut = np.mean(best_cuts)

        if env_args["reversible_spins"]:
            idx_best_greedy = np.argmax(greedy_cuts)
            greedy_random_cut = greedy_cuts[idx_best_greedy]
            greedy_random_spins = greedy_spins[idx_best_greedy]
            greedy_random_mean_cut = np.mean(greedy_cuts)
        else:
            # Irreversible MDP: no random-init greedy runs, reuse the +1-init result.
            greedy_random_cut = greedy_single_cut
            greedy_random_spins = greedy_single_spins
            greedy_random_mean_cut = greedy_single_cut

        print('Graph {}, best(mean) cut: {}({}), greedy cut (rand init / +1 init) : {} / {}. ({} attempts in {}s)\t\t\t'
              .format(j, best_cut, mean_cut, greedy_random_cut, greedy_single_cut,
                      n_attempts, np.round(t_total, 2)))

        results.append([best_cut, sol, mean_cut,
                        greedy_single_cut, greedy_single_spins,
                        greedy_random_cut, greedy_random_spins, greedy_random_mean_cut,
                        t_total / (n_attempts)])
        results_raw.append([init_spins, best_cuts, best_spins, greedy_cuts, greedy_spins])
        if return_history:
            # Transpose so each row is one attempt's trajectory.
            history.append([np.array(actions_history).T.tolist(),
                            np.array(scores_history).T.tolist(),
                            np.array(rewards_history).T.tolist()])

    results = pd.DataFrame(data=results,
                           columns=["cut", "sol", "mean cut",
                                    "greedy (+1 init) cut", "greedy (+1 init) sol",
                                    "greedy (rand init) cut", "greedy (rand init) sol",
                                    "greedy (rand init) mean cut", "time"])
    results_raw = pd.DataFrame(data=results_raw,
                               columns=["init spins", "cuts", "sols",
                                        "greedy cuts", "greedy sols"])
    if return_history:
        history = pd.DataFrame(data=history, columns=["actions", "scores", "rewards"])

    if return_raw == False and return_history == False:
        return results
    else:
        ret = [results]
        if return_raw:
            ret.append(results_raw)
        if return_history:
            ret.append(history)
        return ret
def run(save_loc="pretrained_agent/s2v",
        network_save_loc="experiments_new/pretrained_agent/networks/s2v/network_best_ER_200spin.pth",
        graph_save_loc="_graphs/benchmarks/ising_125spin_graphs.pkl",
        batched=True,
        max_batch_size=5):
    """Evaluate a pre-trained S2V-style agent (irreversible spins) on benchmark graphs.

    Loads the network weights from ``network_save_loc``, runs ``test_network``
    over the graphs in ``graph_save_loc`` and pickles the summary, raw and
    per-step history results into ``save_loc``.

    Args:
        save_loc: Output folder for the result pickles (created if missing).
        network_save_loc: Path to the pre-trained network state dict.
        graph_save_loc: Pickle file of benchmark graphs (adjacency matrices).
        batched: Whether test_network runs attempts as parallel batches.
        max_batch_size: Cap on the parallel batch size.
    """
    print("\n----- Running {} -----\n".format(os.path.basename(__file__)))

    ####################################################
    # FOLDER LOCATIONS
    ####################################################

    print("save location :", save_loc)
    print("network params :", network_save_loc)
    mk_dir(save_loc)

    ####################################################
    # NETWORK SETUP
    ####################################################

    network_fn = MPNN
    network_args = {
        'n_layers': 3,
        'n_features': 64,
        'n_hid_readout': [],
        'tied_weights': False
    }

    ####################################################
    # SET UP ENVIRONMENT AND VARIABLES
    ####################################################

    step_factor = 1  # episode length = n_spins * step_factor

    # S2V-DQN-style setup: spin-state-only observations, dense rewards,
    # irreversible spin flips.
    env_args = {'observables': [Observable.SPIN_STATE],
                'reward_signal': RewardSignal.DENSE,
                'extra_action': ExtraAction.NONE,
                'optimisation_target': OptimisationTarget.CUT,
                'spin_basis': SpinBasis.BINARY,
                'norm_rewards': True,
                'memory_length': None,
                'horizon_length': None,
                'stag_punishment': None,
                'basin_reward': None,
                'reversible_spins': False}

    ####################################################
    # LOAD VALIDATION GRAPHS
    ####################################################

    graphs_test = load_graph_set(graph_save_loc)

    ####################################################
    # SETUP NETWORK TO TEST
    ####################################################

    # An environment on the first graph is only needed to read the
    # observation-space shape when constructing the network.
    test_env = ising_env.make("SpinSystem",
                              SingleGraphGenerator(graphs_test[0]),
                              graphs_test[0].shape[0] * step_factor,
                              **env_args)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    # NOTE(review): torch.device(device) constructs an object and discards it;
    # it does not change any global default. Kept for parity with the original.
    torch.device(device)
    print("Set torch default device to {}.".format(device))

    network = network_fn(n_obs_in=test_env.observation_space.shape[1],
                         **network_args).to(device)
    network.load_state_dict(torch.load(network_save_loc, map_location=device))
    # Inference only: freeze all parameters and switch to eval mode.
    for param in network.parameters():
        param.requires_grad = False
    network.eval()

    print("Successfully created agent with pre-trained MPNN.\nMPNN architecture\n\n{}"
          .format(repr(network)))

    ####################################################
    # TEST NETWORK ON VALIDATION GRAPHS
    ####################################################

    results, results_raw, history = test_network(network, env_args, graphs_test,
                                                 device, step_factor,
                                                 return_raw=True,
                                                 return_history=True,
                                                 n_attempts=50,
                                                 batched=batched,
                                                 max_batch_size=max_batch_size)

    # Derive the three output filenames from the graph-set filename.
    base = os.path.splitext(os.path.split(graph_save_loc)[-1])[0]
    results_fname = "results_" + base + ".pkl"
    results_raw_fname = "results_" + base + "_raw.pkl"
    history_fname = "results_" + base + "_history.pkl"

    for res, fname, label in zip([results, results_raw, history],
                                 [results_fname, results_raw_fname, history_fname],
                                 ["results", "results_raw", "history"]):
        save_path = os.path.join(save_loc, fname)
        res.to_pickle(save_path)
        print("{} saved to {}".format(label, save_path))
def __test_network_sequential(network, env_args, graphs_test, step_factor=1,
                              n_attempts=50, return_raw=False, return_history=False):
    """Test ``network`` on each graph in ``graphs_test``, one attempt at a time.

    Slower, sequential counterpart of the batched tester: for each graph the
    agent is run ``n_attempts`` times (once if the MDP is irreversible),
    alongside greedy baselines with +1 and random spin initialisations.

    Returns:
        pandas DataFrame with one row per graph (best cut/solution, greedy
        baselines and mean solve time).

    Raises:
        NotImplementedError: if ``return_raw`` or ``return_history`` is set —
            only the batched tester supports those.
    """
    if return_raw or return_history:
        raise NotImplementedError(
            "I've not got to this yet! Used the batched test script (it's faster anyway)."
        )

    results = []

    # With irreversible spins every attempt is deterministic, so one suffices.
    n_attempts = n_attempts if env_args["reversible_spins"] else 1

    for i, test_graph in enumerate(graphs_test):
        n_steps = int(test_graph.shape[0] * step_factor)

        # Running bests across attempts for this graph.
        best_cut = -1e3
        best_spins = []

        greedy_random_cut = -1e3
        greedy_random_spins = []

        times = []

        test_env = ising_env.make("SpinSystem",
                                  SingleGraphGenerator(test_graph),
                                  n_steps,
                                  **env_args)

        net_agent = Network(network, test_env,
                            record_cut=False,
                            record_rewards=False,
                            record_qs=False)

        # Greedy baseline with all spins initialised to +1 (computed once).
        greedy_env = deepcopy(test_env)
        greedy_env.reset(spins=np.array([1] * test_graph.shape[0]))
        greedy_agent = Greedy(greedy_env)
        greedy_agent.solve()
        greedy_single_cut = greedy_env.get_best_cut()
        greedy_single_spins = greedy_env.best_spins

        for k in range(n_attempts):
            # Fresh episode for the agent; fresh greedy run from the same
            # (randomly reset) environment state.
            net_agent.reset(clear_history=True)
            greedy_env = deepcopy(test_env)
            greedy_agent = Greedy(greedy_env)

            tstart = time.time()
            net_agent.solve()
            times.append(time.time() - tstart)

            cut = test_env.get_best_cut()
            if cut > best_cut:
                best_cut = cut
                best_spins = test_env.best_spins

            greedy_agent.solve()
            greedy_cut = greedy_env.get_best_cut()
            if greedy_cut > greedy_random_cut:
                greedy_random_cut = greedy_cut
                greedy_random_spins = greedy_env.best_spins

            print('\nGraph {}, attempt : {}/{}, best cut : {}, greedy cut (rand init / +1 init) : {} / {}\t\t\t'
                  .format(i + 1, k, n_attempts, best_cut, greedy_random_cut,
                          greedy_single_cut),
                  end=".")

        results.append([best_cut, best_spins,
                        greedy_single_cut, greedy_single_spins,
                        greedy_random_cut, greedy_random_spins,
                        np.mean(times)])

    return pd.DataFrame(data=results,
                        columns=["cut", "sol",
                                 "greedy (+1 init) cut", "greedy (+1 init) sol",
                                 "greedy (rand init) cut", "greedy (rand init) sol",
                                 "time"])