def test_default_algo(use_ref_model: bool = False):
    env = utils_env.Environment()
    # use default params
    ag = agent.DRLAgent()
    if use_ref_model:
        print('... Test the agent using reference model ...')
        ag.set_model_path('ref')
    al = algo.DRLAlgo(env, ag)
    al.test()
def fit(self, i: int, env: utils_env.Environment):
    self.ag = agent.DRLAgent(self.state_size, self.action_size,
                             self.memory_size, self.gamma, self.batch_size,
                             self.expl_noise, self.model_learning_rate,
                             self.num_fc_actor, self.num_fc_critic)
    self.ag.set_model_path(i)  # save each candidate's model separately
    self.al = algo.DRLAlgo(env, self.ag, self.num_episodes)
    self.al.set_image_path(i)  # save each candidate's score separately
    # do not close the Env so that other candidates can be trained in it
    history = self.al.train(with_close=False)
    score = self._get_score(history)
    return score
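# A sketch of how a fit() like the one above could be driven during a simple
# hyperparameter search. The `candidates` list of pre-configured tuner objects
# and the Environment.close() call are assumptions for illustration, not
# project code; they only show why train(with_close=False) matters here.
import numpy as np

def run_search(candidates):
    env = utils_env.Environment()            # one Env shared by all candidates
    best_i, best_score = -1, -np.inf
    for i, tuner in enumerate(candidates):   # each tuner holds its own hyperparameters
        score = tuner.fit(i, env)            # trains candidate i and returns its score
        if score > best_score:
            best_i, best_score = i, score
    env.close()                              # close the Env only after the last candidate
    print('Best candidate: {:d}, score: {:.3f}'.format(best_i, best_score))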
def test_default_algo(use_ref_model: bool = False):
    env = utils_env.Environment()
    model_name_suffix = ''
    if use_ref_model:
        print('... Test the agent using reference model ...')
        model_name_suffix = 'ref_'
    # use default params
    ag_1 = agent.DRLAgent()
    ag_1.set_model_path(model_name_suffix + str(1))
    ag_2 = agent.DRLAgent()
    ag_2.set_model_path(model_name_suffix + str(2))
    al = algo.DRLAlgo(env, ag_1, ag_2)
    al.test()
def train_two_agents():
    env = utils_env.Environment()
    # use default params
    ag_1 = agent.DRLAgent()
    ag_1.set_model_path(1)
    ag_2 = agent.DRLAgent()
    ag_2.set_model_path(2)
    al = algo.DRLAlgo(env, ag_1, ag_2)
    history, best_e, best_score = al.train()

    print('\nFinal score: {:.3f}'.format(
        np.mean(history[-const.rolling_mean_N:])))
    print('Final memory length:', ag_1.memory.get_length())
    print('Best score in {:d} episodes, avg_score: {:.3f}'.format(
        best_e, best_score))

    # plot losses
    losses_lists = [
        ag_1.actor_loss_list, ag_2.actor_loss_list,
        ag_1.critic_loss_list, ag_2.critic_loss_list
    ]
    losses_labels = [
        'agent_1_actor', 'agent_2_actor',
        'agent_1_critic', 'agent_2_critic'
    ]
    utils_plot.plot_loss(losses_lists, losses_labels)

    # plot noise
    utils_plot.plot_scatter(ag_1.noise_list, title_text='Noise',
                            fp=const.file_path_img_noise)

    # plot memory actions
    memory_actions = np.array([t[1] for t in ag_1.memory.memory])
    utils_plot.plot_scatter(memory_actions, title_text='Actions',
                            fp=const.file_path_img_actions)

    # show mean/std of memory actions (first two columns belong to agent_1,
    # last two to agent_2)
    mean_a = np.mean(memory_actions, axis=0)
    std_a = np.std(memory_actions, axis=0)
    print('Mean/std actions agent_1:', mean_a[:2], std_a[:2])
    print('Mean/std actions agent_2:', mean_a[2:], std_a[2:])
def fit(self, i: int, env: utils_env.Environment):
    self.ag_1 = agent.DRLAgent(self.num_states, self.num_actions,
                               self.memory_size, self.gamma, self.batch_size,
                               self.tau, self.model_learning_rate,
                               self.num_fc_1, self.num_fc_2)
    self.ag_1.set_model_path(str(i) + '_0')  # save each candidate's model separately
    self.ag_2 = agent.DRLAgent(self.num_states, self.num_actions,
                               self.memory_size, self.gamma, self.batch_size,
                               self.tau, self.model_learning_rate,
                               self.num_fc_1, self.num_fc_2)
    self.ag_2.set_model_path(str(i) + '_1')  # save each candidate's model separately
    self.al = algo.DRLAlgo(env, self.ag_1, self.ag_2, self.num_episodes)
    self.al.set_image_path(i)  # save each candidate's score separately
    # do not close the Env so that other candidates can be trained in it
    history, _, best_score = self.al.train(with_close=False)  # TODO: use best_score?
    score = self._get_score(history)
    return score
def train_default_algo():
    env = utils_env.Environment()
    # use default params
    ag = agent.DRLAgent()
    al = algo.DRLAlgo(env, ag)
    al.train()
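# A minimal entry point tying the helpers above together. This is a sketch:
# the argparse flags below are assumptions for illustration, not part of the
# project's actual CLI.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Train or test the DRL agent(s)')
    parser.add_argument('--mode', choices=['train', 'test'], default='train')
    parser.add_argument('--use-ref-model', action='store_true',
                        help='test with the stored reference model')
    args = parser.parse_args()

    if args.mode == 'train':
        train_default_algo()
    else:
        test_default_algo(use_ref_model=args.use_ref_model)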