def train(self, training_games_to_play=500):
    """Run the training loop until the requested number of games is played.

    Every 256th iteration a progress line is appended to
    progress_training.log (and, for curiosity agents, an ICM line to
    icm_progress_training.log) and a summary is echoed to the console.
    The network is checkpointed every 50000 iterations and saved once
    more after the loop ends.

    Returns 0 on success, or the non-zero code from agent.main() on error.
    """
    progress_log = rysy.Log(self.network_path + "progress_training.log")
    if self.agent_type == "curiosity":
        icm_progress_log = rysy.Log(self.network_path + "icm_progress_training.log")

    self.training_games_to_play = training_games_to_play

    # process training
    while self.env.get_games_count() < self.training_games_to_play:
        result = self.agent.main()
        if result != 0:
            print("ERROR : agent returned ", result, "\n\n\n\n")
            return result

        # print training progress %, and score, every 256th iterations
        if self.env.get_iterations() % 256 == 0:
            fields = [
                self.env.get_iterations(),
                self.env.get_games_count(),
                self.agent.get_epsilon_start(),
                self.env.get_score(),
                self.env.get_active_env_id(),
            ]
            # append per-environment (games, score) pairs
            envs_score = self.env.get_envs_score()
            envs_games = self.env.get_envs_games_count()
            for i in range(len(envs_score)):
                fields.append(envs_games[i])
                fields.append(envs_score[i])
            progress_log.put_string("".join(str(f) + " " for f in fields) + "\n")

            if self.agent_type == "curiosity":
                icm_fields = [
                    self.env.get_iterations(),
                    self.env.get_games_count(),
                    self.agent.get_icm_result().inverse_loss,
                    self.agent.get_icm_result().forward_loss,
                    self.agent.get_icm_result().inverse_classification_success,
                ]
                icm_progress_log.put_string("".join(str(f) + " " for f in icm_fields) + "\n")

            print("done = ", self.env.get_games_count()*100.0/self.training_games_to_play, "%", " eps = ", self.agent.get_epsilon_start(), " iterations = ", self.env.get_iterations(), " score = ", self.env.get_score(), " active_env = ", self.env.get_active_env_id())

        # periodic checkpoint
        if self.env.get_iterations() % 50000 == 0:
            print("SAVING network")
            self.agent.save(self.network_path)

    # final save after the last training game
    self.agent.save(self.network_path)
    return 0
def train(self, training_games_to_play=500):
    """Play training games until the requested count is reached.

    Appends an iterations/games/score line to progress_training.log every
    256th iteration, checkpoints the agent every 50000 iterations, and
    saves once more after training finishes.
    """
    progress_log = rysy.Log(self.agent_config_path + "progress_training.log")

    self.training_games_to_play = training_games_to_play

    # process training
    while self.env.get_games_count() < self.training_games_to_play:
        self.agent.main()

        # print training progress %, and score, every 256th iterations
        if self.env.get_iterations() % 256 == 0:
            fields = (
                self.env.get_iterations(),
                self.env.get_games_count(),
                self.env.get_score(),
            )
            progress_log.put_string("".join(str(f) + " " for f in fields) + "\n")

            print("done = ", self.env.get_games_count() * 100.0 / self.training_games_to_play, "%", " iterations = ", self.env.get_iterations(), " score = ", self.env.get_score())

        # periodic checkpoint
        if self.env.get_iterations() % 50000 == 0:
            print("SAVING network")
            self.agent.save(self.agent_config_path)

    # final save after the last training game
    self.agent.save(self.agent_config_path)
def test(self, testing_games_to_play=100):
    """Evaluate the loaded agent for a number of testing games.

    Loads the trained network, resets the score, forces greedy (best)
    action selection, and logs progress to progress_testing.log every
    256th iteration. Returns the final environment score.
    """
    progress_log = rysy.Log(self.network_path + "progress_testing.log")
    self.agent.load(self.network_path)

    # start testing with a clean score
    self.env.reset_score()

    # choose only the best action — no exploration during testing
    self.agent.run_best_enable()

    # the games counter continues from training, so play until
    # training + testing games have been completed in total
    while self.env.get_games_count() < testing_games_to_play + self.training_games_to_play:
        self.agent.main()

        if self.env.get_iterations() % 256 == 0:
            fields = (
                self.env.get_iterations(),
                self.env.get_games_count() - self.training_games_to_play,
                self.agent.get_epsilon_training(),
                self.env.get_score(),
            )
            progress_log.put_string("".join(str(f) + " " for f in fields) + "\n")

    print("TESTING SCORE =", self.env.get_score())
    return self.env.get_score()
def train(self, training_games_to_play=500):
    """Training loop for the curiosity (ICM) agent.

    Every 256th iteration writes a progress line and an ICM metrics line
    (inverse loss, forward loss, inverse classification success) to their
    respective logs, and echoes a summary to the console. Checkpoints the
    network every 50000 iterations and saves once more at the end.
    """
    progress_log = rysy.Log(self.network_path + "progress_training.log")
    icm_progress_log = rysy.Log(self.network_path + "icm_progress_training.log")

    self.training_games_to_play = training_games_to_play

    # process training
    while self.env.get_games_count() < self.training_games_to_play:
        self.agent.main()

        # print training progress %, and score, every 256th iterations
        # (iteration count is unchanged between the original's two
        # identical %256 checks, so both log writes share one check)
        if self.env.get_iterations() % 256 == 0:
            fields = (
                self.env.get_iterations(),
                self.env.get_games_count(),
                self.agent.get_epsilon_training(),
                self.env.get_score(),
            )
            progress_log.put_string("".join(str(f) + " " for f in fields) + "\n")

            print("done = ", self.env.get_games_count() * 100.0 / self.training_games_to_play, "%", " eps = ", self.agent.get_epsilon_training(), " iterations = ", self.env.get_iterations(), " score = ", self.env.get_score())

            icm_fields = (
                self.env.get_iterations(),
                self.env.get_games_count(),
                self.agent.get_icm_result().inverse_loss,
                self.agent.get_icm_result().forward_loss,
                self.agent.get_icm_result().inverse_classification_success,
            )
            icm_progress_log.put_string("".join(str(f) + " " for f in icm_fields) + "\n")

        # periodic checkpoint
        if self.env.get_iterations() % 50000 == 0:
            print("SAVING network")
            self.agent.save(self.network_path)

    # final save after the last training game
    self.agent.save(self.network_path)
def __init__(self):
    """Initialize the 2048 environment: a 4x4 single-channel board.

    Sets up observation buffers, resets the game, creates the GL
    visualisation window, and opens the max-score log file.
    """
    env.Env.__init__(self)

    # observation geometry: 4x4 grid, one channel, one time step
    self.width, self.height = 4, 4
    self.depth, self.time = 1, 1

    # 4 discrete actions — presumably the four move directions; confirm in step logic
    self.actions_count = 4

    # allocate observation buffers, then reset the board state
    self.observation_init()
    self.reset()

    self.gui = gl_gui.GLVisualisation()

    # best tile/score value seen so far, appended to the max-score log
    self.max_value = 0.0
    self.max_score_log = rysy.Log("2048_max_score.log")
def test(self, testing_games_to_play=100):
    """Evaluate the trained agent over a number of testing games.

    Loads weights from the "trained/" subdirectory, resets the score,
    forces greedy (best) action selection, and logs per-environment
    progress every 256th iteration.

    Returns the final environment score on success, or the non-zero
    result code from agent.main() on error.
    """
    testing_progress_log = rysy.Log(self.network_path + "progress_testing.log")
    self.agent.load(self.network_path + "trained/")

    # reset score
    self.env.reset_score()

    # choose only the best action
    self.agent.run_best_enable()

    # process testing games; the games counter continues from training
    while self.env.get_games_count() < testing_games_to_play + self.training_games_to_play:
        result = self.agent.main()
        if result != 0:
            print("ERROR : agent returned ", result, "\n\n\n\n")
            return result

        if self.env.get_iterations() % 256 == 0:
            str_progress = str(self.env.get_iterations()) + " "
            str_progress += str(self.env.get_games_count() - self.training_games_to_play) + " "
            str_progress += str(self.agent.get_epsilon_start()) + " "
            str_progress += str(self.env.get_score()) + " "
            str_progress += str(self.env.get_active_env_id()) + " "

            # append per-environment (games, score) pairs
            score = self.env.get_envs_score()
            games = self.env.get_envs_games_count()
            for i in range(0, len(score)):
                str_progress += str(games[i]) + " "
                str_progress += str(score[i]) + " "

            str_progress += "\n"
            testing_progress_log.put_string(str_progress)

    # bug fix: original referenced the module-level "env" here instead of
    # self.env, which this method uses everywhere else
    print("TESTING SCORE =", self.env.get_score())
    return self.env.get_score()
def test(self, log_filename_prefix, testing_games_to_play=100):
    """Evaluate the trained agent and log per-game score statistics.

    Plays testing games with greedy action selection, collecting each
    finished game's score, then writes games count, mean, std, and the
    individual scores to <prefix>result_testing.log.
    """
    self.agent.load(self.agent_config_path)

    # choose only the best action — no exploration during evaluation
    self.agent.run_best_enable()

    per_game_scores = []
    last_game_id = 0

    # process testing games; the games counter continues from training
    while self.env.get_games_count() < testing_games_to_play + self.training_games_to_play:
        self.agent.main()

        # a change in the games counter marks the end of a game:
        # record its score and reset for the next one
        if self.env.get_games_count() != last_game_id:
            last_game_id = self.env.get_games_count()
            per_game_scores.append(self.env.get_score())
            self.env.reset_score()

    print(per_game_scores)

    mean_score = numpy.mean(per_game_scores)
    std_score = numpy.std(per_game_scores)

    lines = [
        "games count : " + str(len(per_game_scores)) + "\n",
        "mean score : " + str(mean_score) + "\n",
        "std score : " + str(std_score) + "\n",
        "games : " + "\n",
    ]
    for s in per_game_scores:
        lines.append(str(s) + "\n")

    testing_progress_log = rysy.Log(self.agent_config_path + log_filename_prefix + "result_testing.log")
    testing_progress_log.put_string("".join(lines))
epsilon_decay = 0.99999 #init DQN agent agent = libs.libs_agent.agent_dqn.DQNAgent(env, network_path + "network_config.json", gamma, replay_buffer_size, epsilon_training, epsilon_testing, epsilon_decay) ''' agent.load(network_path + "trained/") agent.run_best_enable() while True: agent.main() env._print() ''' training_progress_log = rysy.Log(network_path + "progress_training.log") testing_progress_log = rysy.Log(network_path + "progress_testing.log") #process training total_games_to_play = 20000 while env.get_games_count() < total_games_to_play: agent.main() #print training progress %, ane score, every 256th iterations if verbose: if env.get_iterations()%256 == 0: env._print() env.render() if env.get_iterations()%256 == 0: