def __init__(self, game, sess, nb_actions, global_step): BaseAgent.__init__(self, game, sess, nb_actions, global_step) self.name = "SF_linear_agent" self.model_path = os.path.join(FLAGS.checkpoint_dir, FLAGS.algorithm) self.nb_action = nb_actions self.episode_rewards = [] self.episode_lengths = [] self.episode_mean_values = [] self.episode_max_values = [] self.episode_min_values = [] self.episode_mean_returns = [] self.episode_max_returns = [] self.episode_min_returns = [] self.exploration = LinearSchedule(FLAGS.explore_steps, FLAGS.final_random_action_prob, FLAGS.initial_random_action_prob) self.summary_writer = tf.summary.FileWriter( os.path.join(FLAGS.summaries_dir, FLAGS.algorithm)) self.summary = tf.Summary() self.nb_states = game.nb_states self.q_net = SFLinearNetwork(nb_actions, self.nb_states, 'orig') self.target_net = SFLinearNetwork(nb_actions, self.nb_states, 'target') self.targetOps = self.update_target_graph('orig', 'target') self.probability_of_random_action = self.exploration.value(0)
def __init__(self, game, sess, nb_actions, global_step): BaseAgent.__init__(self, game, sess, nb_actions, global_step) self.name = "SF_agent" self.model_path = os.path.join(FLAGS.checkpoint_dir, FLAGS.algorithm) self.nb_states = self.env.nb_states if FLAGS.matrix_type == "incidence": self.sf_buffer = np.zeros( [self.nb_states * self.nb_states, self.nb_states]) else: self.sf_buffer = np.zeros([self.nb_states, self.nb_states]) self.seen_states = set() self.episode_rewards = [] self.episode_lengths = [] self.episode_mean_values = [] self.episode_max_values = [] self.episode_min_values = [] self.episode_mean_returns = [] self.episode_max_returns = [] self.episode_min_returns = [] self.exploration = LinearSchedule(FLAGS.explore_steps, FLAGS.final_random_action_prob, FLAGS.initial_random_action_prob) self.summary_writer = tf.summary.FileWriter( os.path.join(FLAGS.summaries_dir, FLAGS.algorithm)) self.summary = tf.Summary() self.sf_table = np.zeros([self.nb_states, self.nb_states]) # self.q_net = SFNetwork(self.nb_actions, self.nb_states, 'orig') # self.target_net = SFNetwork(self.nb_actions, self.nb_states, 'target') # # self.targetOps = self.update_target_graph('orig', 'target') # self.probability_of_random_action = self.exploration.value(0)
def __init__(self, game, sess, nb_actions, global_step): BaseAgent.__init__(self, game, sess, nb_actions, global_step) self.name = "CategoricalDQN_agent" self.model_path = os.path.join(FLAGS.checkpoint_dir, FLAGS.algorithm) self.support = np.linspace(FLAGS.v_min, FLAGS.v_max, FLAGS.nb_atoms) self.delta_z = (FLAGS.v_max - FLAGS.v_min) / (FLAGS.nb_atoms - 1) self.episode_rewards = [] self.episode_lengths = [] self.episode_mean_values = [] self.episode_max_values = [] self.episode_min_values = [] self.episode_mean_returns = [] self.episode_max_returns = [] self.episode_min_returns = [] self.exploration = LinearSchedule(FLAGS.explore_steps, FLAGS.final_random_action_prob, FLAGS.initial_random_action_prob) self.summary_writer = tf.summary.FileWriter(os.path.join(FLAGS.summaries_dir, FLAGS.algorithm)) self.summary = tf.Summary() self.q_net = CategoricalDQNetwork(nb_actions, 'orig') self.target_net = CategoricalDQNetwork(nb_actions, 'target') self.targetOps = self.update_target_graph('orig', 'target') self.probability_of_random_action = self.exploration.value(0)
def run(self):
    # Use the agent to find mines in our mine-sweeper environment.
    if self.agent_name == self.BasicAgent:
        self.mine_sweeper_agent = BaseAgent(env=self.env)
    elif self.agent_name == self.CSPAgent:
        self.mine_sweeper_agent = CSPAgent(
            env=self.env, end_game_on_mine_hit=self.end_game_on_mine_hit)
    else:
        self.mine_sweeper_agent = ProbCSPAgent(
            env=self.env,
            end_game_on_mine_hit=self.end_game_on_mine_hit,
            use_probability_agent=self.use_probability_agent,
            prob=self.bonus_uncertain_p)

    self.mine_sweeper_agent.play()
    metrics = self.mine_sweeper_agent.get_gameplay_metrics()
    # print("Game won = ", str(metrics["game_won"]))
    print("Number of mines hit = ", str(metrics["number_of_mines_hit"]))
    print("Number of mines flagged correctly = ",
          str(metrics["number_of_mines_flagged_correctly"]))
    print("Number of cells flagged incorrectly = ",
          str(metrics["number_of_cells_flagged_incorrectly"]))
    self.env.render_env(100)
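# Illustrative shape of the dict returned by get_gameplay_metrics(), inferred
# from the keys read above; the values are placeholders and the real agents
# may track additional fields.
example_metrics = {
    "game_won": True,
    "number_of_mines_hit": 0,
    "number_of_mines_flagged_correctly": 10,
    "number_of_cells_flagged_incorrectly": 1,
}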
def __init__(self, action_space, cmdl, is_training=True):
    BaseAgent.__init__(self, action_space, is_training)
    self.name = "DQN_agent"
    self.cmdl = cmdl
    eps = self.cmdl.epsilon
    e_steps = self.cmdl.epsilon_steps

    # Online (policy) and target networks share the same architecture.
    self.policy = policy = get_model(cmdl.estimator, 1, cmdl.hist_len,
                                     self.action_no, cmdl.hidden_size)
    self.target = target = get_model(cmdl.estimator, 1, cmdl.hist_len,
                                     self.action_no, cmdl.hidden_size)
    if self.cmdl.cuda:
        self.policy.cuda()
        self.target.cuda()

    self.policy_evaluation = DQNEvaluation(policy)
    self.policy_improvement = DQNImprovement(policy, target, cmdl)

    # Epsilon-greedy exploration annealed linearly from eps down to 0.05.
    self.exploration = get_epsilon_schedule("linear", eps, 0.05, e_steps)
    self.replay_memory = ReplayMemory(capacity=cmdl.experience_replay)

    self.dtype = TorchTypes(cmdl.cuda)
    # Track the largest Q-value seen, starting from a large negative sentinel.
    self.max_q = -1000
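# Hedged sketch of what the get_epsilon_schedule("linear", start, end, steps)
# call above plausibly returns: an iterator of epsilon values annealed from
# start down to end. The generator form and the function name are assumptions,
# not the helper's actual implementation.
def linear_epsilon_schedule(start, end, steps):
    step_size = (start - end) / float(steps)
    eps = start
    while True:
        yield max(end, eps)
        eps -= step_size

# Usage: schedule = linear_epsilon_schedule(1.0, 0.05, 100000)
#        epsilon = next(schedule)  # 1.0, then decays by step_size per call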
def main():
    # Set up the GPU/CPU device.
    if torch.cuda.is_available():
        torch.cuda.set_device(int(Utilis.gpu_id_with_max_memory()))

    # Load the run configuration; the logger itself is created in the
    # training branch below.
    cfg = Utilis.config()
    game = None
    try:
        working_mode = cfg['GLOBAL'].get('working_mode')
        working_agent = BaseAgent.create(cfg)
        if working_agent is None:
            print("Working agent not found.")
            return

        game = Game(cfg)
        if working_mode == 'train':
            print('****************** The Dino is being trained by '
                  + cfg['GLOBAL'].get('working_agent')
                  + ' ******************')
            logger = Logger.get_instance()
            logger.create_log(cfg)
            working_agent.train(game)
        elif working_mode == 'replay':
            print('****************** The Dino is being replayed ******************')
            working_agent.replay(game)
        else:
            print("Working mode not found. Check the spelling of "
                  "working_mode in config.ini.")
    finally:
        if game is not None:
            game.end()
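# Illustrative config.ini consumed by Utilis.config() in main() above. Only
# the GLOBAL section and the working_mode / working_agent keys are grounded
# in the code; the agent name is a placeholder that must resolve through
# BaseAgent.create(cfg).
#
# [GLOBAL]
# working_mode = train
# working_agent = DQNAgent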