def __init__(self,
             env = ProcessedAtariEnv(),
             memory = PrioritizedExperienceReplay(),
             policy_network = None,
             target_network = None,
             frame_shape = (84, 84),
             save_path = None,
             logger = QLearningLogger):
    self.env = env
    self.memory = memory
    self.policy_network = policy_network
    self.target_network = target_network
    self.num_stacked_frames = memory.num_stacked_frames
    self.save_path = save_path
    self.logger = logger(self.save_path)
    # internal variables
    self._current_state = np.zeros((1, self.num_stacked_frames, *frame_shape), dtype = np.uint8)
    self._step_counter = 0
    # logging variables
    self._q_values = []
    self._losses = []
    self._score = 0.0
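# Illustrative sketch (not the repository's state-update code): how a newly
# processed frame would typically be rolled into the stacked-frame buffer
# _current_state of shape (1, num_stacked_frames, *frame_shape) created above.
def _append_frame(current_state, frame):
    # Shift the oldest frame out along the stack axis and write the new frame
    # into the last slot.
    current_state = np.roll(current_state, shift = -1, axis = 1)
    current_state[0, -1] = frame
    return current_state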
def __init__(self,
             env = ProcessedAtariEnv(),
             memory = PrioritizedExperienceReplay(),
             policy_network = None,   # DeepQNetwork()
             target_network = None,   # DeepQNetwork()
             frame_shape = (84, 84),
             save_path = None,
             num_actions = 4,
             epsilon = 0.05):
    BaseQAgent.__init__(self, env, memory, policy_network, target_network,
                        frame_shape, save_path)
    self.epsilon = epsilon
    self.num_actions = num_actions
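# Illustrative sketch (not the repository's act() implementation): how the
# epsilon and num_actions fields set above are typically used for
# epsilon-greedy action selection over a vector of Q-values.
def _epsilon_greedy_action(q_values, epsilon, num_actions):
    # with probability epsilon explore uniformly, otherwise act greedily
    if np.random.rand() < epsilon:
        return np.random.randint(num_actions)
    return int(np.argmax(q_values))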
def __init__(self,
             env = ProcessedAtariEnv(),
             memory = PrioritizedExperienceReplay(),
             policy_network = None,   # DeepQNetwork()
             target_network = None,   # DeepQNetwork()
             frame_shape = (84, 84),
             save_path = None,
             num_actions = 4,
             eps_schedule = [[1.0, 0.1, 1000000], [0.1, 0.001, 5000000]]):
    BaseQAgent.__init__(self, env, memory, policy_network, target_network,
                        frame_shape, save_path)
    # each row of eps_schedule is [eps_start, eps_end, duration in steps];
    # the cumsum turns the per-segment durations into absolute step boundaries
    self.eps_schedule = np.array(eps_schedule)
    self.eps_schedule[:, 2] = np.cumsum(self.eps_schedule[:, 2])
    self.eps_lag = 0
    self.num_actions = num_actions
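# Illustrative sketch (assumed semantics of eps_schedule, not the repository's
# exact annealing code): linearly interpolate epsilon within the segment that
# contains the current step, using the cumulative boundaries computed above.
def _annealed_epsilon(eps_schedule, step):
    segment_start = 0
    for eps_start, eps_end, segment_end in eps_schedule:
        if step < segment_end:
            frac = (step - segment_start) / (segment_end - segment_start)
            return eps_start + frac * (eps_end - eps_start)
        segment_start = segment_end
    # after the last segment, keep the final epsilon
    return eps_schedule[-1][1]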
def __init__(self,
             env = ProcessedAtariEnv(),
             memory = PrioritizedExperienceReplay(),
             policy_network = None,   # DeepQNetwork()
             target_network = None,   # DeepQNetwork()
             frame_shape = (84, 84),
             save_path = None,
             discount_factor = 0.99,
             n_step = None,
             double_q = False,
             expert_memory = None,
             prioritized_replay = False,
             num_actions = 4,
             epsilon = 0.05):
    EpsilonGreedyAgent.__init__(self, env, memory, policy_network, target_network,
                                frame_shape, save_path, num_actions, epsilon)
    DQNAgent.__init__(self, env, memory, policy_network, target_network,
                      frame_shape, save_path, discount_factor, n_step, double_q,
                      expert_memory, prioritized_replay)
def __init__(self,
             env = ProcessedAtariEnv(),
             memory = PrioritizedExperienceReplay(),
             policy_network = None,   # DeepQNetwork()
             target_network = None,   # DeepQNetwork()
             frame_shape = (84, 84),
             save_path = None,
             discount_factor = 0.99,
             n_step = None,
             double_q = False,
             expert_memory = None,
             prioritized_replay = False,
             num_actions = 4,
             eps_schedule = [[1.0, 0.1, 1000000], [0.1, 0.001, 5000000]]):
    EpsilonAnnealingAgent.__init__(self, env, memory, policy_network, target_network,
                                   frame_shape, save_path, num_actions, eps_schedule)
    DQNAgent.__init__(self, env, memory, policy_network, target_network,
                      frame_shape, save_path, discount_factor, n_step, double_q,
                      expert_memory, prioritized_replay)
def __init__(self,
             env = ProcessedAtariEnv(),
             memory = PrioritizedExperienceReplay(),
             policy_network = None,   # DeepQNetwork()
             target_network = None,   # DeepQNetwork()
             frame_shape = (84, 84),
             save_path = None,
             discount_factor = 0.99,
             n_step = None,
             double_q = False,
             expert_memory = None,
             prioritized_replay = False):
    BaseQAgent.__init__(self, env, memory, policy_network, target_network,
                        frame_shape, save_path)
    self._idx_range = np.arange(self.memory.batch_size, dtype = np.int32)
    self.discount_factor = discount_factor
    self.n_step = n_step
    self.expert_memory = expert_memory
    self.double_q = double_q
    self.prioritized_replay = prioritized_replay
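# Illustrative sketch of the TD target that the discount_factor / double_q
# flags above select; this is not the repository's training step, and the
# network .predict() calls are assumed placeholders for whatever forward-pass
# API the networks expose.
def _td_target(rewards, next_states, terminals, policy_network, target_network,
               discount_factor, double_q):
    q_next_target = target_network.predict(next_states)   # assumed API
    if double_q:
        # double DQN: the policy network picks the greedy action,
        # the target network evaluates it
        greedy = np.argmax(policy_network.predict(next_states), axis = 1)
        q_next = q_next_target[np.arange(len(greedy)), greedy]
    else:
        # vanilla DQN: max over the target network's Q-values
        q_next = np.max(q_next_target, axis = 1)
    return rewards + discount_factor * (1.0 - terminals) * q_next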
import numpy as np

from deep_q_agents import EpsAnnDQNAgent
from deep_q_networks import DeepQNetwork
from experience_replay import PrioritizedExperienceReplay
from atari_preprocessing import atari_enduro_processor, ProcessedAtariEnv
from openai_baseline_wrappers import make_atari, wrap_deepmind
from load_data import LoadAtariHeadData

# create environment
frame_processor = atari_enduro_processor
game_id = 'EnduroNoFrameskip-v4'
env = make_atari(game_id)
env = wrap_deepmind(env)
env = ProcessedAtariEnv(env, frame_processor, reward_processor = lambda x: np.sign(x))

# additional env specific parameters
frame_shape = env.reset().shape
frame_skip = 4
num_stacked_frames = 4
num_actions = env.action_space.n

# replay parameters
batch_size = 32
max_frame_num = 2**20
prioritized_replay = True
prio_coeff = 0.6
is_schedule = [0.4, 1.0, 2500000]
replay_epsilon = 0.001
import numpy as np

from deep_q_agents import EpsAnnDQNAgent
from deep_q_networks import DeepQNetwork
from experience_replay import PrioritizedExperienceReplay
from atari_preprocessing import atari_pong_processor, ProcessedAtariEnv
from openai_baseline_wrappers import make_atari, wrap_deepmind
from load_data import LoadAtariHeadData

# create environment
frame_processor = atari_pong_processor
game_id = 'PongNoFrameskip-v4'
env = make_atari(game_id)
env = wrap_deepmind(env)
env = ProcessedAtariEnv(env, frame_processor, reward_processor = lambda x: np.sign(x),
                        neg_reward_terminal = True)

# additional env specific parameters
frame_shape = env.reset().shape
frame_skip = 4
num_stacked_frames = 4
num_actions = env.action_space.n

# replay parameters
batch_size = 32
max_frame_num = 2**20
prioritized_replay = True
prio_coeff = 1.0
is_schedule = [0.0, 0.0, 2500000]
replay_epsilon = 0.001
memory_restore_path = None
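# Illustrative wiring sketch: how these config values might feed the agent.
# Only the EpsAnnDQNAgent keyword arguments below are taken from its __init__
# above; the replay memory construction is omitted because the
# PrioritizedExperienceReplay constructor arguments are not shown here, so the
# default memory is used as a placeholder.
agent = EpsAnnDQNAgent(env = env,
                       frame_shape = frame_shape,
                       num_actions = num_actions,
                       prioritized_replay = prioritized_replay)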