Example #1
    def __init__(self,
                 env = ProcessedAtariEnv(),
                 memory = PrioritizedExperienceReplay(), 
                 policy_network = None,
                 target_network = None, 
                 frame_shape = (84, 84),
                 save_path = None, 
                 logger = QLearningLogger):
        
        self.env = env
        self.memory = memory
        self.policy_network = policy_network
        self.target_network = target_network

        self.num_stacked_frames = memory.num_stacked_frames
        
        self.save_path = None
        if save_path is not None:
            # logging is only set up when a save path is provided
            self.save_path = save_path
            self.logger = logger(self.save_path)
        
        # internal variables
        self._current_state = np.zeros((1, self.num_stacked_frames, *frame_shape), dtype = np.uint8)
        self._step_counter = 0
        
        # logging variables
        self._q_values = []
        self._losses = []
        self._score = 0.0
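
Example #1 only allocates the _current_state buffer; the code that advances it is not part of the excerpt. A minimal sketch of the usual frame-stacking update, assuming the oldest frame is dropped from the stack axis and the newest processed frame is appended (the helper name push_frame is illustrative, not the repository's API):

import numpy as np

def push_frame(current_state, new_frame):
    # hypothetical helper (not in the source): shift the stack axis left by one
    # and write the newest (H, W) frame into the last slot of the (1, N, H, W) buffer
    next_state = np.roll(current_state, shift=-1, axis=1)
    next_state[0, -1] = new_frame
    return next_state

# usage with the buffer layout from Example #1
state = np.zeros((1, 4, 84, 84), dtype=np.uint8)
state = push_frame(state, np.ones((84, 84), dtype=np.uint8))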
Example #2
 def __init__(self,
              env = ProcessedAtariEnv(),
              memory = PrioritizedExperienceReplay(), 
              policy_network = None,#DeepQNetwork(),
              target_network = None,#DeepQNetwork(),
              frame_shape = (84, 84),
              save_path = None,
              
              num_actions = 4,
              epsilon = 0.05):
     
     BaseQAgent.__init__(self, env, memory, policy_network, target_network, frame_shape, save_path)
     self.epsilon = epsilon
     self.num_actions = num_actions
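
Example #2 only stores epsilon and num_actions; the action-selection rule they feed is not shown in the excerpt. A sketch of standard epsilon-greedy selection under those names (the standalone function is illustrative, not the repository's API):

import numpy as np

def epsilon_greedy_action(q_values, epsilon, num_actions):
    # explore uniformly with probability epsilon, otherwise act greedily
    if np.random.rand() < epsilon:
        return np.random.randint(num_actions)
    return int(np.argmax(q_values))

# e.g. with the defaults from Example #2 (epsilon = 0.05, num_actions = 4)
action = epsilon_greedy_action(np.array([0.1, 0.5, 0.2, 0.0]), epsilon=0.05, num_actions=4)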
Example #3
 def __init__(self,
              env = ProcessedAtariEnv(),
              memory = PrioritizedExperienceReplay(), 
              policy_network = None,#DeepQNetwork(),
              target_network = None,#DeepQNetwork(),
              frame_shape = (84, 84),
              save_path = None,
              
              num_actions = 4,
              eps_schedule = [[1.0, 0.1, 1000000],
                              [0.1, 0.001, 5000000]]):
     
     BaseQAgent.__init__(self, env, memory, policy_network, target_network, frame_shape, save_path)
     
     self.eps_schedule = np.array(eps_schedule)
     self.eps_schedule[:,2] = np.cumsum(self.eps_schedule[:,2])
     self.eps_lag = 0
     self.num_actions = num_actions
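
The cumsum in Example #3 turns the third column of eps_schedule from per-phase durations into cumulative step boundaries (1000000 and 6000000 for the default schedule), which suggests linear annealing within whichever phase the current step falls into. A sketch of that interpretation (the lookup itself is not in the excerpt, and treating eps_lag as a step offset is an assumption):

import numpy as np

def annealed_epsilon(step, eps_schedule, eps_lag=0):
    # each row is [eps_start, eps_end, cumulative_end_step] after the cumsum above
    start_step = eps_lag
    for eps_start, eps_end, end_step in eps_schedule:
        if step < end_step:
            frac = (step - start_step) / (end_step - start_step)
            return eps_start + frac * (eps_end - eps_start)
        start_step = end_step
    return eps_schedule[-1][1]  # past the schedule: keep the final epsilon

schedule = np.array([[1.0, 0.1, 1000000], [0.1, 0.001, 5000000]])
schedule[:, 2] = np.cumsum(schedule[:, 2])   # boundaries become 1e6 and 6e6
print(annealed_epsilon(500000, schedule))    # 0.55, halfway through the first phase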
Example #4
 def __init__(self,
              env = ProcessedAtariEnv(),
              memory = PrioritizedExperienceReplay(), 
              policy_network = None,#DeepQNetwork(),
              target_network = None, #DeepQNetwork(),
              frame_shape = (84, 84),
              save_path = None,
             
              discount_factor = 0.99,
              n_step = None,
              double_q = False,
              expert_memory = None,
              prioritized_replay = False,
             
              num_actions = 4,
              epsilon = 0.05):
     
     EpsilonGreedyAgent.__init__(self, env, memory, policy_network, target_network, frame_shape, save_path, num_actions, epsilon)
     DQNAgent.__init__(self, env, memory, policy_network, target_network, frame_shape, save_path, discount_factor, n_step, double_q, expert_memory, prioritized_replay)
Example #5
 def __init__(self,
              env = ProcessedAtariEnv(),
              memory = PrioritizedExperienceReplay(), 
              policy_network = None, #DeepQNetwork(),
              target_network = None, #DeepQNetwork(),
              frame_shape = (84, 84),
              save_path = None,
             
              discount_factor = 0.99,
              n_step = None,
              double_q = False,
              expert_memory = None,
              prioritized_replay = False,
             
              num_actions = 4,
              eps_schedule = [[1.0, 0.1, 1000000],
                              [0.1, 0.001, 5000000]]
              ):
     
     EpsilonAnnealingAgent.__init__(self, env, memory, policy_network, target_network, frame_shape, save_path, num_actions, eps_schedule)
     DQNAgent.__init__(self, env, memory, policy_network, target_network, frame_shape, save_path, discount_factor, n_step, double_q, expert_memory, prioritized_replay)
Example #6
 def __init__(self,
              env = ProcessedAtariEnv(),
              memory = PrioritizedExperienceReplay(), 
              policy_network = None,#DeepQNetwork(),
              target_network = None,#DeepQNetwork(),
              frame_shape = (84, 84),
              save_path = None,
              
              discount_factor = 0.99,
              n_step = None,
              double_q = False,
              expert_memory = None,
              prioritized_replay = False
              ):
     
     BaseQAgent.__init__(self, env, memory, policy_network, target_network, frame_shape, save_path)
     
     self._idx_range = np.arange(self.memory.batch_size, dtype = np.int32)
     
     self.discount_factor = discount_factor
     self.n_step = n_step
     self.expert_memory = expert_memory
     self.double_q = double_q
     self.prioritized_replay = prioritized_replay
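
Example #6 stores the learning hyper-parameters and an _idx_range index buffer, but the target computation they feed is not shown. A sketch of standard one-step targets for plain and double Q-learning using the same flags (function and array names are illustrative; the row-indexing mirrors what _idx_range hints at):

import numpy as np

def q_learning_targets(rewards, dones, q_next_policy, q_next_target,
                       discount_factor=0.99, double_q=False):
    # q_next_policy / q_next_target: (batch, num_actions) predictions for the next states
    if double_q:
        # double Q-learning: the policy network selects, the target network evaluates
        best_actions = np.argmax(q_next_policy, axis=1)
        next_values = q_next_target[np.arange(len(best_actions)), best_actions]
    else:
        # vanilla DQN: the target network both selects and evaluates
        next_values = np.max(q_next_target, axis=1)
    return rewards + discount_factor * (1.0 - dones) * next_values

# e.g. a batch of two transitions, the second of which is terminal
targets = q_learning_targets(rewards=np.array([1.0, 0.0]),
                             dones=np.array([0.0, 1.0]),
                             q_next_policy=np.array([[0.2, 0.5], [0.1, 0.3]]),
                             q_next_target=np.array([[0.4, 0.6], [0.2, 0.1]]),
                             double_q=True)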
Example #7
import numpy as np

from deep_q_agents import EpsAnnDQNAgent
from deep_q_networks import DeepQNetwork
from experience_replay import PrioritizedExperienceReplay
from atari_preprocessing import atari_enduro_processor, ProcessedAtariEnv
from openai_baseline_wrappers import make_atari, wrap_deepmind
from load_data import LoadAtariHeadData

#create environment
frame_processor = atari_enduro_processor
game_id = 'EnduroNoFrameskip-v4'
env = make_atari(game_id)
env = wrap_deepmind(env)
env = ProcessedAtariEnv(env,
                        frame_processor,
                        reward_processor=lambda x: np.sign(x))

# additional env specific parameters
frame_shape = env.reset().shape
frame_skip = 4
num_stacked_frames = 4
num_actions = env.action_space.n

# replay parameters
batch_size = 32
max_frame_num = 2**20
prioritized_replay = True
prio_coeff = 0.6
is_schedule = [0.4, 1.0, 2500000]
replay_epsilon = 0.001
Example #8
import numpy as np

from deep_q_agents import EpsAnnDQNAgent
from deep_q_networks import DeepQNetwork
from experience_replay import PrioritizedExperienceReplay
from atari_preprocessing import atari_pong_processor, ProcessedAtariEnv
from openai_baseline_wrappers import make_atari, wrap_deepmind
from load_data import LoadAtariHeadData


#create environment
frame_processor = atari_pong_processor
game_id = 'PongNoFrameskip-v4'
env = make_atari(game_id)
env = wrap_deepmind(env)
env = ProcessedAtariEnv(env, frame_processor, reward_processor = lambda x: np.sign(x), neg_reward_terminal = True)

# additional env specific parameters
frame_shape = env.reset().shape
frame_skip = 4
num_stacked_frames = 4
num_actions = env.action_space.n

# replay parameters
batch_size = 32
max_frame_num = 2**20
prioritized_replay = True
prio_coeff = 1.0
is_schedule = [0.0, 0.0, 2500000]
replay_epsilon = 0.001
memory_restore_path = None
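
Both configuration scripts stop before the agent is actually built. Continuing Example #8, a sketch of how the pieces might be wired together: the EpsAnnDQNAgent keyword arguments come from Example #5, while the PrioritizedExperienceReplay and DeepQNetwork constructor arguments are assumed to mirror the variable names above (their real signatures are not shown in these excerpts):

# hypothetical wiring; the replay and network constructor kwargs are assumptions
memory = PrioritizedExperienceReplay(batch_size=batch_size,
                                     max_frame_num=max_frame_num,
                                     num_stacked_frames=num_stacked_frames,
                                     frame_shape=frame_shape,
                                     prio_coeff=prio_coeff,
                                     is_schedule=is_schedule,
                                     epsilon=replay_epsilon)

policy_network = DeepQNetwork()   # constructor arguments not shown in the excerpts
target_network = DeepQNetwork()

agent = EpsAnnDQNAgent(env=env,
                       memory=memory,
                       policy_network=policy_network,
                       target_network=target_network,
                       frame_shape=frame_shape,
                       num_actions=num_actions,
                       prioritized_replay=prioritized_replay,
                       eps_schedule=[[1.0, 0.1, 1000000],
                                     [0.1, 0.001, 5000000]])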