def test_playDummy_configExist_playsWithDummyAgent(self):
    config = Config(test=True)
    experiment = Experiment(config)
    envs = experiment.list_envs()
    for env in envs:
        experiment.set_env(env)
        if config.get_env_type() != 'unity':
            experiment.play_dummy(mode='rgb-array',
                                  model=None,
                                  num_episodes=3,
                                  num_steps=10)
def test_record_recordsParameters_multipleSavesOverwrites(self):
    config = Config(test=True)
    session_id = Experiment(config).get_session_id()
    experiments_path = config.get_app_experiments_path(train_mode=False)
    model = 'model123'
    header = ['episode', 'step', 'action', 'reward']
    recorder = Recorder(header=header,
                        experiments_path=experiments_path,
                        session_id=session_id,
                        model=model)
    header_result = recorder.get_header()
    assert all([a == b for a, b in zip(header, header_result)])

    parameters1 = [1, 1, 0, 0]
    parameters2 = [1, 2, 1, 0]
    parameters3 = [1, 3, 0, 10]

    # episode 1
    recorder.record(parameters1)
    recorder.record(parameters2)
    recorder.record(parameters3)
    recorder.save()
    df = recorder.get_dataframe()
    (config, log) = recorder.load()
    assert df.shape[0] == log.shape[0]

    # episode 2
    recorder.record(parameters1)
    recorder.record(parameters2)
    recorder.record(parameters3)
    recorder.save()
    df = recorder.get_dataframe()
    (config, log) = recorder.load()
    assert df.shape[0] == log.shape[0]
    assert config['session_id'] == session_id
    assert config['model'] == model
def test_listEnvs_configExist_returnsEnvs(self):
    config = Config(test=True)
    experiment = Experiment(config)
    envs = experiment.list_envs()
    assert len(envs) > 1
def test_listTrainExperiments_selectExperiment_printsConfig(self):
    config = Config(test=True)
    explorer = Explorer(config=config)
    experiment = Experiment(config)
    analyzer = Analyzer(config=config,
                        session_id=experiment.get_session_id())
    experiments = explorer.list_train_experiments()
    analyzer.compare_train_config(experiments)
def test_train_configExist_canTrain1Episode(self):
    config = Config(test=True)
    experiment = Experiment(config)
    envs = experiment.list_envs()
    for env in envs:
        experiment.set_env(env)
        if config.get_env_type() != 'unity':
            max_steps = 128
            max_episode_steps = 2
            scores = experiment.train(max_steps=max_steps,
                                      eval_frequency=16,
                                      eval_steps=4,
                                      max_episode_steps=max_episode_steps)
            assert len(scores) == max_steps / max_episode_steps
def test_listTrainExperiments_selectExperiment_compareEpochScore(self):
    config = Config(test=True)
    explorer = Explorer(config=config)
    experiment = Experiment(config)
    analyzer = Analyzer(config=config,
                        session_id=experiment.get_session_id())
    experiments = explorer.list_train_experiments()
    for experiment in experiments:
        file = analyzer.compare_train_epoch_score(experiment)
        assert file is not None
def test_listTrainExperiments_selectExperiments_compareEpochData(self):
    config = Config(test=True)
    explorer = Explorer(config=config)
    experiment = Experiment(config)
    analyzer = Analyzer(config=config,
                        session_id=experiment.get_session_id())
    experiments = explorer.list_train_experiments()
    file = analyzer.compare_train_epoch_cols(experiments, 'avg_score')
    assert file is not None
def __init__(self, seed, cfg: Config):
    """Initialize an Agent object.

    Params
    ======
        seed (int): random seed
        cfg (Config): configuration
    """
    # training parameters
    self.batch_size = 64                                 # minibatch size
    self.update_every = 4                                # how often to update the network
    self.learning_rate = cfg.get_train_learning_rate()  # learning rate
    self.tau = cfg.get_train_tau()                       # for soft update of target parameters
    self.gamma = cfg.get_current_env_train_gamma()       # discount factor

    # agent parameters
    self.state_size = cfg.get_agent_state_size()
    self.action_size = cfg.get_agent_action_size()

    # replay memory parameters
    self.buffer_size = cfg.get_replay_memory_buffer_size()
    self.prioritized_replay = cfg.get_replay_memory_prioritized_replay_flag()
    self.prioritized_replay_alpha = cfg.get_replay_memory_prioritized_replay_alpha()
    self.prioritized_replay_beta0 = cfg.get_replay_memory_prioritized_replay_beta0()
    self.prioritized_replay_eps = cfg.get_replay_memory_prioritized_replay_eps()
    self.total_timesteps = cfg.get_train_max_steps()
    self.prioritized_replay_beta_iters = None

    # network parameters
    nn_cfg = cfg.get_neural_network_hiden_layers()
    dueling = cfg.get_neural_network_dueling_flag()
    self.double_dqn = cfg.get_neural_network_double_flag()
    self.__num_frames = cfg.get_agent_num_frames()

    # Q-Network: online (current) and target networks
    self.current_model, self.target_model = ModelFactory.create(
        type='classic',
        fc_units=nn_cfg,
        num_frames=self.__num_frames,
        state_size=self.state_size,
        action_size=self.action_size,
        dueling=dueling,
        seed=seed,
        device=device)
    self.optimizer = optim.Adam(self.current_model.parameters(),
                                lr=self.learning_rate)

    if self.prioritized_replay:
        self.memory = PrioritizedReplayBuffer(
            self.buffer_size, alpha=self.prioritized_replay_alpha)
        # anneal beta from beta0 to 1.0 over the whole training run
        if self.prioritized_replay_beta_iters is None:
            self.prioritized_replay_beta_iters = self.total_timesteps
        self.beta_schedule = LinearSchedule(
            self.prioritized_replay_beta_iters,
            initial_p=self.prioritized_replay_beta0,
            final_p=1.0)
    else:
        self.memory = ReplayBuffer(self.buffer_size)
        self.beta_schedule = None

    self.step_i = 0
    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
    # queue of the most recent frames used to build stacked-frame states
    self.__frames_queue = deque(maxlen=self.__num_frames)
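# Minimal usage sketch (an assumption, not taken from the repo): the constructor
# above is assumed to belong to an agent class, called `Agent` here, and
# `Config(test=True)` is the test configuration used in the tests above. Actual
# class and module names in this repo may differ.
#
#     cfg = Config(test=True)
#     agent = Agent(seed=0, cfg=cfg)
#     # agent.current_model / agent.target_model are the online/target Q-networks;
#     # agent.memory is a ReplayBuffer or, if enabled, a PrioritizedReplayBuffer.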