Example #1
    def test_playDummy_configExist_playsWithDummyAgent(self):

        config = Config(test=True)
        experiment = Experiment(config)

        envs = experiment.list_envs()

        for env in envs:
            experiment.set_env(env)

            if config.get_env_type() != 'unity':
                experiment.play_dummy(mode='rgb-array',
                                      model=None,
                                      num_episodes=3,
                                      num_steps=10)
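A smoke test for the dummy agent: it iterates over every configured environment, skips Unity builds (presumably because they need an external binary), and plays three short episodes with no trained model (`model=None`).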
Example #2
    def test_record_recordsParameters_multipleSavesOverwrites(self):

        config = Config(test=True)
        session_id = Experiment(config).get_session_id()
        experiments_path = config.get_app_experiments_path(train_mode=False)
        model = 'model123'

        header = ['episode', 'step', 'action', 'reward']

        recorder = Recorder(header=header,
                            experiments_path=experiments_path,
                            session_id=session_id,
                            model=model)

        header_result = recorder.get_header()

        assert all([a == b for a, b in zip(header, header_result)])

        parameters1 = [1, 1, 0, 0]
        parameters2 = [1, 2, 1, 0]
        parameters3 = [1, 3, 0, 10]

        # episode 1
        recorder.record(parameters1)
        recorder.record(parameters2)
        recorder.record(parameters3)

        recorder.save()

        df = recorder.get_dataframe()
        (config, log) = recorder.load()

        assert df.shape[0] == log.shape[0]

        # episode 2
        recorder.record(parameters1)
        recorder.record(parameters2)
        recorder.record(parameters3)

        recorder.save()

        df = recorder.get_dataframe()
        (config, log) = recorder.load()

        assert df.shape[0] == log.shape[0]
        assert config['session_id'] == session_id
        assert config['model'] == model
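The `Recorder` class itself is not shown on this page, so the sketch below is a minimal stand-in inferred only from the calls the test makes (`get_header`, `record`, `save`, `get_dataframe`, `load`). The CSV/JSON file names and the pandas-backed storage are assumptions, not the project's actual format:

    import json
    import os

    import pandas as pd

    class MinimalRecorder:
        """Illustrative stand-in for Recorder; not the project's code."""

        def __init__(self, header, experiments_path, session_id, model):
            self.header = header
            self.rows = []
            self.config = {'session_id': session_id, 'model': model}
            os.makedirs(experiments_path, exist_ok=True)
            # file names below are assumed for illustration only
            self.log_path = os.path.join(experiments_path, session_id + '_log.csv')
            self.config_path = os.path.join(experiments_path, session_id + '_config.json')

        def get_header(self):
            return self.header

        def record(self, parameters):
            # one row per call, aligned with the header columns
            self.rows.append(parameters)

        def get_dataframe(self):
            return pd.DataFrame(self.rows, columns=self.header)

        def save(self):
            # rewrite both files from scratch: repeated saves overwrite
            self.get_dataframe().to_csv(self.log_path, index=False)
            with open(self.config_path, 'w') as f:
                json.dump(self.config, f)

        def load(self):
            with open(self.config_path) as f:
                config = json.load(f)
            return config, pd.read_csv(self.log_path)

Because `record()` accumulates rows in memory and `save()` rewrites the files from scratch, the second save overwrites the first with all six rows, which is what the repeated `df.shape[0] == log.shape[0]` assertions check.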
Example #3
    def test_listEnvs_configExist_returnsEnvs(self):

        config = Config(test=True)
        experiment = Experiment(config)

        envs = experiment.list_envs()

        assert len(envs) > 1
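A minimal sanity check: the test `Config` is expected to register more than one environment, which is what lets Examples #1 and #5 iterate over `list_envs()`.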
Example #4
    def test_listTrainExperiments_selectExperiment_printsConfig(self):
        config = Config(test=True)
        explorer = Explorer(config=config)
        experiment = Experiment(config)
        analyzer = Analyzer(config=config, session_id=experiment.get_session_id())

        experiments = explorer.list_train_experiments()

        analyzer.compare_train_config(experiments)
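This test only checks that the pipeline runs without raising: `list_train_experiments()` enumerates past training runs and `compare_train_config()` prints their configurations for comparison; no assertion is made on the output.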
Example #5
    def test_train_configExist_canTrain1Episode(self):
        config = Config(test=True)
        experiment = Experiment(config)

        envs = experiment.list_envs()

        for env in envs:
            experiment.set_env(env)

            if config.get_env_type() != 'unity':
                max_steps = 128
                max_episode_steps = 2
                scores = experiment.train(max_steps=max_steps,
                                          eval_frequency=16,
                                          eval_steps=4,
                                          max_episode_steps=max_episode_steps)

                assert len(scores) == max_steps // max_episode_steps
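The arithmetic behind the assertion: each episode is cut off after `max_episode_steps=2` steps, so the `max_steps=128` budget produces exactly 128 // 2 = 64 episodes, and `train()` returns one score per episode.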
Example #6
    def test_listTrainExperiments_selectExperiment_compareEpochScore(self):
        config = Config(test=True)
        explorer = Explorer(config=config)
        experiment = Experiment(config)
        analyzer = Analyzer(config=config, session_id=experiment.get_session_id())

        experiments = explorer.list_train_experiments()

        for experiment in experiments:
            file = analyzer.compare_train_epoch_score(experiment)
            assert file is not None
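`compare_train_epoch_score` is called once per past experiment and must return a non-None file (presumably the generated plot or report). The loop variable shadows the `experiment` object created above; harmless here, since it is not used again, but a different name would be clearer.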
Example #7
    def test_listTrainExperiments_selectExperiments_compareEpochData(self):
        config = Config(test=True)
        explorer = Explorer(config=config)
        experiment = Experiment(config)
        analyzer = Analyzer(config=config, session_id=experiment.get_session_id())

        experiments = explorer.list_train_experiments()

        file = analyzer.compare_train_epoch_cols(experiments, 'avg_score')

        assert file is not None
Example #8
    def __init__(self, seed, cfg: Config):
        """Initialize an Agent object.

        Params
        ======
            seed (int): random seed
            cfg (Config): configuration
        """

        # training parameters
        self.batch_size = 64  # minibatch size
        self.update_every = 4  # how often to update the network
        self.learning_rate = cfg.get_train_learning_rate()  # learning rate
        self.tau = cfg.get_train_tau()  # for soft update of target parameters
        self.gamma = cfg.get_current_env_train_gamma()  # discount factor

        # agent parameters
        self.state_size = cfg.get_agent_state_size()
        self.action_size = cfg.get_agent_action_size()

        # replay memory parameters
        self.buffer_size = cfg.get_replay_memory_buffer_size()
        self.prioritized_replay = cfg.get_replay_memory_prioritized_replay_flag()
        self.prioritized_replay_alpha = cfg.get_replay_memory_prioritized_replay_alpha()
        self.prioritized_replay_beta0 = cfg.get_replay_memory_prioritized_replay_beta0()
        self.prioritized_replay_eps = cfg.get_replay_memory_prioritized_replay_eps()
        self.total_timesteps = cfg.get_train_max_steps()
        self.prioritized_replay_beta_iters = None

        # network parameters
        nn_cfg = cfg.get_neural_network_hiden_layers()
        dueling = cfg.get_neural_network_dueling_flag()
        self.double_dqn = cfg.get_neural_network_double_flag()
        self.__num_frames = cfg.get_agent_num_frames()

        # Q-Network
        self.current_model, self.target_model = ModelFactory.create(
            type='classic',
            fc_units=nn_cfg,
            num_frames=self.__num_frames,
            state_size=self.state_size,
            action_size=self.action_size,
            dueling=dueling,
            seed=seed,
            device=device)

        self.optimizer = optim.Adam(self.current_model.parameters(),
                                    lr=self.learning_rate)

        if self.prioritized_replay:
            self.memory = PrioritizedReplayBuffer(
                self.buffer_size, alpha=self.prioritized_replay_alpha)
            if self.prioritized_replay_beta_iters is None:
                # default: anneal beta over the entire training run
                self.prioritized_replay_beta_iters = self.total_timesteps
            self.beta_schedule = LinearSchedule(
                self.prioritized_replay_beta_iters,
                initial_p=self.prioritized_replay_beta0,
                final_p=1.0)
        else:
            self.memory = ReplayBuffer(self.buffer_size)
            self.beta_schedule = None

        self.step_i = 0

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
        self.__frames_queue = deque(maxlen=self.__num_frames)
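The replay-buffer and schedule classes here match the OpenAI Baselines API (`PrioritizedReplayBuffer(size, alpha=...)`, `LinearSchedule(...)`), with beta annealed from `beta0` toward 1.0 over the whole training run. A self-contained, Baselines-style sketch of that schedule, shown only to make the annealing concrete (the project's actual implementation may differ):

    class LinearSchedule:
        """Linearly interpolates from initial_p to final_p over
        schedule_timesteps, then stays clamped at final_p."""

        def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
            self.schedule_timesteps = schedule_timesteps
            self.final_p = final_p
            self.initial_p = initial_p

        def value(self, t):
            fraction = min(float(t) / self.schedule_timesteps, 1.0)
            return self.initial_p + fraction * (self.final_p - self.initial_p)

    # beta0 = 0.4 annealed over a 1000-step run, as __init__ wires it up
    beta = LinearSchedule(1000, final_p=1.0, initial_p=0.4)
    assert abs(beta.value(0) - 0.4) < 1e-9     # start of training
    assert abs(beta.value(500) - 0.7) < 1e-9   # halfway through
    assert abs(beta.value(2000) - 1.0) < 1e-9  # clamped after the run ends

During learning, `self.beta_schedule.value(self.step_i)` would supply the `beta` passed when sampling from the prioritized buffer, so the importance-sampling correction strengthens as training progresses.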