def __init__(self, seed, cfg: Configuration, num_agents=1):
        """Initialize an Agent object.

        Params
        ======
            seed (int): random seed
            cfg (Configuration): configuration
            num_agents (int): number of agents (unused; overridden by
                environment_cfg.num_agents below)
        """
        super(DdpgAgent, self).__init__(cfg)

        self.num_agents = cfg.get_current_exp_cfg().environment_cfg.num_agents

        self.state_size = cfg.get_current_exp_cfg().agent_cfg.state_size
        self.action_size = cfg.get_current_exp_cfg().agent_cfg.action_size

        # Actor and Critic networks, each paired with a target network.
        # (`device` is assumed to be a module-level torch device.)
        self.actor_current_model, self.actor_target_model, self.critic_current_model, self.critic_target_model = ModelFactory.create(
            seed, device, cfg)

        self.actor_optimizer = optim.Adam(
            self.actor_current_model.parameters(),
            lr=self.reinforcement_learning_cfg.ddpg_cfg.lr_actor)

        self.critic_optimizer = optim.Adam(
            self.critic_current_model.parameters(),
            lr=self.reinforcement_learning_cfg.ddpg_cfg.lr_critic,
            weight_decay=self.reinforcement_learning_cfg.ddpg_cfg.weight_decay)

        # Noise process
        self.noise = OUNoiseStandardNormal(self.action_size, seed)

        # Replay Memory
        if self.replay_memory_cfg.prioritized_replay:
            self.memory = PrioritizedReplayBuffer(
                int(self.replay_memory_cfg.buffer_size),
                alpha=self.replay_memory_cfg.prioritized_replay_alpha)
            # Anneal beta over the configured number of iterations, falling
            # back to the full training run when none is set. (The original
            # code read an unassigned local when the attribute was not None.)
            prioritized_replay_beta_iters = self.prioritized_replay_beta_iters
            if prioritized_replay_beta_iters is None:
                prioritized_replay_beta_iters = self.trainer_cfg.max_steps
            self.beta_schedule = LinearSchedule(
                prioritized_replay_beta_iters,
                initial_p=self.replay_memory_cfg.prioritized_replay_beta0,
                final_p=1.0)
        else:
            self.memory = ReplayBuffer(self.replay_memory_cfg.buffer_size)
            self.beta_schedule = None

        # Initialize time step counter (for prioritized memory replay)
        self.step_counter = 0
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.step_update_counter = 0
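
# --- Editor's note: a minimal, self-contained sketch of the beta annealing
# used above. Assumption: LinearSchedule interpolates linearly from
# initial_p to final_p over a fixed step budget (the common baselines
# behavior); this stand-in is illustrative, not the project's class.

class LinearScheduleSketch:
    def __init__(self, schedule_timesteps, initial_p, final_p):
        self.schedule_timesteps = schedule_timesteps
        self.initial_p = initial_p
        self.final_p = final_p

    def value(self, t):
        # Fraction of the schedule elapsed, clipped to [0, 1].
        fraction = min(float(t) / self.schedule_timesteps, 1.0)
        return self.initial_p + fraction * (self.final_p - self.initial_p)

# With prioritized_replay_beta0=0.4 and max_steps=128 (as in the test
# configs below), beta ramps 0.4 -> 1.0, e.g. value(64) == 0.7.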
    def test_record_recordsParameters_multipleSavesOverwrites(self):

        config = Configuration(test_flag=True)
        session_id = Experiment(config).get_session_id()
        experiments_path = config.get_app_experiments_path(train_mode=False)
        model = 'model123'

        header = ['episode', 'step', 'action', 'reward']

        recorder = Recorder(header=header,
                            experiments_path=experiments_path,
                            session_id=session_id,
                            model=model,
                            configuration=config.get_current_exp_cfg())

        header_result = recorder.get_header()

        assert all([a == b for a, b in zip(header, header_result)])

        parameters1 = [1, 1, 0, 0]
        parameters2 = [1, 2, 1, 0]
        parameters3 = [1, 3, 0, 10]

        # episode 1
        recorder.record(parameters1)
        recorder.record(parameters2)
        recorder.record(parameters3)

        recorder.save()

        df = recorder.get_dataframe()
        (config, log) = recorder.load()

        assert df.shape[0] == log.shape[0]

        # episode 2
        recorder.record(parameters1)
        recorder.record(parameters2)
        recorder.record(parameters3)

        recorder.save()

        df = recorder.get_dataframe()
        (config, log) = recorder.load()

        assert df.shape[0] == log.shape[0]
        assert config['session_id'] == session_id
        assert config['model'] == model
    def test_listPlayExperiments_experimentsExist_returnsExperiments(self):

        config = Configuration(test_flag=True)
        explorer = Explorer(config=config)

        experiments = explorer.list_play_experiments()
        assert len(experiments) > 0
    def __init__(self, cfg: Configuration):

        # training parameters
        self.trainer_cfg = cfg.get_current_exp_cfg().trainer_cfg

        # agent parameters
        self.agent_cfg = cfg.get_current_exp_cfg().agent_cfg

        # replay memory parameters
        self.replay_memory_cfg = cfg.get_current_exp_cfg().replay_memory_cfg
        self.prioritized_replay_beta_iters = None

        # reinforcement learning parameters
        self.reinforcement_learning_cfg = \
            cfg.get_current_exp_cfg().reinforcement_learning_cfg

        self.frames_queue = deque(maxlen=self.agent_cfg.num_frames)
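
# --- Editor's note: a minimal sketch of the frame-stacking idea behind
# frames_queue. Assumption: the agent concatenates the last num_frames
# observations into one state vector; the helper below is illustrative,
# not the project's API.

from collections import deque
import numpy as np

def stack_frames(queue: deque, frame: np.ndarray) -> np.ndarray:
    queue.append(frame)                 # deque(maxlen=N) drops the oldest frame
    while len(queue) < queue.maxlen:    # pad with the first frame at episode start
        queue.append(frame)
    return np.concatenate(list(queue))  # flat state of the last N frames

# Usage: q = deque(maxlen=10); state = stack_frames(q, observation)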
    def test_listTrainExperiments_selectExperiment_printsConfig(self):
        config = Configuration(test_flag=True)
        explorer = Explorer(config=config)
        experiment = Experiment(config)
        analyzer = Analyzer(config=config,
                            session_id=experiment.get_session_id())

        experiments = explorer.list_train_experiments()

        analyzer.compare_train_config(experiments)
    def test_listTrainExperiments_selectExperiment_compareEpochScore(self):
        config = Configuration(test_flag=True)
        explorer = Explorer(config=config)
        experiment = Experiment(config)
        analyzer = Analyzer(config=config,
                            session_id=experiment.get_session_id())

        experiments = explorer.list_train_experiments()

        for experiment in experiments:
            file = analyzer.compare_train_epoch_score(experiment)
            assert file is not None
    def create(config: Configuration) -> IEnvironment:

        env_name = config.get_current_exp_cfg().gym_id

        env_type = config.get_current_exp_cfg().environment_cfg.env_type

        if env_type == 'gym':
            env = GymStandardEnv(name=env_name)
        elif env_type == 'atari_gym':
            env = GymAtariEnv(name=env_name)
        elif env_type == 'spaceinvaders_atari_gym':
            env = GymAtariSpaceInvadersEnv(name=env_name)
        elif env_type == 'unity':
            env = UnityEnv(name=env_name)
        elif env_type == 'unity-multiple':
            env = UnityMultipleEnv(name=env_name)
        else:
            raise Exception(
                "Environment '{}' type not supported".format(env_type))

        return env
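
# --- Editor's note: an equivalent dict-dispatch form of the if/elif chain
# above; a sketch only, reusing the same environment classes the factory
# already references:

ENV_TYPES = {
    'gym': GymStandardEnv,
    'atari_gym': GymAtariEnv,
    'spaceinvaders_atari_gym': GymAtariSpaceInvadersEnv,
    'unity': UnityEnv,
    'unity-multiple': UnityMultipleEnv,
}

def create_env_sketch(config: Configuration) -> IEnvironment:
    env_name = config.get_current_exp_cfg().gym_id
    env_type = config.get_current_exp_cfg().environment_cfg.env_type
    env_cls = ENV_TYPES.get(env_type)
    if env_cls is None:
        raise Exception("Environment '{}' type not supported".format(env_type))
    return env_cls(name=env_name)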
    def __init__(self, cfg: Configuration, session_id, path_models='models'):

        super(MasterTrainer, self).__init__(cfg, session_id)

        reinforcement_learning_cfg = self.cfg.get_current_exp_cfg().reinforcement_learning_cfg

        if reinforcement_learning_cfg.algorithm_type.startswith('ddpg'):
            self.eps = reinforcement_learning_cfg.ddpg_cfg.epsilon_start   # initialize epsilon
            self.eps_end = reinforcement_learning_cfg.ddpg_cfg.epsilon_end
            self.eps_decay = reinforcement_learning_cfg.ddpg_cfg.epsilon_decay
        else:
            self.eps = reinforcement_learning_cfg.dqn_cfg.epsilon_start   # initialize epsilon
            self.eps_end = reinforcement_learning_cfg.dqn_cfg.epsilon_end
            self.eps_decay = reinforcement_learning_cfg.dqn_cfg.epsilon_decay
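
# --- Editor's note: a self-contained sketch of the multiplicative
# epsilon-greedy schedule implied by eps/eps_end/eps_decay above
# (illustrative helper, not the project's API):

def epsilon_at(episode, start=1.0, decay=0.995, end=0.01):
    # Epsilon shrinks by `decay` each episode and is floored at `end`.
    return max(end, start * decay ** episode)

# With the values from the configs below (start=1.0, decay=0.995,
# end=0.01), the floor is reached around episode 919.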
    def create(cfg: Configuration, session_id) -> ITrainer:

        algorithm_type = \
            cfg.get_current_exp_cfg().reinforcement_learning_cfg.algorithm_type

        # Both DQN and DDPG variants are currently handled by MasterTrainer;
        # the branch is kept so unsupported algorithms still fail loudly.
        if algorithm_type.startswith('dqn'):
            trainer = MasterTrainer(cfg=cfg, session_id=session_id)
        elif algorithm_type.startswith('ddpg'):
            trainer = MasterTrainer(cfg=cfg, session_id=session_id)
        else:
            raise Exception(
                "Trainer for algorithm '{}' type not supported".format(
                    algorithm_type))

        return trainer
    def create(config: Configuration, seed=0) -> IAgent:

        algorithm_type = \
            config.get_current_exp_cfg().reinforcement_learning_cfg.algorithm_type

        if algorithm_type.startswith('dqn'):
            agent = DqnAgent(seed=seed, cfg=config)
        elif algorithm_type.startswith('ddpg'):
            agent = DdpgAgent(seed=seed, cfg=config)
        else:
            raise Exception(
                "Agent for algorithm '{}' type not supported".format(
                    algorithm_type))

        return agent
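
# --- Editor's note: an end-to-end wiring sketch using the factories in
# this listing. The enclosing factory class names are not shown in the
# snippets, so the names below are hypothetical:
#
#     cfg = Configuration(test_flag=True)
#     env = EnvironmentFactory.create(cfg)              # hypothetical class name
#     agent = AgentFactory.create(cfg, seed=0)          # hypothetical class name
#     trainer = TrainerFactory.create(cfg, session_id)  # hypothetical class name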
    def get():
        cfg = \
            {
                "experiment_cfgs": [
                    {
                        "id": "lunarlander",
                        "gym_id": "LunarLander-v2",
                        "agent_cfg": {
                            "action_size": 4,
                            "discrete": True,
                            "num_frames": 1,
                            "state_rgb": False,
                            "state_size": 8
                        },
                        "environment_cfg": {
                            "env_type": "gym",
                            "num_agents": 1
                        },
                        "reinforcement_learning_cfg": {
                            "algorithm_type": "dqn",
                            "dqn_cfg": {
                                "epsilon_start": 1.0,
                                "epsilon_end": 0.01,
                                "epsilon_decay": 0.995,
                                "lr": 0.0001,
                                "model_cfg": {
                                    "hidden_layers": [
                                        64,
                                        64
                                    ]
                                },
                            },
                            "ddpg_cfg": None
                        },
                        "replay_memory_cfg": {
                            "buffer_size": 100000,
                            "prioritized_replay": True,
                            "prioritized_replay_alpha": 0.6,
                            "prioritized_replay_beta0": 0.4,
                            "prioritized_replay_eps": 1e-06
                        },
                        "trainer_cfg": {
                            "batch_size": 64,
                            "eval_frequency": 16,
                            "eval_steps": 4,
                            "gamma": 0.99,
                            "human_flag": False,
                            "max_episode_steps": 2,
                            "max_steps": 128,
                            "tau": 0.001,
                            "update_every": 4,
                            "num_updates": 1
                        }
                    },
                    {
                        "id": "breakout",
                        "gym_id": "Breakout-ram-v4",
                        "agent_cfg": {
                            "action_size": 3,
                            "discrete": True,
                            "num_frames": 1,
                            "state_rgb": False,
                            "state_size": 128
                        },
                        "environment_cfg": {
                            "env_type": "spaceinvaders_atari_gym",
                            "num_agents": 1
                        },
                        "reinforcement_learning_cfg": {
                            "algorithm_type": "dqn",
                            "dqn_cfg": {
                                "epsilon_start": 1.0,
                                "epsilon_end": 0.01,
                                "epsilon_decay": 0.995,
                                "lr": 0.0001,
                                "model_cfg": {
                                    "hidden_layers": [
                                        64,
                                        64
                                    ]
                                },
                            },
                            "ddpg_cfg": None
                        },
                        "replay_memory_cfg": {
                            "buffer_size": 100000,
                            "prioritized_replay": True,
                            "prioritized_replay_alpha": 0.6,
                            "prioritized_replay_beta0": 0.4,
                            "prioritized_replay_eps": 1e-06
                        },
                        "trainer_cfg": {
                            "batch_size": 64,
                            "eval_frequency": 16,
                            "eval_steps": 4,
                            "gamma": 0.99,
                            "human_flag": False,
                            "max_episode_steps": 2,
                            "max_steps": 128,
                            "tau": 0.001,
                            "update_every": 4,
                            "num_updates": 1
                        }
                    },
                    {
                        "id": "breakout-rgb",
                        "gym_id": "Breakout-v4",
                        "agent_cfg": {
                            "action_size": 3,
                            "discrete": True,
                            "num_frames": 1,
                            "state_rgb": True,
                            "state_size": [80, 80]
                        },
                        "environment_cfg": {
                            "env_type": "spaceinvaders_atari_gym",
                            "num_agents": 1
                        },
                        "reinforcement_learning_cfg": {
                            "algorithm_type": "dqn",
                            "dqn_cfg": {
                                "epsilon_start": 1.0,
                                "epsilon_end": 0.01,
                                "epsilon_decay": 0.995,
                                "lr": 0.0001,
                                "model_cfg": {
                                    "hidden_layers": [
                                        64,
                                        64
                                    ]
                                },
                            },
                            "ddpg_cfg": None
                        },
                        "replay_memory_cfg": {
                            "buffer_size": 100000,
                            "prioritized_replay": True,
                            "prioritized_replay_alpha": 0.6,
                            "prioritized_replay_beta0": 0.4,
                            "prioritized_replay_eps": 1e-06
                        },
                        "trainer_cfg": {
                            "batch_size": 64,
                            "eval_frequency": 16,
                            "eval_steps": 4,
                            "gamma": 0.99,
                            "human_flag": False,
                            "max_episode_steps": 2,
                            "max_steps": 128,
                            "tau": 0.001,
                            "update_every": 4,
                            "num_updates": 1
                        }
                    },
                    {
                        "id": "banana",
                        "gym_id": "env/unity/mac/banana.app",
                        "agent_cfg": {
                            "action_size": 4,
                            "discrete": True,
                            "num_frames": 1,
                            "state_rgb": False,
                            "state_size": 37
                        },
                        "environment_cfg": {
                            "env_type": "unity",
                            "num_agents": 1
                        },
                        "reinforcement_learning_cfg": {
                            "algorithm_type": "dqn_double",
                            "dqn_cfg": {
                                "epsilon_start": 1.0,
                                "epsilon_end": 0.01,
                                "epsilon_decay": 0.995,
                                "lr": 0.0001,
                                "model_cfg": {
                                    "hidden_layers": [
                                        64,
                                        64
                                    ]
                                },
                            },
                            "ddpg_cfg": None
                        },
                        "replay_memory_cfg": {
                            "buffer_size": 100000,
                            "prioritized_replay": True,
                            "prioritized_replay_alpha": 0.6,
                            "prioritized_replay_beta0": 0.4,
                            "prioritized_replay_eps": 1e-06
                        },
                        "trainer_cfg": {
                            "batch_size": 64,
                            "eval_frequency": 16,
                            "eval_steps": 4,
                            "gamma": 0.99,
                            "human_flag": False,
                            "max_episode_steps": 2,
                            "max_steps": 128,
                            "tau": 0.001,
                            "update_every": 4,
                            "num_updates": 1
                        }
                    },
                ]
            }

        return Configuration(test_flag=True, exp_cfg=cfg)
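
# --- Editor's note: a quick sanity check over the raw dict above, using
# plain dict access only (no project APIs):
#
#     ids = [e["id"] for e in cfg["experiment_cfgs"]]
#     assert ids == ["lunarlander", "breakout", "breakout-rgb", "banana"]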
    def get():
        cfg = \
            {
                "experiment_cfgs": [
                    {
                        "id": "lunarlander-dqn",
                        "gym_id": "LunarLander-v2",
                        "agent_cfg": {
                            "action_size": 4,
                            "discrete": True,
                            "num_frames": 1,
                            "state_rgb": False,
                            "state_size": 8
                        },
                        "environment_cfg": {
                            "env_type": "gym",
                            "num_agents": 1
                        },
                        "reinforcement_learning_cfg": {
                            "algorithm_type": "dqn",
                            "dqn_cfg": {
                                "epsilon_start": 1.0,
                                "epsilon_end": 0.01,
                                "epsilon_decay": 0.995,
                                "lr": 0.0001,
                                "model_cfg": {
                                    "hidden_layers": [
                                        64,
                                        64
                                    ]
                                },
                            },
                            "ddpg_cfg": None
                        },
                        "replay_memory_cfg": {
                             "buffer_size": 100000,
                             "prioritized_replay": True,
                             "prioritized_replay_alpha": 0.6,
                             "prioritized_replay_beta0": 0.4,
                             "prioritized_replay_eps": 1e-06
                        },
                        "trainer_cfg": {
                            "batch_size": 64,
                            "eval_frequency": 16,
                            "eval_steps": 4,
                            "gamma": 0.99,
                            "human_flag": False,
                            "max_episode_steps": 2,
                            "max_steps": 128,
                            "tau": 0.001,
                            "update_every": 4,
                            "num_updates": 1
                        }
                    },
                    {
                        "id": "lunarlander-dqn-withframes",
                        "gym_id": "LunarLander-v2",
                        "agent_cfg": {
                            "action_size": 4,
                            "discrete": True,
                            "num_frames": 10,
                            "state_rgb": False,
                            "state_size": 8
                        },
                        "environment_cfg": {
                            "env_type": "gym",
                            "num_agents": 1
                        },
                        "reinforcement_learning_cfg": {
                            "algorithm_type": "dqn",
                            "dqn_cfg": {
                                "epsilon_start": 1.0,
                                "epsilon_end": 0.01,
                                "epsilon_decay": 0.995,
                                "lr": 0.0001,
                                "model_cfg": {
                                    "hidden_layers": [
                                        64,
                                        64
                                    ]
                                },
                            },
                            "ddpg_cfg": None
                        },
                        "replay_memory_cfg": {
                            "buffer_size": 100000,
                            "prioritized_replay": True,
                            "prioritized_replay_alpha": 0.6,
                            "prioritized_replay_beta0": 0.4,
                            "prioritized_replay_eps": 1e-06
                        },
                        "trainer_cfg": {
                            "batch_size": 64,
                            "eval_frequency": 16,
                            "eval_steps": 4,
                            "gamma": 0.99,
                            "human_flag": False,
                            "max_episode_steps": 2,
                            "max_steps": 128,
                            "tau": 0.001,
                            "update_every": 4,
                            "num_updates": 1
                        }
                    },
                    {
                        "id": "lunarlander-dqn-noprio",
                        "gym_id": "LunarLander-v2",
                        "agent_cfg": {
                            "action_size": 4,
                            "discrete": True,
                            "num_frames": 1,
                            "state_rgb": False,
                            "state_size": 8
                        },
                        "environment_cfg": {
                            "env_type": "gym",
                            "num_agents": 1
                        },
                       "reinforcement_learning_cfg": {
                            "algorithm_type": "dqn",
                            "dqn_cfg": {
                                "epsilon_start": 1.0,
                                "epsilon_end": 0.01,
                                "epsilon_decay": 0.995,
                                "lr": 0.0001,
                                "model_cfg": {
                                    "hidden_layers": [
                                        64,
                                        64
                                    ]
                                },
                            },
                            "ddpg_cfg": None
                        },
                        "replay_memory_cfg": {
                            "buffer_size": 100000,
                            "prioritized_replay": False,
                            "prioritized_replay_alpha": 0.6,
                            "prioritized_replay_beta0": 0.4,
                            "prioritized_replay_eps": 1e-06
                        },
                        "trainer_cfg": {
                            "batch_size": 64,
                            "eval_frequency": 16,
                            "eval_steps": 4,
                            "gamma": 0.99,
                            "human_flag": False,
                            "max_episode_steps": 2,
                            "max_steps": 128,
                            "tau": 0.001,
                            "update_every": 4,
                            "num_updates": 1
                        }
                    },
                    {
                        "id": "lunarlander-dqn-dueling",
                        "gym_id": "LunarLander-v2",
                        "agent_cfg": {
                            "action_size": 4,
                            "discrete": True,
                            "num_frames": 1,
                            "state_rgb": False,
                            "state_size": 8
                        },
                        "environment_cfg": {
                            "env_type": "gym",
                            "num_agents": 1
                        },
                        "reinforcement_learning_cfg": {
                            "algorithm_type": "dqn_dueling",
                            "dqn_cfg": {
                                "epsilon_start": 1.0,
                                "epsilon_end": 0.01,
                                "epsilon_decay": 0.995,
                                "lr": 0.0001,
                                "model_cfg": {
                                    "hidden_layers": [
                                        64,
                                        64
                                    ]
                                },
                            },
                            "ddpg_cfg": None
                        },
                        "replay_memory_cfg": {
                            "buffer_size": 100000,
                            "prioritized_replay": True,
                            "prioritized_replay_alpha": 0.6,
                            "prioritized_replay_beta0": 0.4,
                            "prioritized_replay_eps": 1e-06
                        },
                        "trainer_cfg": {
                            "batch_size": 64,
                            "eval_frequency": 16,
                            "eval_steps": 4,
                            "gamma": 0.99,
                            "human_flag": False,
                            "max_episode_steps": 2,
                            "max_steps": 128,
                            "tau": 0.001,
                            "update_every": 4,
                            "num_updates": 1
                        }
                    },
                    {
                        "id": "lunarlander-dqn-double",
                        "gym_id": "LunarLander-v2",
                        "agent_cfg": {
                            "action_size": 4,
                            "discrete": True,
                            "num_frames": 1,
                            "state_rgb": False,
                            "state_size": 8
                        },
                        "environment_cfg": {
                            "env_type": "gym",
                            "num_agents": 1
                        },
                        "reinforcement_learning_cfg": {
                            "algorithm_type": "dqn_double",
                            "dqn_cfg": {
                                "epsilon_start": 1.0,
                                "epsilon_end": 0.01,
                                "epsilon_decay": 0.995,
                                "lr": 0.0001,
                                "model_cfg": {
                                    "hidden_layers": [
                                        64,
                                        64
                                    ]
                                },
                            },
                            "ddpg_cfg": None
                        },
                        "replay_memory_cfg": {
                            "buffer_size": 100000,
                            "prioritized_replay": True,
                            "prioritized_replay_alpha": 0.6,
                            "prioritized_replay_beta0": 0.4,
                            "prioritized_replay_eps": 1e-06
                        },
                        "trainer_cfg": {
                            "batch_size": 64,
                            "eval_frequency": 16,
                            "eval_steps": 4,
                            "gamma": 0.99,
                            "human_flag": False,
                            "max_episode_steps": 2,
                            "max_steps": 128,
                            "tau": 0.001,
                            "update_every": 4,
                            "num_updates": 1
                        }
                    },
                    {
                        "id": "walker-ddpg",
                        "gym_id": "BipedalWalker-v3",
                        "agent_cfg": {
                            "action_size": 4,
                            "discrete": True,
                            "num_frames": 1,
                            "state_rgb": False,
                            "state_size": 24
                        },
                        "environment_cfg": {
                            "env_type": "gym",
                            "num_agents": 1
                        },
                        "reinforcement_learning_cfg": {
                            "algorithm_type": "ddpg",
                            "dqn_cfg": None,
                            "ddpg_cfg": {
                                "epsilon_start": 1.0,
                                "epsilon_end": 0.01,
                                "epsilon_decay": 0.995,
                                "lr_actor": 0.0001,
                                "lr_critic": 0.0003,
                                "weight_decay": 0.0001,
                                "actor_model_cfg": {
                                    "hidden_layers": [
                                        64,
                                        64
                                    ]
                                },
                                "critic_model_cfg": {
                                    "hidden_layers": [
                                        128,
                                        128
                                    ]
                                },
                            }
                        },
                        "replay_memory_cfg": {
                            "buffer_size": 100000,
                            "prioritized_replay": False,
                            "prioritized_replay_alpha": 0.6,
                            "prioritized_replay_beta0": 0.4,
                            "prioritized_replay_eps": 1e-06
                        },
                        "trainer_cfg": {
                            "batch_size": 64,
                            "eval_frequency": 16,
                            "eval_steps": 4,
                            "gamma": 0.99,
                            "human_flag": False,
                            "max_episode_steps": 2,
                            "max_steps": 128,
                            "tau": 0.001,
                            "update_every": 4,
                            "num_updates": 1
                        }
                    },
                ]
            }

        return Configuration(test_flag=True, exp_cfg=cfg)
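
# --- Editor's note: a self-contained sketch of how the replay_memory_cfg
# knobs above are conventionally used in prioritized experience replay
# (Schaul et al., 2015); illustrative only, not the project's buffer:

import numpy as np

def per_sample_probs(td_errors, alpha=0.6, eps=1e-06):
    # Priority p_i = (|delta_i| + eps) ** alpha, normalized to sum to 1.
    p = (np.abs(td_errors) + eps) ** alpha
    return p / p.sum()

def importance_weights(probs, n_total, beta):
    # w_i = (N * P(i)) ** -beta, scaled by the max weight for stability.
    w = (n_total * probs) ** (-beta)
    return w / w.max()

# alpha trades off uniform (0) against fully prioritized (1) sampling;
# beta anneals from prioritized_replay_beta0 (0.4 here) to 1.0 via the
# LinearSchedule seen in DdpgAgent.__init__.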