Exemplo n.º 1
0
def create_dqn_agents(agent_id_list, config):
    """Build one DQNAgent per id in ``agent_id_list``.

    Each agent gets its own Q-network, learning model, DQN algorithm and
    experience store, all constructed from ``config``. The global RNG seed is
    set once, up front, so agent construction is reproducible.

    Returns a dict mapping agent id -> DQNAgent.
    """
    set_seeds(config.seed)
    action_count = config.algorithm.num_actions

    def _build_agent(name):
        # Q-network: a single fully-connected head emitting one value per action.
        network = NNStack(
            "q_value",
            FullyConnectedBlock(
                input_dim=config.algorithm.input_dim,
                output_dim=action_count,
                activation=nn.LeakyReLU,
                is_head=True,
                **config.algorithm.model
            )
        )
        model = LearningModel(
            network,
            optimizer_options=OptimizerOptions(cls=RMSprop, params=config.algorithm.optimizer)
        )
        dqn = DQN(
            model,
            DQNConfig(**config.algorithm.hyper_params, loss_cls=nn.SmoothL1Loss)
        )
        # Each agent owns a private experience pool.
        return DQNAgent(
            name, dqn, ColumnBasedStore(**config.experience_pool),
            **config.training_loop_parameters
        )

    return {agent_id: _build_agent(agent_id) for agent_id in agent_id_list}
Exemplo n.º 2
0
    def _assemble_agents(self):
        """Create one DQN-backed Agent for every id in ``self._agent_id_list``.

        All agents share the same ``AgentParameters`` (built once from the
        module-level ``training_config``); each gets its own policy network
        and DQN algorithm instance.
        """
        shared_params = AgentParameters(**training_config)
        for name in self._agent_id_list:
            # Policy network: an MLP whose input width matches the state shaper.
            policy_net = MLPDecisionLayers(
                name=f'{name}.policy',
                input_dim=self._state_shaper.dim,
                **model_config)
            eval_model = LearningModel(decision_layers=policy_net)

            dqn = DQN(
                model_dict={"eval": eval_model},
                optimizer_opt=(RMSprop, optimizer_config),
                loss_func_dict={"eval": smooth_l1_loss},
                hyper_params=DQNHyperParams(**dqn_config))

            self._agent_dict[name] = Agent(
                name=name, algorithm=dqn, params=shared_params)
Exemplo n.º 3
0
    def _assemble(self, agent_dict):
        """Populate ``agent_dict`` with one CIMAgent per known agent id.

        Reads everything from the module-level ``config.agents`` section:
        seed, network layout, optimizer settings, DQN hyper-parameters and
        experience-pool settings. Mutates ``agent_dict`` in place.
        """
        set_seeds(config.agents.seed)
        action_count = config.agents.algorithm.num_actions
        for name in self._agent_id_list:
            # Per-agent policy network sized by the state shaper's output dim.
            policy_net = MLPDecisionLayers(
                name=f'{name}.policy',
                input_dim=self._state_shaper.dim,
                output_dim=action_count,
                **config.agents.algorithm.model)
            eval_model = LearningModel(decision_layers=policy_net)

            # num_actions is passed explicitly on top of the configured
            # hyper-parameters so the algorithm and network stay in sync.
            dqn = DQN(
                model_dict={"eval": eval_model},
                optimizer_opt=(RMSprop, config.agents.algorithm.optimizer),
                loss_func_dict={"eval": smooth_l1_loss},
                hyper_params=DQNHyperParams(
                    **config.agents.algorithm.hyper_parameters,
                    num_actions=action_count))

            pool = ColumnBasedStore(**config.agents.experience_pool)
            agent_dict[name] = CIMAgent(
                name=name,
                algorithm=dqn,
                experience_pool=pool,
                **config.agents.training_loop_parameters)
Exemplo n.º 4
0
def get_dqn_agent():
    """Construct a DQN algorithm instance from the module-level ``agent_config``."""
    # Single fully-connected block wrapped as a multi-head model, with the
    # optimizer settings taken straight from the config.
    fc_block = FullyConnectedBlock(**agent_config["model"])
    q_model = SimpleMultiHeadModel(fc_block, optim_option=agent_config["optimization"])
    hyper_params = DQNConfig(**agent_config["hyper_params"])
    return DQN(q_model, hyper_params)