def create_dqn_agents(agent_id_list, config):
    """Build one ``DQNAgent`` per entry of ``agent_id_list``.

    ``set_seeds(config.seed)`` is called once, before any model is
    constructed. Each agent receives its own Q-network, learning model,
    ``DQN`` algorithm and ``ColumnBasedStore``, all configured from ``config``.

    Args:
        agent_id_list: Iterable of agent identifiers; each one becomes a key
            of the returned dict.
        config: Object exposing ``seed``, ``experience_pool``,
            ``training_loop_parameters`` and an ``algorithm`` section with
            ``num_actions``, ``input_dim``, ``model``, ``optimizer`` and
            ``hyper_params``. The sections expanded with ``**`` must be
            mappings.

    Returns:
        dict mapping every agent id to the ``DQNAgent`` created for it.
    """
    algo_cfg = config.algorithm
    num_actions = algo_cfg.num_actions
    set_seeds(config.seed)

    def _build_agent(agent_id):
        # Components are created in a fixed order: network, learning model,
        # algorithm, then the experience store handed to the agent.
        q_block = FullyConnectedBlock(
            input_dim=algo_cfg.input_dim,
            output_dim=num_actions,
            activation=nn.LeakyReLU,
            is_head=True,
            **algo_cfg.model,
        )
        q_stack = NNStack("q_value", q_block)
        optimizer_options = OptimizerOptions(cls=RMSprop, params=algo_cfg.optimizer)
        model = LearningModel(q_stack, optimizer_options=optimizer_options)
        dqn_config = DQNConfig(**algo_cfg.hyper_params, loss_cls=nn.SmoothL1Loss)
        dqn = DQN(model, dqn_config)
        store = ColumnBasedStore(**config.experience_pool)
        return DQNAgent(agent_id, dqn, store, **config.training_loop_parameters)

    return {agent_id: _build_agent(agent_id) for agent_id in agent_id_list}
def _assemble_agents(self):
    """Fill ``self._agent_dict`` with one ``Agent`` per id in ``self._agent_id_list``.

    A single ``AgentParameters`` instance, built from the free variable
    ``training_config``, is shared by every agent. Each agent gets its own
    ``LearningModel`` and ``DQN`` algorithm, configured from the free
    variables ``model_config``, ``optimizer_config`` and ``dqn_config``.

    Returns:
        None. The agents are stored in ``self._agent_dict`` keyed by agent id.
    """
    shared_params = AgentParameters(**training_config)

    for agent_id in self._agent_id_list:
        policy_layers = MLPDecisionLayers(
            name=f'{agent_id}.policy',
            input_dim=self._state_shaper.dim,
            **model_config,
        )
        models = {"eval": LearningModel(decision_layers=policy_layers)}
        optimizer_opt = (RMSprop, optimizer_config)
        losses = {"eval": smooth_l1_loss}
        hyper_params = DQNHyperParams(**dqn_config)

        dqn = DQN(
            model_dict=models,
            optimizer_opt=optimizer_opt,
            loss_func_dict=losses,
            hyper_params=hyper_params,
        )
        agent = Agent(name=agent_id, algorithm=dqn, params=shared_params)
        self._agent_dict[agent_id] = agent
def _assemble(self, agent_dict):
    """Create a ``CIMAgent`` for each id in ``self._agent_id_list`` and store it in ``agent_dict``.

    ``set_seeds(config.agents.seed)`` is called once up front. All settings
    are read from the ``agents`` section of the free variable ``config``.

    Args:
        agent_dict: Mutable mapping that is updated in place; each agent id
            is bound to its newly created ``CIMAgent``.

    Returns:
        None. The result is delivered through ``agent_dict``.
    """
    agents_cfg = config.agents
    set_seeds(agents_cfg.seed)
    algo_cfg = agents_cfg.algorithm
    num_actions = algo_cfg.num_actions

    for agent_id in self._agent_id_list:
        policy_layers = MLPDecisionLayers(
            name=f'{agent_id}.policy',
            input_dim=self._state_shaper.dim,
            output_dim=num_actions,
            **algo_cfg.model,
        )
        models = {"eval": LearningModel(decision_layers=policy_layers)}
        optimizer_opt = (RMSprop, algo_cfg.optimizer)
        losses = {"eval": smooth_l1_loss}
        hyper_params = DQNHyperParams(
            **algo_cfg.hyper_parameters,
            num_actions=num_actions,
        )
        dqn = DQN(
            model_dict=models,
            optimizer_opt=optimizer_opt,
            loss_func_dict=losses,
            hyper_params=hyper_params,
        )

        # Each agent owns a separate experience store.
        store = ColumnBasedStore(**agents_cfg.experience_pool)
        agent_dict[agent_id] = CIMAgent(
            name=agent_id,
            algorithm=dqn,
            experience_pool=store,
            **agents_cfg.training_loop_parameters,
        )
def get_dqn_agent():
    """Construct a ``DQN`` instance from the free variable ``agent_config``.

    Reads the ``"model"``, ``"optimization"`` and ``"hyper_params"`` entries
    of ``agent_config``; ``"model"`` and ``"hyper_params"`` are expanded as
    keyword arguments and therefore must be mappings.

    Returns:
        The ``DQN`` object wrapping a ``SimpleMultiHeadModel``.
    """
    network = FullyConnectedBlock(**agent_config["model"])
    model = SimpleMultiHeadModel(network, optim_option=agent_config["optimization"])
    dqn_config = DQNConfig(**agent_config["hyper_params"])
    return DQN(model, dqn_config)