Exemplo n.º 1
0
    def test_minibatches_per_step(self):
        """Compare one large minibatch per step against eight smaller ones.

        Two models are trained with identical settings except that the first
        uses ``minibatch_size=1024, minibatches_per_step=1`` and the second
        uses ``minibatch_size=128, minibatches_per_step=8``. The test passes
        when the weights of ``q_network.fc.dnn[-2]`` of both trainers agree
        within an absolute tolerance of 1e-3.

        ``self.epochs`` is temporarily set to 2 and restored afterwards, even
        when training or the assertion fails.
        """
        _epochs = self.epochs
        self.epochs = 2
        try:
            rl_parameters = RLParameters(gamma=0.95,
                                         target_update_rate=0.9,
                                         maxq_learning=True)
            rainbow_parameters = RainbowDQNParameters(
                double_q_learning=True, dueling_architecture=False)
            training_parameters1 = TrainingParameters(
                layers=self.layers,
                activations=self.activations,
                minibatch_size=1024,
                minibatches_per_step=1,
                learning_rate=0.25,
                optimizer="ADAM",
            )
            training_parameters2 = TrainingParameters(
                layers=self.layers,
                activations=self.activations,
                minibatch_size=128,
                minibatches_per_step=8,
                learning_rate=0.25,
                optimizer="ADAM",
            )
            env1 = Env(self.state_dims, self.action_dims)
            env2 = Env(self.state_dims, self.action_dims)
            model_parameters1 = DiscreteActionModelParameters(
                actions=env1.actions,
                rl=rl_parameters,
                rainbow=rainbow_parameters,
                training=training_parameters1,
            )
            model_parameters2 = DiscreteActionModelParameters(
                actions=env2.actions,
                rl=rl_parameters,
                rainbow=rainbow_parameters,
                training=training_parameters2,
            )
            # minibatch_size / 8, minibatches_per_step * 8 should give the
            # same result
            logger.info("Training model 1")
            trainer1 = self._train(model_parameters1, env1)
            # Reset the summary-writer globals between the two training runs.
            SummaryWriterContext._reset_globals()
            logger.info("Training model 2")
            trainer2 = self._train(model_parameters2, env2)

            weight1 = trainer1.q_network.fc.dnn[-2].weight.detach().numpy()
            weight2 = trainer2.q_network.fc.dnn[-2].weight.detach().numpy()

            # Due to numerical stability this tolerance has to be fairly high
            self.assertTrue(
                np.allclose(weight1, weight2, rtol=0.0, atol=1e-3))
        finally:
            # Restore the original epoch count on every exit path so the
            # temporary override cannot outlive this test.
            self.epochs = _epochs
Exemplo n.º 2
0
 def get_sarsa_parameters(
     self, environment, reward_shape, dueling, categorical, quantile, clip_grad_norm
 ):
     """Build the discrete-action model parameters for a SARSA-style run.

     ``maxq_learning`` is disabled, ``reward_shape`` is passed through as
     ``reward_boost`` and ``clip_grad_norm`` is forwarded to the training
     parameters. When ``dueling`` is truthy the layer spec gets a 128-unit
     hidden layer with relu activations; otherwise a single linear layer
     is used. ``dueling``, ``categorical`` and ``quantile`` are forwarded
     to the rainbow parameters.

     Returns:
         A ``DiscreteActionModelParameters`` using ``environment.ACTIONS``.
     """
     rl_settings = RLParameters(
         gamma=DISCOUNT,
         target_update_rate=1.0,
         maxq_learning=False,
         reward_boost=reward_shape,
     )

     # Pick the network layout once instead of per keyword argument.
     if dueling:
         layer_spec = [-1, 128, -1]
         activation_spec = ["relu", "relu"]
     else:
         layer_spec = [-1, -1]
         activation_spec = ["linear"]

     training_settings = TrainingParameters(
         layers=layer_spec,
         activations=activation_spec,
         minibatch_size=self.minibatch_size,
         learning_rate=0.05,
         optimizer="ADAM",
         clip_grad_norm=clip_grad_norm,
     )

     action_names = environment.ACTIONS
     rainbow_settings = RainbowDQNParameters(
         double_q_learning=True,
         dueling_architecture=dueling,
         categorical=categorical,
         quantile=quantile,
         num_atoms=5,
     )
     return DiscreteActionModelParameters(
         actions=action_names,
         rl=rl_settings,
         training=training_settings,
         rainbow=rainbow_settings,
     )
Exemplo n.º 3
0
    def test_trainer_maxq(self):
        """Train with max-Q learning and check the mean action score.

        After training, the mean of ``trainer.all_action_scores`` must lie
        strictly between 18 and 22.
        """
        env = Env(self.state_dims, self.action_dims)

        action_names = env.actions
        rl_settings = RLParameters(
            gamma=0.95, target_update_rate=0.9, maxq_learning=True
        )
        rainbow_settings = RainbowDQNParameters(
            double_q_learning=True, dueling_architecture=False
        )
        training_settings = TrainingParameters(
            layers=self.layers,
            activations=self.activations,
            minibatch_size=1024,
            learning_rate=0.25,
            optimizer="ADAM",
        )
        maxq_parameters = DiscreteActionModelParameters(
            actions=action_names,
            rl=rl_settings,
            rainbow=rainbow_settings,
            training=training_settings,
        )

        # Q value should converge to very close to 20
        trainer = self._train(maxq_parameters, env)
        mean_q = torch.mean(trainer.all_action_scores)
        self.assertLess(mean_q, 22)
        self.assertGreater(mean_q, 18)
Exemplo n.º 4
0
def create_trainer(params: OpenAiGymParameters, env: OpenAIGymEnvironment):
    """Create the trainer matching ``params.model_type`` for ``env``.

    Supported model types are discrete DQN, parametric DQN, TD3, soft
    actor-critic and CEM. For the two DQN variants with an image
    environment (``env.img``), the first entry of the CNN ``conv_dims`` is
    overwritten in place with ``env.num_input_channels``; the discrete DQN
    branch additionally copies the environment's height, width and channel
    count into the CNN parameters.

    Args:
        params: Gym run configuration; the sections required by the selected
            model type must not be ``None``.
        env: Environment supplying actions, normalization and image
            dimensions.

    Returns:
        The trainer built for the requested model type.

    Raises:
        AssertionError: If a required parameter section is missing, or if
            CNN parameters are absent for image input / present for
            non-image input.
        NotImplementedError: If ``params.model_type`` is not supported.
    """
    use_gpu = params.use_gpu
    model_type = params.model_type
    assert params.rl is not None
    rl_parameters = params.rl

    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        assert params.training is not None
        training_parameters = params.training
        assert params.rainbow is not None
        if env.img:
            assert (training_parameters.cnn_parameters
                    is not None), "Missing CNN parameters for image input"
            # conv_dims is mutated in place, so this is visible through
            # every reference to the same cnn_parameters object.
            training_parameters.cnn_parameters.conv_dims[
                0] = env.num_input_channels
            # _replace returns a new object instead of mutating the receiver;
            # rebind the result so the environment's image dimensions
            # actually reach the trainer (previously it was discarded).
            training_parameters = training_parameters._replace(
                cnn_parameters=training_parameters.cnn_parameters._replace(
                    input_height=env.height,
                    input_width=env.width,
                    num_input_channels=env.num_input_channels,
                ))
        else:
            assert (training_parameters.cnn_parameters is
                    None), "Extra CNN parameters for non-image input"
        discrete_trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=params.rainbow,
            evaluation=params.evaluation,
        )
        trainer = create_dqn_trainer_from_params(discrete_trainer_params,
                                                 env.normalization, use_gpu)

    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        assert params.training is not None
        training_parameters = params.training
        assert params.rainbow is not None
        if env.img:
            assert (training_parameters.cnn_parameters
                    is not None), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[
                0] = env.num_input_channels
        else:
            assert (training_parameters.cnn_parameters is
                    None), "Extra CNN parameters for non-image input"
        continuous_trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters,
            training=training_parameters,
            rainbow=params.rainbow)
        trainer = create_parametric_dqn_trainer_from_params(
            continuous_trainer_params,
            env.normalization,
            env.normalization_action,
            use_gpu,
        )

    elif model_type == ModelType.TD3.value:
        assert params.td3_training is not None
        assert params.critic_training is not None
        assert params.actor_training is not None
        td3_trainer_params = TD3ModelParameters(
            rl=rl_parameters,
            training=params.td3_training,
            q_network=params.critic_training,
            actor_network=params.actor_training,
        )
        trainer = get_td3_trainer(env, td3_trainer_params, use_gpu)

    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        assert params.sac_training is not None
        assert params.critic_training is not None
        assert params.actor_training is not None
        # sac_value_training is passed through without a None check.
        trainer = get_sac_trainer(
            env,
            rl_parameters,
            params.sac_training,
            params.critic_training,
            params.actor_training,
            params.sac_value_training,
            use_gpu,
        )
    elif model_type == ModelType.CEM.value:
        assert params.cem is not None
        cem_trainer_params = params.cem._replace(rl=params.rl)
        trainer = get_cem_trainer(env, cem_trainer_params, use_gpu)
    else:
        raise NotImplementedError(
            "Model of type {} not supported".format(model_type))

    return trainer
Exemplo n.º 5
0
def single_process_main(gpu_index, *args):
    """Run the DQN training workflow for a single process.

    ``args[0]`` is the run configuration dictionary. Its
    ``["training"]["minibatch_size"]`` entry is scaled in place by
    ``minibatch_size_multiplier`` before the parameter objects are built.
    Models are saved only by the process with node index 0 and
    ``gpu_index`` 0.

    Args:
        gpu_index: Index of this process's GPU; forwarded to
            ``BaseWorkflow.init_multiprocessing`` when all available GPUs
            are used.
        *args: Positional arguments; only the first (the configuration
            dictionary) is read.
    """
    config = args[0]

    # Set minibatch size based on # of devices being used to train
    training_json = config["training"]
    training_json["minibatch_size"] *= minibatch_size_multiplier(
        config["use_gpu"], config["use_all_avail_gpus"]
    )

    actions = config["actions"]

    rl = from_json(config["rl"], RLParameters)
    training = from_json(config["training"], TrainingParameters)
    rainbow = from_json(config["rainbow"], RainbowDQNParameters)
    # Fall back to default evaluation settings when none are configured.
    evaluation = (
        from_json(config["evaluation"], EvaluationParameters)
        if "evaluation" in config
        else EvaluationParameters()
    )

    dqn_params = DiscreteActionModelParameters(
        actions=actions,
        rl=rl,
        training=training,
        rainbow=rainbow,
        evaluation=evaluation,
    )

    state_normalization = BaseWorkflow.read_norm_file(config["state_norm_data_path"])
    logger.info(f"Got state normalization: {state_normalization}")

    writer = SummaryWriter(log_dir=config["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    if config["use_all_avail_gpus"]:
        BaseWorkflow.init_multiprocessing(
            int(config["num_processes_per_node"]),
            int(config["num_nodes"]),
            int(config["node_index"]),
            gpu_index,
            config["init_method"],
        )

    workflow = DqnWorkflow(
        dqn_params,
        state_normalization,
        config["use_gpu"],
        config["use_all_avail_gpus"],
    )

    sorted_features, _ = sort_features_by_normalization(state_normalization)
    handler = DiscreteDqnPreprocessHandler(
        len(actions), StringKeySparseToDenseProcessor(sorted_features)
    )

    def open_dataset(path_key):
        # Both datasets share the batch size and the preprocess handler.
        return JSONDatasetReader(
            config[path_key],
            batch_size=training.minibatch_size,
            preprocess_handler=handler,
        )

    train_dataset = open_dataset("training_data_path")
    eval_dataset = open_dataset("eval_data_path")

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(config["epochs"]))

    is_primary = int(config["node_index"]) == 0 and gpu_index == 0
    if is_primary:
        workflow.save_models(config["model_output_path"])