def test_minibatches_per_step(self):
        """Check that splitting a step into smaller minibatches trains equivalently.

        Two models are trained with identical RL and rainbow settings.  The
        first uses ``minibatch_size=1024`` with ``minibatches_per_step=1``;
        the second uses ``minibatch_size=128`` with ``minibatches_per_step=8``.
        The last-layer weights of both Q-networks must agree within an
        absolute tolerance of ``1e-3``.

        ``self.epochs`` is temporarily overridden to 2 and is restored
        afterwards, even when training or the assertion raises.
        """
        _epochs = self.epochs
        self.epochs = 2
        try:
            rl_parameters = RLParameters(gamma=0.95,
                                         target_update_rate=0.9,
                                         maxq_learning=True)
            rainbow_parameters = RainbowDQNParameters(
                double_q_learning=True, dueling_architecture=False)
            training_parameters1 = TrainingParameters(
                layers=self.layers,
                activations=self.activations,
                minibatch_size=1024,
                minibatches_per_step=1,
                learning_rate=0.25,
                optimizer="ADAM",
            )
            training_parameters2 = TrainingParameters(
                layers=self.layers,
                activations=self.activations,
                minibatch_size=128,
                minibatches_per_step=8,
                learning_rate=0.25,
                optimizer="ADAM",
            )
            env1 = Env(self.state_dims, self.action_dims)
            env2 = Env(self.state_dims, self.action_dims)
            model_parameters1 = DiscreteActionModelParameters(
                actions=env1.actions,
                rl=rl_parameters,
                rainbow=rainbow_parameters,
                training=training_parameters1,
            )
            model_parameters2 = DiscreteActionModelParameters(
                actions=env2.actions,
                rl=rl_parameters,
                rainbow=rainbow_parameters,
                training=training_parameters2,
            )
            # minibatch_size / 8, minibatches_per_step * 8 should give the
            # same result
            logger.info("Training model 1")
            trainer1 = self._train(model_parameters1, env1)
            # Reset the summary-writer globals between the two training runs.
            SummaryWriterContext._reset_globals()
            logger.info("Training model 2")
            trainer2 = self._train(model_parameters2, env2)

            weight1 = trainer1.q_network.fc.layers[-1].weight.detach().numpy()
            weight2 = trainer2.q_network.fc.layers[-1].weight.detach().numpy()

            # Due to numerical stability this tolerance has to be fairly high
            self.assertTrue(
                np.allclose(weight1, weight2, rtol=0.0, atol=1e-3))
        finally:
            # Always undo the epoch override so a failure here cannot leave
            # the modified value behind.
            self.epochs = _epochs
 def get_sarsa_parameters(self, environment, reward_shape, dueling,
                          categorical, quantile, clip_grad_norm):
     """Assemble ``DiscreteActionModelParameters`` with ``maxq_learning=False``.

     Args:
         environment: Object whose ``ACTIONS`` attribute lists the actions.
         reward_shape: Passed through as ``RLParameters.reward_boost``.
         dueling: Enables the dueling architecture and selects the deeper
             ``[-1, 128, -1]`` layer layout with ReLU activations; otherwise
             a single linear layer ``[-1, -1]`` is used.
         categorical: Forwarded to ``RainbowDQNParameters.categorical``.
         quantile: Forwarded to ``RainbowDQNParameters.quantile``.
         clip_grad_norm: Forwarded to ``TrainingParameters.clip_grad_norm``.

     Returns:
         The assembled ``DiscreteActionModelParameters``.
     """
     sarsa_rl = RLParameters(
         gamma=DISCOUNT,
         target_update_rate=1.0,
         maxq_learning=False,
         reward_boost=reward_shape,
     )

     # The network layout depends only on whether dueling is requested.
     if dueling:
         layer_sizes, layer_activations = [-1, 128, -1], ["relu", "relu"]
     else:
         layer_sizes, layer_activations = [-1, -1], ["linear"]

     network_training = TrainingParameters(
         layers=layer_sizes,
         activations=layer_activations,
         minibatch_size=self.minibatch_size,
         learning_rate=0.05,
         optimizer="ADAM",
         clip_grad_norm=clip_grad_norm,
     )

     rainbow_kwargs = {
         "double_q_learning": True,
         "dueling_architecture": dueling,
         "categorical": categorical,
         "quantile": quantile,
         "num_atoms": 5,
     }
     return DiscreteActionModelParameters(
         actions=environment.ACTIONS,
         rl=sarsa_rl,
         training=network_training,
         rainbow=RainbowDQNParameters(**rainbow_kwargs),
     )
    def test_no_soft_update(self):
        """Check the target's parameter still equals the model's after a soft update.

        A model and its deep copy are asserted to expose identical parameter
        objects.  The model takes one Adam step, then
        ``RLTrainer._soft_update`` is invoked with rate ``0.1``; afterwards
        the target's single parameter must equal the model's.
        """
        net = Model()
        target_net = copy.deepcopy(net)

        # Identity (not mere equality) is asserted for every parameter pair.
        for net_weight, target_weight in zip(net.parameters(),
                                             target_net.parameters()):
            self.assertIs(net_weight, target_weight)

        adam = torch.optim.Adam(net.parameters())

        # One forward/backward pass followed by a single optimizer step.
        indices = torch.tensor([1, 2], dtype=torch.int64)
        net(indices).sum().backward()
        adam.step()

        net_weights = list(net.parameters())
        self.assertEqual(1, len(net_weights))
        # Captured before the soft update runs.
        expected = net_weights[0].detach().numpy()

        rl_trainer = RLTrainer(
            DiscreteActionModelParameters(rl=RLParameters()), use_gpu=False)
        rl_trainer._soft_update(net, target_net, 0.1)

        target_weights = list(target_net.parameters())
        self.assertEqual(1, len(target_weights))
        actual = target_weights[0].detach().numpy()

        npt.assert_array_equal(actual, expected)
    def test_trainer_maxq(self):
        """Train a max-Q discrete-action model and check its mean Q-value.

        The mean of ``trainer.all_action_scores`` after training must lie
        strictly between 18 and 22.
        """
        environment = Env(self.state_dims, self.action_dims)

        action_names = environment.actions
        rl_config = RLParameters(gamma=0.95,
                                 target_update_rate=0.9,
                                 maxq_learning=True)
        rainbow_config = RainbowDQNParameters(double_q_learning=True,
                                              dueling_architecture=False)
        network_config = TrainingParameters(
            layers=self.layers,
            activations=self.activations,
            minibatch_size=1024,
            learning_rate=0.25,
            optimizer="ADAM",
        )
        model_config = DiscreteActionModelParameters(
            actions=action_names,
            rl=rl_config,
            rainbow=rainbow_config,
            training=network_config,
        )

        fitted_trainer = self._train(model_config, environment)
        mean_q_value = torch.mean(fitted_trainer.all_action_scores)

        # The Q value should converge very close to 20; allow +/- 2.
        self.assertLess(mean_q_value, 22)
        self.assertGreater(mean_q_value, 18)
示例#5
0
def create_trainer(params: OpenAiGymParameters, env: OpenAIGymEnvironment):
    """Build the trainer selected by ``params.model_type`` for ``env``.

    ``params.model_type`` is compared against the ``.value`` of
    ``ModelType.PYTORCH_DISCRETE_DQN``, ``PYTORCH_PARAMETRIC_DQN``, ``TD3``,
    ``SOFT_ACTOR_CRITIC`` and ``CEM``.  Each branch asserts that the
    parameter groups it needs are present on ``params`` before building.

    For image environments (``env.img`` truthy) both DQN branches require
    ``training.cnn_parameters`` and overwrite ``conv_dims[0]`` in place with
    ``env.num_input_channels``.  The discrete DQN branch additionally copies
    the environment's height, width and channel count into the CNN
    parameters.  For non-image environments CNN parameters must be absent.

    Args:
        params: Gym run configuration; ``params.rl`` must not be ``None``.
        env: Environment providing actions, normalization data and, for
            image inputs, the image dimensions.

    Returns:
        The trainer created for the requested model type.

    Raises:
        NotImplementedError: If ``params.model_type`` is not supported.
        AssertionError: If a required parameter group is missing, or CNN
            parameters are supplied for a non-image environment.
    """
    use_gpu = params.use_gpu
    model_type = params.model_type
    assert params.rl is not None
    rl_parameters = params.rl

    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        assert params.training is not None
        training_parameters = params.training
        assert params.rainbow is not None
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            # `_replace` returns an updated copy rather than mutating in
            # place, so the result has to be rebound; otherwise the
            # environment's input dimensions never reach the trainer.
            training_parameters = training_parameters._replace(
                cnn_parameters=training_parameters.cnn_parameters._replace(
                    input_height=env.height,
                    input_width=env.width,
                    num_input_channels=env.num_input_channels,
                )
            )
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        discrete_trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters,
            rainbow=params.rainbow,
            evaluation=params.evaluation,
        )
        trainer = create_dqn_trainer_from_params(
            discrete_trainer_params, env.normalization, use_gpu
        )

    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        assert params.training is not None
        training_parameters = params.training
        assert params.rainbow is not None
        if env.img:
            assert (
                training_parameters.cnn_parameters is not None
            ), "Missing CNN parameters for image input"
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (
                training_parameters.cnn_parameters is None
            ), "Extra CNN parameters for non-image input"
        continuous_trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters, training=training_parameters, rainbow=params.rainbow
        )
        trainer = create_parametric_dqn_trainer_from_params(
            continuous_trainer_params,
            env.normalization,
            env.normalization_action,
            use_gpu,
        )

    elif model_type == ModelType.TD3.value:
        assert params.td3_training is not None
        assert params.critic_training is not None
        assert params.actor_training is not None
        td3_trainer_params = TD3ModelParameters(
            rl=rl_parameters,
            training=params.td3_training,
            q_network=params.critic_training,
            actor_network=params.actor_training,
        )
        trainer = get_td3_trainer(env, td3_trainer_params, use_gpu)

    elif model_type == ModelType.SOFT_ACTOR_CRITIC.value:
        assert params.sac_training is not None
        assert params.critic_training is not None
        assert params.actor_training is not None
        trainer = get_sac_trainer(
            env,
            rl_parameters,
            params.sac_training,
            params.critic_training,
            params.actor_training,
            params.sac_value_training,
            use_gpu,
        )
    elif model_type == ModelType.CEM.value:
        assert params.cem is not None
        # The CEM parameters carry their own `rl` field; overwrite it with
        # the run-level RL parameters.
        cem_trainer_params = params.cem._replace(rl=params.rl)
        trainer = get_cem_trainer(env, cem_trainer_params, use_gpu)
    else:
        raise NotImplementedError("Model of type {} not supported".format(model_type))

    return trainer
示例#6
0
def single_process_main(gpu_index, *args):
    """Run the discrete DQN training workflow in a single process.

    Args:
        gpu_index: Index forwarded to ``BaseWorkflow.init_multiprocessing``
            when all available GPUs are used; models are saved only when it
            is 0 on node 0.
        *args: ``args[0]`` is the parameter dict.  Its
            ``["training"]["minibatch_size"]`` entry is scaled in place by
            ``minibatch_size_multiplier``.
    """
    config = args[0]
    use_gpu = config["use_gpu"]
    use_all_gpus = config["use_all_avail_gpus"]

    # Set minibatch size based on # of devices being used to train
    config["training"]["minibatch_size"] *= minibatch_size_multiplier(
        use_gpu, use_all_gpus)

    actions = config["actions"]

    # Deserialize each parameter group from its JSON-style sub-dict.
    rl = from_json(config["rl"], RLParameters)
    training = from_json(config["training"], TrainingParameters)
    rainbow = from_json(config["rainbow"], RainbowDQNParameters)
    evaluation = (
        from_json(config["evaluation"], EvaluationParameters)
        if "evaluation" in config
        else EvaluationParameters()
    )

    dqn_model_params = DiscreteActionModelParameters(
        actions=actions,
        rl=rl,
        training=training,
        rainbow=rainbow,
        evaluation=evaluation,
    )
    normalization = BaseWorkflow.read_norm_file(
        config["state_norm_data_path"])

    tb_writer = SummaryWriter(log_dir=config["model_output_path"])
    logger.info(
        "TensorBoard logging location is: {}".format(tb_writer.log_dir))

    if use_all_gpus:
        BaseWorkflow.init_multiprocessing(
            int(config["num_processes_per_node"]),
            int(config["num_nodes"]),
            int(config["node_index"]),
            gpu_index,
            config["init_method"],
        )

    dqn_workflow = DqnWorkflow(
        dqn_model_params, normalization, use_gpu, use_all_gpus)

    feature_order, _ = sort_features_by_normalization(normalization)
    handler = DiscreteDqnPreprocessHandler(
        len(actions), PandasSparseToDenseProcessor(feature_order))

    # Both readers share the batch size and preprocessing handler.
    reader_options = {
        "batch_size": training.minibatch_size,
        "preprocess_handler": handler,
    }
    training_reader = JSONDatasetReader(
        config["training_data_path"], **reader_options)
    evaluation_reader = JSONDatasetReader(
        config["eval_data_path"], **reader_options)

    with summary_writer_context(tb_writer):
        dqn_workflow.train_network(
            training_reader, evaluation_reader, int(config["epochs"]))

    # Only the first process on the first node writes the trained models.
    is_first_process = int(config["node_index"]) == 0 and gpu_index == 0
    if is_first_process:
        dqn_workflow.save_models(config["model_output_path"])