Example #1
    def get_critic_exporter(self, trainer, environment):
        feature_extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        )
        output_transformer = ParametricActionOutputTransformer()

        return ParametricDQNExporter(trainer.q1_network, feature_extractor,
                                     output_transformer)
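A minimal usage sketch for this helper. The `suite`, `trainer`, and `environment` names are hypothetical stand-ins for whatever objects own this method and carry the normalization attributes; only `export()` itself is taken from Example #3 below.

    # Hypothetical call site: `suite` owns get_critic_exporter(); `trainer`
    # exposes q1_network and `environment` the two normalization attributes.
    exporter = suite.get_critic_exporter(trainer, environment)
    predictor = exporter.export()  # serialization call shown in Example #3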
Example #2
def main(params):
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = ContinuousActionModelParameters(
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters)
    state_normalization = BaseWorkflow.read_norm_file(
        params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(
        params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = ParametricDqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        Preprocessor(action_normalization, False),
        PandasSparseToDenseProcessor(),
    )

    workflow = ParametricDqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size)
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset,
                               int(params["epochs"]))

    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization,
            action_normalization_parameters=action_normalization,
        ),
        ParametricActionOutputTransformer(),
    )
    return export_trainer_and_predictor(workflow.trainer,
                                        params["model_output_path"],
                                        exporter=exporter)  # noqa
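A sketch of the configuration `main` expects, inferred purely from the keys the function reads above; every value is a placeholder rather than a library default, and the empty sub-dicts assume `RLParameters` and `RainbowDQNParameters` can be built with default kwargs.

    # Placeholder params dict; keys mirror the lookups in main() above.
    params = {
        "use_gpu": False,
        "use_all_avail_gpus": False,
        "rl": {},  # kwargs for RLParameters
        "training": {"minibatch_size": 1024},  # kwargs for TrainingParameters
        "rainbow": {},  # kwargs for RainbowDQNParameters
        "state_norm_data_path": "state_norm.json",
        "action_norm_data_path": "action_norm.json",
        "training_data_path": "train.json",
        "eval_data_path": "eval.json",
        "model_output_path": "/tmp/parametric_dqn",
        "epochs": 10,
    }
    main(params)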
Example #3
    def critic_predictor(
        self, feature_extractor=None, output_transformer=None, net_container=None
    ) -> _ParametricDQNPredictor:
        # TODO: We should combine the two Q functions
        q_network = self.q1_network.cpu_model()
        if net_container is not None:
            q_network = net_container(q_network)
        predictor = ParametricDQNExporter(
            q_network, feature_extractor, output_transformer
        ).export()
        self.q1_network.train()
        return predictor
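A hedged call-site sketch, reusing the extractor and transformer construction from Example #1; `trainer` and `environment` are hypothetical stand-ins for the objects in scope where this method would be called.

    # Hypothetical call; only constructors shown elsewhere on this page are used.
    predictor = trainer.critic_predictor(
        feature_extractor=PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        ),
        output_transformer=ParametricActionOutputTransformer(),
    )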
Example #4
    def get_modular_sarsa_trainer_exporter(self,
                                           environment,
                                           parameters=None,
                                           use_gpu=False,
                                           use_all_avail_gpus=False):
        parameters = parameters or self.get_sarsa_parameters()
        q_network = FullyConnectedParametricDQN(
            state_dim=get_num_output_features(environment.normalization),
            action_dim=get_num_output_features(
                environment.normalization_action),
            sizes=parameters.training.layers[1:-1],
            activations=parameters.training.activations[:-1],
        )
        reward_network = FullyConnectedParametricDQN(
            state_dim=get_num_output_features(environment.normalization),
            action_dim=get_num_output_features(
                environment.normalization_action),
            sizes=parameters.training.layers[1:-1],
            activations=parameters.training.activations[:-1],
        )
        if use_gpu:
            q_network = q_network.cuda()
            reward_network = reward_network.cuda()
            if use_all_avail_gpus:
                q_network = q_network.get_data_parallel_model()
                reward_network = reward_network.get_data_parallel_model()

        q_network_target = q_network.get_target_network()
        trainer = _ParametricDQNTrainer(q_network, q_network_target,
                                        reward_network, parameters)
        state_preprocessor = Preprocessor(environment.normalization, False,
                                          True)
        action_preprocessor = Preprocessor(environment.normalization_action,
                                           False, True)
        feature_extractor = PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        )
        output_transformer = ParametricActionOutputTransformer()
        exporter = ParametricDQNExporter(
            q_network,
            feature_extractor,
            output_transformer,
            state_preprocessor,
            action_preprocessor,
        )
        return (trainer, exporter)
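Unlike Examples #1-#3, this exporter is constructed with explicit state and action preprocessors as two extra positional arguments. Example #5 below shows a third route, the `from_state_action_normalization` classmethod; the sketch assumes it derives equivalent preprocessors from the normalization parameters internally, which these snippets do not confirm.

    # Possibly equivalent construction via the classmethod from Example #5;
    # that it wires up the same preprocessors internally is an assumption.
    exporter = ParametricDQNExporter.from_state_action_normalization(
        q_network,
        state_normalization=environment.normalization,
        action_normalization=environment.normalization_action,
    )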
Example #5
    def _test_ddpg_trainer(self, use_gpu=False, use_all_avail_gpus=False):
        # FIXME: the test is not really working
        self.run_pre_training_eval = False
        self.check_tolerance = False
        environment = GridworldContinuous()

        parameters = self.get_ddpg_parameters()

        state_dim = get_num_output_features(environment.normalization)
        action_dim = get_num_output_features(environment.normalization_action)

        # Build Actor Network
        actor_network = ActorNetModel(
            layers=[state_dim] + parameters.actor_training.layers[1:-1] +
            [action_dim],
            activations=parameters.actor_training.activations,
            fl_init=parameters.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )

        # Build Critic Network
        critic_network = CriticNetModel(
            # Ensure dims match input state and scalar output
            layers=[state_dim] + parameters.critic_training.layers[1:-1] + [1],
            activations=parameters.critic_training.activations,
            fl_init=parameters.shared_training.final_layer_init,
            state_dim=state_dim,
            action_dim=action_dim,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )

        trainer = DDPGTrainer(
            actor_network,
            critic_network,
            parameters,
            environment.normalization,
            environment.normalization_action,
            environment.min_action_range,
            environment.max_action_range,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )

        exporter = ParametricDQNExporter.from_state_action_normalization(
            trainer.critic,
            state_normalization=environment.normalization,
            action_normalization=environment.normalization_action,
        )

        evaluator = GridworldDDPGEvaluator(environment, DISCOUNT)
        self.evaluate_gridworld(environment, evaluator, trainer, exporter,
                                use_gpu)

        # Make sure actor predictor works
        actor = ActorExporter.from_state_action_normalization(
            trainer.actor,
            state_normalization=environment.normalization,
            action_normalization=environment.normalization_action,
        ).export()

        # Make sure all actions are optimal
        error = evaluator.evaluate_actor(actor, thres=0.2)
        print("gridworld optimal action match MAE: {0:.3f}".format(error))
Example #6
def single_process_main(gpu_index, *args):
    params = args[0]
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"])

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = ContinuousActionModelParameters(
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters)
    state_normalization = BaseWorkflow.read_norm_file(
        params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(
        params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    if params["use_all_avail_gpus"]:
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = ParametricDqnWorkflow(
        model_params,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    state_sorted_features, _ = sort_features_by_normalization(
        state_normalization)
    action_sorted_features, _ = sort_features_by_normalization(
        action_normalization)
    preprocess_handler = ParametricDqnPreprocessHandler(
        PandasSparseToDenseProcessor(state_sorted_features),
        PandasSparseToDenseProcessor(action_sorted_features),
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(params["eval_data_path"],
                                     batch_size=16,
                                     preprocess_handler=preprocess_handler)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset,
                               int(params["epochs"]))

    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization,
            action_normalization_parameters=action_normalization,
        ),
        ParametricActionOutputTransformer(),
    )

    if int(params["node_index"]) == 0 and gpu_index == 0:
        export_trainer_and_predictor(workflow.trainer,
                                     params["model_output_path"],
                                     exporter=exporter)  # noqa
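A hypothetical launcher for `single_process_main`, assuming one process per local GPU. `torch.multiprocessing.spawn` passes the process index as the first argument to its target, which lines up with the `gpu_index` parameter above; the source repository may use a different entry point.

    # Hypothetical fan-out across local GPUs; not taken from the source repo.
    import torch.multiprocessing as mp

    def multi_gpu_main(params):
        nprocs = int(params["num_processes_per_node"])
        # spawn() invokes single_process_main(gpu_index, params) per process.
        mp.spawn(single_process_main, args=(params,), nprocs=nprocs)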