Example #1
class Seq2RewardModel(WorldModelBase):
    __hash__ = param_hash
    net_builder: ValueNetBuilder__Union = field(
        # pyre-fixme[28]: Unexpected keyword argument `Seq2RewardNetBuilder`.
        default_factory=lambda: ValueNetBuilder__Union(
            Seq2RewardNetBuilder=Seq2RewardNetBuilder()
        )
    )

    compress_net_builder: ValueNetBuilder__Union = field(
        # pyre-fixme[28]: Unexpected keyword argument `FullyConnected`.
        default_factory=lambda: ValueNetBuilder__Union(FullyConnected=FullyConnected())
    )

    trainer_param: Seq2RewardTrainerParameters = field(
        default_factory=Seq2RewardTrainerParameters)

    preprocessing_options: Optional[PreprocessingOptions] = None

    def build_trainer(
        self,
        normalization_data_map: Dict[str, NormalizationData],
        use_gpu: bool,
        reward_options: Optional[RewardOptions] = None,
    ) -> Seq2RewardTrainer:
        seq2reward_network = self.net_builder.value.build_value_network(
            normalization_data_map[NormalizationKey.STATE])
        trainer = Seq2RewardTrainer(seq2reward_network=seq2reward_network,
                                    params=self.trainer_param)
        return trainer

    def get_reporter(self) -> Seq2RewardReporter:
        return Seq2RewardReporter(self.trainer_param.action_names)
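
A minimal sketch of how this manager might be driven (hypothetical wiring: the feature ids, `use_gpu=False`, and the single STATE entry are placeholder choices, reusing the normalization classes that appear in Example #2 below):

state_normalization = NormalizationData(
    dense_normalization_parameters={
        0: NormalizationParameters(feature_type=DO_NOT_PREPROCESS),
        1: NormalizationParameters(feature_type=DO_NOT_PREPROCESS),
    }
)
model = Seq2RewardModel()
trainer = model.build_trainer(
    normalization_data_map={NormalizationKey.STATE: state_normalization},
    use_gpu=False,
)
reporter = model.get_reporter()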
Example #2
def train_seq2reward_compress_model(
    training_data, seq2reward_network, learning_rate=0.1, num_epochs=5
):
    SEQ_LEN, batch_size, NUM_ACTION = next(iter(training_data)).action.shape
    # This helper is hard-coded to the 6-step, 2-action setting.
    assert SEQ_LEN == 6 and NUM_ACTION == 2

    compress_net_builder = FullyConnected(sizes=[8, 8])
    state_normalization_data = NormalizationData(
        dense_normalization_parameters={
            0: NormalizationParameters(feature_type=DO_NOT_PREPROCESS),
            1: NormalizationParameters(feature_type=DO_NOT_PREPROCESS),
        }
    )
    compress_model_network = compress_net_builder.build_value_network(
        state_normalization_data,
        output_dim=NUM_ACTION,
    )

    trainer_param = Seq2RewardTrainerParameters(
        learning_rate=0.0,  # LR for the seq2reward net itself, which stays frozen here
        multi_steps=SEQ_LEN,
        action_names=["0", "1"],
        compress_model_learning_rate=learning_rate,  # only the compress model trains
        gamma=1.0,
        view_q_value=True,
    )

    trainer = CompressModelTrainer(
        compress_model_network=compress_model_network,
        seq2reward_network=seq2reward_network,
        params=trainer_param,
    )

    pl.seed_everything(SEED)  # SEED is a module-level constant defined outside this excerpt
    pl_trainer = pl.Trainer(max_epochs=num_epochs, deterministic=True)
    pl_trainer.fit(trainer, training_data)

    return trainer
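
A sketch of invoking the helper above. The asserts pin each batch's `action` tensor to shape `(6, batch_size, 2)`. Here `training_data` stands in for a DataLoader yielding such batches (other fields the trainers consume are elided), and the seq2reward network is built as in Example #1, assuming a `state_normalization_data` constructed like the one inside the function:

# Illustrative only; training_data must yield batches whose .action tensor
# has shape (6, batch_size, 2), plus the state/reward fields used in training.
seq2reward_network = Seq2RewardNetBuilder().build_value_network(
    state_normalization_data
)
trainer = train_seq2reward_compress_model(
    training_data,
    seq2reward_network,
    learning_rate=0.05,  # placeholder value
    num_epochs=5,
)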
Example #3
class Seq2RewardModel(WorldModelBase):
    __hash__ = param_hash
    net_builder: ValueNetBuilder__Union = field(
        # pyre-fixme[28]: Unexpected keyword argument `Seq2RewardNetBuilder`.
        default_factory=lambda: ValueNetBuilder__Union(
            Seq2RewardNetBuilder=Seq2RewardNetBuilder()
        )
    )

    compress_net_builder: ValueNetBuilder__Union = field(
        # pyre-fixme[28]: Unexpected keyword argument `FullyConnected`.
        default_factory=lambda: ValueNetBuilder__Union(FullyConnected=FullyConnected())
    )

    trainer_param: Seq2RewardTrainerParameters = field(
        default_factory=Seq2RewardTrainerParameters
    )

    preprocessing_options: Optional[PreprocessingOptions] = None

    def build_trainer(self) -> Seq2RewardTrainer:
        seq2reward_network = self.net_builder.value.build_value_network(
            self.state_normalization_data
        )
        trainer = Seq2RewardTrainer(
            seq2reward_network=seq2reward_network, params=self.trainer_param
        )
        if self.use_gpu:
            # Move the networks and the cached action-permutation tensor onto the GPU.
            trainer.seq2reward_network = trainer.seq2reward_network.cuda()
            trainer.step_predict_network = trainer.step_predict_network.cuda()
            trainer.all_permut = trainer.all_permut.cuda()

        return trainer

    def build_serving_module(self) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module
        """
        raise NotImplementedError()
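
Unlike Example #1, this variant takes no arguments and reads the normalization data and GPU flag off the instance. A hypothetical driver, assuming those attributes are populated by the surrounding ModelManager workflow (not shown in this excerpt):

model = Seq2RewardModel()
# Assumed setup; in the real workflow the ModelManager fills these in.
model.state_normalization_data = state_normalization
model.use_gpu = torch.cuda.is_available()
trainer = model.build_trainer()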
Example #4
class Seq2RewardModel(WorldModelBase):
    __hash__ = param_hash
    net_builder: ValueNetBuilder__Union = field(
        # pyre-fixme[28]: Unexpected keyword argument `Seq2RewardNetBuilder`.
        default_factory=lambda: ValueNetBuilder__Union(
            Seq2RewardNetBuilder=Seq2RewardNetBuilder()
        )
    )

    compress_net_builder: ValueNetBuilder__Union = field(
        # pyre-fixme[28]: Unexpected keyword argument `FullyConnected`.
        default_factory=lambda: ValueNetBuilder__Union(FullyConnected=FullyConnected())
    )

    trainer_param: Seq2RewardTrainerParameters = field(
        default_factory=Seq2RewardTrainerParameters
    )

    preprocessing_options: Optional[PreprocessingOptions] = None

    # pyre-fixme[15]: `build_trainer` overrides method defined in `ModelManager`
    #  inconsistently.
    def build_trainer(self, use_gpu: bool) -> Seq2RewardTrainer:
        seq2reward_network = self.net_builder.value.build_value_network(
            self.state_normalization_data
        )
        trainer = Seq2RewardTrainer(
            seq2reward_network=seq2reward_network, params=self.trainer_param
        )
        return trainer

    def get_reporter(self) -> Seq2RewardReporter:
        return Seq2RewardReporter(self.trainer_param.action_names)

    def build_serving_module(self) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module
        """
        raise NotImplementedError()
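
Here `build_trainer` takes only the `use_gpu` flag (hence the pyre-fixme about the inconsistent override). A short sketch, again assuming `state_normalization_data` has been populated as in Example #3's sketch; note that `build_serving_module` is expected to raise:

model = Seq2RewardModel(
    trainer_param=Seq2RewardTrainerParameters(action_names=["0", "1"])
)
model.state_normalization_data = state_normalization  # assumed setup
trainer = model.build_trainer(use_gpu=False)
reporter = model.get_reporter()  # reports metrics for actions ["0", "1"]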