Example #1
    def __init__(
        self,
        model: ModelBase,
        state_preprocessor: Preprocessor,
        seq_len: int,
        num_action: int,
    ):
        """
        Since TorchScript is unable to trace control flow, we
        have to generate the action enumerations as constants
        here so that the trace can use them directly.
        """

        super().__init__(model, state_preprocessor, rlt.ModelFeatureConfig())
        self.seq_len = seq_len
        self.num_action = num_action

        def gen_permutations(seq_len: int, num_action: int) -> torch.Tensor:
            """
            Generate all seq_len permutations for a given action set.
            The return shape is (SEQ_LEN, PERM_NUM, ACTION_DIM).
            """
            all_permut = torch.cartesian_prod(*[torch.arange(num_action)] *
                                              seq_len)
            all_permut = F.one_hot(all_permut, num_action).transpose(0, 1)

            return all_permut.float()

        self.all_permut = gen_permutations(seq_len, num_action)
        self.num_permut = self.all_permut.size(1)
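
For quick verification, here is a minimal standalone sketch of the same enumeration (assuming only torch and torch.nn.functional are available; the seq_len=2, num_action=2 values are illustrative):

import torch
import torch.nn.functional as F

def gen_permutations(seq_len: int, num_action: int) -> torch.Tensor:
    # Enumerate every action sequence of length seq_len (seq_len >= 2 here,
    # since torch.cartesian_prod returns a 1-D tensor for a single input),
    # then one-hot encode each action and move the sequence axis first.
    all_permut = torch.cartesian_prod(*[torch.arange(num_action)] * seq_len)
    all_permut = F.one_hot(all_permut, num_action).transpose(0, 1)
    return all_permut.float()

# With 2 actions and sequences of length 2 there are 2 ** 2 = 4 permutations.
perms = gen_permutations(seq_len=2, num_action=2)
assert perms.shape == (2, 4, 2)  # (SEQ_LEN, PERM_NUM, ACTION_DIM)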
Example #2
    def _test_discrete_dqn_net_builder(
        self,
        chooser: DiscreteDQNNetBuilder__Union,
        state_feature_config: Optional[rlt.ModelFeatureConfig] = None,
        serving_module_class=DiscreteDqnPredictorWrapper,
    ) -> None:
        builder = chooser.value
        state_dim = 3
        state_feature_config = state_feature_config or rlt.ModelFeatureConfig(
            float_feature_infos=[
                rlt.FloatFeatureInfo(name=f"f{i}", feature_id=i)
                for i in range(state_dim)
            ])
        state_dim = len(state_feature_config.float_feature_infos)

        state_norm_params = {
            fi.feature_id: NormalizationParameters(feature_type=CONTINUOUS,
                                                   mean=0.0,
                                                   stddev=1.0)
            for fi in state_feature_config.float_feature_infos
        }

        action_names = ["L", "R"]
        q_network = builder.build_q_network(state_feature_config,
                                            state_norm_params,
                                            len(action_names))
        x = q_network.input_prototype()
        y = q_network(x).q_values
        self.assertEqual(y.shape, (1, 2))
        serving_module = builder.build_serving_module(q_network,
                                                      state_norm_params,
                                                      action_names,
                                                      state_feature_config)
        self.assertIsInstance(serving_module, serving_module_class)
Example #3
    def test_discrete_wrapper(self):
        ids = range(1, 5)
        state_normalization_parameters = {i: _cont_norm() for i in ids}
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        action_dim = 2
        dqn = models.FullyConnectedDQN(
            state_dim=len(state_normalization_parameters),
            action_dim=action_dim,
            sizes=[16],
            activations=["relu"],
        )
        state_feature_config = rlt.ModelFeatureConfig(float_feature_infos=[
            rlt.FloatFeatureInfo(feature_id=i, name=f"feat_{i}") for i in ids
        ])
        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
            dqn, state_preprocessor, state_feature_config)
        action_names = ["L", "R"]
        wrapper = DiscreteDqnPredictorWrapper(dqn_with_preprocessor,
                                              action_names,
                                              state_feature_config)
        input_prototype = dqn_with_preprocessor.input_prototype()[0]
        output_action_names, q_values = wrapper(input_prototype)
        self.assertEqual(action_names, output_action_names)
        self.assertEqual(q_values.shape, (1, 2))

        state_with_presence = input_prototype.float_features_with_presence
        expected_output = dqn(
            rlt.FeatureData(state_preprocessor(*state_with_presence)))
        self.assertTrue((expected_output == q_values).all())
Example #4
    def test_discrete_wrapper_with_id_list(self):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        action_dim = 2
        state_feature_config = rlt.ModelFeatureConfig(
            float_feature_infos=[
                rlt.FloatFeatureInfo(name=str(i), feature_id=i)
                for i in range(1, 5)
            ],
            id_list_feature_configs=[
                rlt.IdListFeatureConfig(name="A",
                                        feature_id=10,
                                        id_mapping_name="A_mapping")
            ],
            id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
        )
        embedding_concat = models.EmbeddingBagConcat(
            state_dim=len(state_normalization_parameters),
            model_feature_config=state_feature_config,
            embedding_dim=8,
        )
        dqn = models.Sequential(
            embedding_concat,
            rlt.TensorFeatureData(),
            models.FullyConnectedDQN(
                embedding_concat.output_dim,
                action_dim=action_dim,
                sizes=[16],
                activations=["relu"],
            ),
        )

        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
            dqn, state_preprocessor, state_feature_config)
        action_names = ["L", "R"]
        wrapper = DiscreteDqnPredictorWrapper(dqn_with_preprocessor,
                                              action_names,
                                              state_feature_config)
        input_prototype = dqn_with_preprocessor.input_prototype()[0]
        output_action_names, q_values = wrapper(input_prototype)
        self.assertEqual(action_names, output_action_names)
        self.assertEqual(q_values.shape, (1, 2))

        feature_id_to_name = {
            config.feature_id: config.name
            for config in state_feature_config.id_list_feature_configs
        }
        state_id_list_features = {
            feature_id_to_name[k]: v
            for k, v in input_prototype.id_list_features.items()
        }
        state_with_presence = input_prototype.float_features_with_presence
        expected_output = dqn(
            rlt.FeatureData(
                float_features=state_preprocessor(*state_with_presence),
                id_list_features=state_id_list_features,
            ))
        self.assertTrue((expected_output == q_values).all())
Example #5
def get_feature_config(
    float_features: Optional[List[Tuple[int, str]]]
) -> rlt.ModelFeatureConfig:
    float_features = float_features or []
    float_feature_infos = [
        rlt.FloatFeatureInfo(name=f_name, feature_id=f_id)
        for f_id, f_name in float_features
    ]

    return rlt.ModelFeatureConfig(float_feature_infos=float_feature_infos)
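
A hypothetical usage sketch (the feature names below are made up, and rlt is assumed to be the ReAgent types module already imported alongside this helper):

config = get_feature_config([(1, "position"), (2, "velocity")])
assert [fi.feature_id for fi in config.float_feature_infos] == [1, 2]
assert [fi.name for fi in config.float_feature_infos] == ["position", "velocity"]

# Passing None falls back to an empty feature list.
assert get_feature_config(None).float_feature_infos == []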
Example #6
    def test_fully_connected_with_embedding(self):
        # Intentionally used this long path to make sure we included it in __init__.py
        chooser = DiscreteDQNNetBuilder__Union(
            FullyConnectedWithEmbedding=discrete_dqn.
            fully_connected_with_embedding.FullyConnectedWithEmbedding())
        self._test_discrete_dqn_net_builder(chooser)

        # only id_list
        state_feature_config = rlt.ModelFeatureConfig(
            float_feature_infos=[
                rlt.FloatFeatureInfo(name=str(i), feature_id=i)
                for i in range(1, 5)
            ],
            id_list_feature_configs=[
                rlt.IdListFeatureConfig(name="A",
                                        feature_id=10,
                                        id_mapping_name="A_mapping")
            ],
            id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
        )
        self._test_discrete_dqn_net_builder(
            chooser, state_feature_config=state_feature_config)

        # with id_score_list
        state_feature_config = rlt.ModelFeatureConfig(
            float_feature_infos=[
                rlt.FloatFeatureInfo(name=str(i), feature_id=i)
                for i in range(1, 5)
            ],
            id_list_feature_configs=[
                rlt.IdListFeatureConfig(name="A",
                                        feature_id=10,
                                        id_mapping_name="A_mapping")
            ],
            id_score_list_feature_configs=[
                rlt.IdScoreListFeatureConfig(name="B",
                                             feature_id=100,
                                             id_mapping_name="A_mapping")
            ],
            id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
        )
        self._test_discrete_dqn_net_builder(
            chooser, state_feature_config=state_feature_config)
Example #7
 def feature_config(self):
     return rlt.ModelFeatureConfig(
         id_mapping_config={
             "page":
             rlt.IdMapping(ids=list(range(100, 100 + self.embedding_size)))
         },
         id_list_feature_configs=[
             rlt.IdFeatureConfig(name="page_id",
                                 feature_id=2002,
                                 id_mapping_name="page")
         ],
     )
Example #8
 def __init__(
     self,
     model: ModelBase,
     state_preprocessor: Preprocessor,
     state_feature_config: Optional[rlt.ModelFeatureConfig] = None,
 ):
     super().__init__()
     self.model = model
     self.state_preprocessor = state_preprocessor
     self.state_feature_config = state_feature_config or rlt.ModelFeatureConfig(
     )
     self.sparse_preprocessor = make_sparse_preprocessor(
         self.state_feature_config, device=torch.device("cpu"))
Example #9
 def __init__(
     self,
     model: ModelBase,  # acc_reward prediction model
     state_preprocessor: Preprocessor,
     seq_len: int,
     num_action: int,
 ):
     """
      Since TorchScript is unable to trace control flow, we
      have to generate the action enumerations as constants
      here so that the trace can use them directly.
     """
     super().__init__(model, state_preprocessor, rlt.ModelFeatureConfig())
     self.seq_len = seq_len
     self.num_action = num_action
     self.all_permut = gen_permutations(seq_len, num_action)
Example #10
 def __init__(
     self,
     model: ModelBase,  # acc_reward prediction model
     step_model: ModelBase,  # step prediction model
     state_preprocessor: Preprocessor,
     seq_len: int,
     num_action: int,
 ):
     """
      The difference from Seq2RewardWithPreprocessor: this wrapper plans for
      different look_ahead steps (between 1 and seq_len) and merges results
      according to the look_ahead step prediction probabilities.
     """
     super().__init__(model, state_preprocessor, rlt.ModelFeatureConfig())
     self.step_model = step_model
     self.seq_len = seq_len
     self.num_action = num_action
     # key: seq_len, value: all possible action sequences of length seq_len
     self.all_permut = {
         s + 1: gen_permutations(s + 1, num_action)
         for s in range(seq_len)
     }
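
For illustration, a small sketch of the per-horizon lookup built above, reusing gen_permutations from Example #1 with hypothetical values (horizons start at 2 here because that version of the helper relies on torch.cartesian_prod receiving at least two tensors):

num_action = 2
seq_len = 3

# key: look-ahead horizon k, value: every one-hot action sequence of length k
all_permut = {k: gen_permutations(k, num_action) for k in range(2, seq_len + 1)}

for k, permut in all_permut.items():
    # num_action ** k enumerated sequences, each one-hot encoded per step
    assert permut.shape == (k, num_action ** k, num_action)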
Example #11
class DiscreteDQNBase(ModelManager):
    target_action_distribution: Optional[List[float]] = None
    state_feature_config: rlt.ModelFeatureConfig = field(
        default_factory=lambda: rlt.ModelFeatureConfig(float_feature_infos=[]))
    preprocessing_options: Optional[PreprocessingOptions] = None
    reader_options: Optional[ReaderOptions] = None

    def __post_init_post_parse__(self):
        super().__init__()
        self._metrics_to_score = None
        self._q_network: Optional[ModelBase] = None

    @classmethod
    def normalization_key(cls) -> str:
        return NormalizationKey.STATE

    def create_policy(self, serving: bool) -> Policy:
        """ Create an online DiscreteDQN Policy from env. """
        if serving:
            sampler = GreedyActionSampler()
            scorer = discrete_dqn_serving_scorer(
                DiscreteDqnPredictorUnwrapper(self.build_serving_module()))
        else:
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `rl_parameters`.
            sampler = SoftmaxActionSampler(
                temperature=self.rl_parameters.temperature)
            # pyre-fixme[16]: `RLTrainer` has no attribute `q_network`.
            scorer = discrete_dqn_scorer(self.trainer.q_network)
        return Policy(scorer=scorer, sampler=sampler)

    @property
    def metrics_to_score(self) -> List[str]:
        assert self._reward_options is not None
        # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `_metrics_to_score`.
        if self._metrics_to_score is None:
            self._metrics_to_score = get_metrics_to_score(
                self._reward_options.metric_reward_values)
        return self._metrics_to_score

    @property
    def should_generate_eval_dataset(self) -> bool:
        # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `eval_parameters`.
        return self.eval_parameters.calc_cpe_in_training

    def _set_normalization_parameters(
            self, normalization_data_map: Dict[str,
                                               NormalizationData]) -> None:
        """
        Set normalization parameters on current instance
        """
        state_norm_data = normalization_data_map.get(self.normalization_key(),
                                                     None)
        assert state_norm_data is not None
        assert state_norm_data.dense_normalization_parameters is not None
        # pyre-fixme[8]: Attribute has type `Dict[int,
        #  reagent.parameters.NormalizationParameters]`; used as `Optional[Dict[int,
        #  reagent.parameters.NormalizationParameters]]`.
        self.state_normalization_parameters = (
            state_norm_data.dense_normalization_parameters)
        self.set_normalization_data_map(normalization_data_map)

    def run_feature_identification(
            self, input_table_spec: TableSpec) -> Dict[str, NormalizationData]:
        preprocessing_options = self.preprocessing_options or PreprocessingOptions(
        )
        logger.info("Overriding whitelist_features")
        state_features = [
            ffi.feature_id
            for ffi in self.state_feature_config.float_feature_infos
        ]
        preprocessing_options = preprocessing_options._replace(
            whitelist_features=state_features)

        state_normalization_parameters = identify_normalization_parameters(
            input_table_spec, InputColumn.STATE_FEATURES,
            preprocessing_options)
        return {
            NormalizationKey.STATE:
            NormalizationData(
                dense_normalization_parameters=state_normalization_parameters)
        }

    def query_data(
        self,
        input_table_spec: TableSpec,
        sample_range: Optional[Tuple[float, float]],
        reward_options: RewardOptions,
    ) -> Dataset:
        return query_data(
            input_table_spec=input_table_spec,
            discrete_action=True,
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `action_names`.
            actions=self.action_names,
            include_possible_actions=True,
            sample_range=sample_range,
            custom_reward_expression=reward_options.custom_reward_expression,
            multi_steps=self.multi_steps,
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `rl_parameters`.
            gamma=self.rl_parameters.gamma,
        )

    @property
    def multi_steps(self) -> Optional[int]:
        # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `rl_parameters`.
        return self.rl_parameters.multi_steps

    def build_batch_preprocessor(self) -> BatchPreprocessor:
        return DiscreteDqnBatchPreprocessor(
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `action_names`.
            num_actions=len(self.action_names),
            state_preprocessor=Preprocessor(
                normalization_parameters=self.state_normalization_parameters,
                use_gpu=self.use_gpu,
            ),
            use_gpu=self.use_gpu,
        )

    def train(self, train_dataset: Dataset, eval_dataset: Optional[Dataset],
              num_epochs: int) -> RLTrainingOutput:
        """
        Train the model

        Returns partially filled RLTrainingOutput.
        The fields that should not be filled are:
        - output_path
        """
        reporter = DiscreteDQNReporter(
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `trainer_param`.
            self.trainer_param.actions,
            target_action_distribution=self.target_action_distribution,
        )
        # pyre-fixme[16]: `RLTrainer` has no attribute `add_observer`.
        self.trainer.add_observer(reporter)

        evaluator = Evaluator(
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `action_names`.
            self.action_names,
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `rl_parameters`.
            self.rl_parameters.gamma,
            self.trainer,
            metrics_to_score=self.metrics_to_score,
        )
        # pyre-fixme[16]: `Evaluator` has no attribute `add_observer`.
        evaluator.add_observer(reporter)

        batch_preprocessor = self.build_batch_preprocessor()
        train_and_evaluate_generic(
            train_dataset,
            eval_dataset,
            self.trainer,
            num_epochs,
            self.use_gpu,
            batch_preprocessor,
            reporter,
            evaluator,
            reader_options=self.reader_options,
        )
        # pyre-fixme[16]: `RLTrainingReport` has no attribute `make_union_instance`.
        training_report = RLTrainingReport.make_union_instance(
            reporter.generate_training_report())
        return RLTrainingOutput(training_report=training_report)
Example #12
class DiscreteDQNBase(ModelManager):
    target_action_distribution: Optional[List[float]] = None
    state_feature_config: rlt.ModelFeatureConfig = field(
        default_factory=lambda: rlt.ModelFeatureConfig(float_feature_infos=[])
    )
    preprocessing_options: Optional[PreprocessingOptions] = None
    reader_options: Optional[ReaderOptions] = None

    def __post_init_post_parse__(self):
        super().__init__()
        self._metrics_to_score = None
        self._q_network: Optional[ModelBase] = None

    @classmethod
    def normalization_key(cls) -> str:
        return DiscreteNormalizationParameterKeys.STATE

    def create_policy(self, serving: bool) -> Policy:
        """ Create an online DiscreteDQN Policy from env. """

        from reagent.gym.policies.samplers.discrete_sampler import SoftmaxActionSampler
        from reagent.gym.policies.scorers.discrete_scorer import (
            discrete_dqn_scorer,
            discrete_dqn_serving_scorer,
        )

        sampler = SoftmaxActionSampler(temperature=self.rl_parameters.temperature)
        if serving:
            scorer = discrete_dqn_serving_scorer(
                DiscreteDqnPredictorUnwrapper(self.build_serving_module())
            )
        else:
            scorer = discrete_dqn_scorer(self.trainer.q_network)
        return Policy(scorer=scorer, sampler=sampler)

    @property
    def metrics_to_score(self) -> List[str]:
        assert self.reward_options is not None
        if self._metrics_to_score is None:
            self._metrics_to_score = get_metrics_to_score(
                self._reward_options.metric_reward_values
            )
        return self._metrics_to_score

    @property
    def should_generate_eval_dataset(self) -> bool:
        return self.eval_parameters.calc_cpe_in_training

    def _set_normalization_parameters(
        self, normalization_data_map: Dict[str, NormalizationData]
    ):
        """
        Set normalization parameters on current instance
        """
        state_norm_data = normalization_data_map.get(self.normalization_key(), None)
        assert state_norm_data is not None
        assert state_norm_data.dense_normalization_parameters is not None
        self.state_normalization_parameters = (
            state_norm_data.dense_normalization_parameters
        )

    def run_feature_identification(
        self, input_table_spec: TableSpec
    ) -> Dict[str, NormalizationData]:
        preprocessing_options = self.preprocessing_options or PreprocessingOptions()
        logger.info("Overriding whitelist_features")
        state_features = [
            ffi.feature_id for ffi in self.state_feature_config.float_feature_infos
        ]
        preprocessing_options = preprocessing_options._replace(
            whitelist_features=state_features
        )

        state_normalization_parameters = identify_normalization_parameters(
            input_table_spec, "state_features", preprocessing_options
        )
        return {
            DiscreteNormalizationParameterKeys.STATE: NormalizationData(
                dense_normalization_parameters=state_normalization_parameters
            )
        }

    def query_data(
        self,
        input_table_spec: TableSpec,
        sample_range: Optional[Tuple[float, float]],
        reward_options: RewardOptions,
    ) -> Dataset:
        return query_data(
            input_table_spec=input_table_spec,
            actions=self.action_names,
            sample_range=sample_range,
            custom_reward_expression=reward_options.custom_reward_expression,
            multi_steps=self.multi_steps,
            gamma=self.rl_parameters.gamma,
        )

    @property
    def multi_steps(self) -> Optional[int]:
        return self.rl_parameters.multi_steps

    def build_batch_preprocessor(self) -> BatchPreprocessor:
        return DiscreteDqnBatchPreprocessor(
            num_actions=len(self.action_names),
            state_preprocessor=Preprocessor(
                normalization_parameters=self.state_normalization_parameters,
                use_gpu=self.use_gpu,
            ),
            use_gpu=self.use_gpu,
        )

    def train(
        self, train_dataset: Dataset, eval_dataset: Optional[Dataset], num_epochs: int
    ) -> RLTrainingOutput:
        """
        Train the model

        Returns partially filled RLTrainingOutput. The fields that should not be filled
        are:
        - output_path
        - warmstart_output_path
        - vis_metrics
        - validation_output
        """
        logger.info("Creating reporter")
        reporter = DiscreteDQNReporter(
            self.trainer_param.actions,
            target_action_distribution=self.target_action_distribution,
        )
        logger.info("Adding reporter to trainer")
        self.trainer.add_observer(reporter)

        training_page_handler = TrainingPageHandler(self.trainer)
        training_page_handler.add_observer(reporter)
        evaluator = Evaluator(
            self.action_names,
            self.rl_parameters.gamma,
            self.trainer,
            metrics_to_score=self.metrics_to_score,
        )
        logger.info("Adding reporter to evaluator")
        evaluator.add_observer(reporter)
        evaluation_page_handler = EvaluationPageHandler(
            self.trainer, evaluator, reporter
        )

        batch_preprocessor = self.build_batch_preprocessor()
        train_and_evaluate_generic(
            train_dataset,
            eval_dataset,
            self.trainer,
            num_epochs,
            self.use_gpu,
            batch_preprocessor,
            training_page_handler,
            evaluation_page_handler,
            reader_options=self.reader_options,
        )
        training_report = RLTrainingReport.make_union_instance(
            reporter.generate_training_report()
        )
        return RLTrainingOutput(training_report=training_report)