示例#1
0
    def test_fully_connected_with_embedding(self):
        """Run the shared builder check for FullyConnectedWithEmbedding.

        The builder is checked three times: with the helper's default
        feature config, with float features plus one id-list feature, and
        with an additional id-score-list feature.
        """
        # The fully qualified module path is deliberate: it verifies that the
        # builder module is exposed through the package's __init__.py.
        embedding_builder = (discrete_dqn.fully_connected_with_embedding.
                             FullyConnectedWithEmbedding())
        chooser = DiscreteDQNNetBuilder__Union(
            FullyConnectedWithEmbedding=embedding_builder)
        self._test_discrete_dqn_net_builder(chooser)

        # Each factory returns brand-new objects so the two configs below
        # never share state.
        def make_float_infos():
            return [
                rlt.FloatFeatureInfo(name=str(fid), feature_id=fid)
                for fid in range(1, 5)
            ]

        def make_id_list_configs():
            return [
                rlt.IdListFeatureConfig(name="A",
                                        feature_id=10,
                                        id_mapping_name="A_mapping")
            ]

        def make_id_mappings():
            explicit = rlt.ExplicitMapping(ids=[0, 1, 2])
            return {"A_mapping": rlt.IdMappingUnion(explicit_mapping=explicit)}

        # Float features plus a single id_list feature.
        id_list_only_config = rlt.ModelFeatureConfig(
            float_feature_infos=make_float_infos(),
            id_list_feature_configs=make_id_list_configs(),
            id_mapping_config=make_id_mappings(),
        )
        self._test_discrete_dqn_net_builder(
            chooser, state_feature_config=id_list_only_config)

        # Same as above, with an id_score_list feature that reuses the
        # "A_mapping" id mapping.
        with_score_list_config = rlt.ModelFeatureConfig(
            float_feature_infos=make_float_infos(),
            id_list_feature_configs=make_id_list_configs(),
            id_score_list_feature_configs=[
                rlt.IdScoreListFeatureConfig(name="B",
                                             feature_id=100,
                                             id_mapping_name="A_mapping")
            ],
            id_mapping_config=make_id_mappings(),
        )
        self._test_discrete_dqn_net_builder(
            chooser, state_feature_config=with_score_list_config)
示例#2
0
    def _test_discrete_dqn_net_builder(
        self,
        chooser: DiscreteDQNNetBuilder__Union,
        state_feature_config: Optional[rlt.ModelFeatureConfig] = None,
        serving_module_class=DiscreteDqnPredictorWrapper,
    ) -> None:
        """Build a Q-network and serving module from ``chooser`` and check them.

        Args:
            chooser: union wrapper; its ``value`` is the net builder under test.
            state_feature_config: feature config handed to the builder. When
                falsy, a config with three float features (ids 0..2, named
                ``f0``..``f2``) is used instead.
            serving_module_class: class the built serving module must be an
                instance of.
        """
        builder = chooser.value
        default_state_dim = 3
        state_feature_config = state_feature_config or rlt.ModelFeatureConfig(
            float_feature_infos=[
                rlt.FloatFeatureInfo(name=f"f{i}", feature_id=i)
                for i in range(default_state_dim)
            ])

        # Every float feature is normalized as CONTINUOUS with mean 0, stddev 1.
        state_normalization_data = NormalizationData(
            dense_normalization_parameters={
                fi.feature_id: NormalizationParameters(
                    feature_type=CONTINUOUS, mean=0.0, stddev=1.0)
                for fi in state_feature_config.float_feature_infos
            })

        action_names = ["L", "R"]
        q_network = builder.build_q_network(state_feature_config,
                                            state_normalization_data,
                                            len(action_names))
        x = q_network.input_prototype()
        y = q_network(x)
        # One output column per action; derived from action_names so the
        # expectation cannot drift from the network that was built.
        self.assertEqual(y.shape, (1, len(action_names)))
        serving_module = builder.build_serving_module(
            q_network, state_normalization_data, action_names,
            state_feature_config)
        self.assertIsInstance(serving_module, serving_module_class)
示例#3
0
    def test_discrete_wrapper(self):
        """Check that DiscreteDqnPredictorWrapper reproduces the raw DQN output.

        The wrapper is fed the input prototype of the preprocessed model; its
        action names and Q-values are compared with the result of calling the
        preprocessor and the DQN directly.
        """
        feature_ids = range(1, 5)
        norm_params = {fid: _cont_norm() for fid in feature_ids}
        preprocessor = Preprocessor(norm_params, False)
        action_dim = 2
        q_net = models.FullyConnectedDQN(
            state_dim=len(norm_params),
            action_dim=action_dim,
            sizes=[16],
            activations=["relu"],
        )
        float_infos = [
            rlt.FloatFeatureInfo(feature_id=fid, name=f"feat_{fid}")
            for fid in feature_ids
        ]
        feature_config = rlt.ModelFeatureConfig(float_feature_infos=float_infos)
        preprocessed_dqn = DiscreteDqnWithPreprocessor(q_net, preprocessor,
                                                       feature_config)
        action_names = ["L", "R"]
        wrapper = DiscreteDqnPredictorWrapper(preprocessed_dqn, action_names,
                                              feature_config)

        # The first element of the prototype is the serving-side state input.
        prototype = preprocessed_dqn.input_prototype()[0]
        returned_names, q_values = wrapper(prototype)
        self.assertEqual(action_names, returned_names)
        self.assertEqual(q_values.shape, (1, 2))

        # Recompute the Q-values without the wrapper for comparison.
        dense_state = preprocessor(*prototype.float_features_with_presence)
        expected_q_values = q_net(rlt.FeatureData(dense_state))
        self.assertTrue((expected_q_values == q_values).all())
示例#4
0
    def test_actor_wrapper(self):
        """Check that ActorPredictorWrapper reproduces the raw actor output.

        The wrapper's action is compared with the result of running the
        preprocessor, actor and postprocessor by hand on the same prototype.
        """
        state_norm = {fid: _cont_norm() for fid in range(1, 5)}
        action_norm = {fid: _cont_action_norm() for fid in range(101, 105)}
        preprocessor = Preprocessor(state_norm, False)
        postprocessor = Postprocessor(action_norm, False)

        # FullyConnectedActor is used so that the behavior is deterministic.
        actor = models.FullyConnectedActor(
            state_dim=len(state_norm),
            action_dim=len(action_norm),
            sizes=[16],
            activations=["relu"],
        )
        feature_config = rlt.ModelFeatureConfig()
        preprocessed_actor = ActorWithPreprocessor(actor, preprocessor,
                                                   feature_config,
                                                   postprocessor)
        wrapper = ActorPredictorWrapper(preprocessed_actor, feature_config)

        prototype = preprocessed_actor.input_prototype()[0]
        action, _log_prob = wrapper(prototype)
        expected_shape = (1, len(action_norm))
        self.assertEqual(action.shape, expected_shape)

        # Recompute the action step by step without the wrapper.
        dense_state = preprocessor(*prototype[0])
        raw_action = actor(rlt.FeatureData(dense_state)).action
        expected_action = postprocessor(raw_action)
        self.assertTrue((expected_action == action).all())
示例#5
0
    def test_discrete_wrapper_with_id_list(self):
        """Check DiscreteDqnPredictorWrapper on a model with an id-list feature.

        The DQN is an embedding-concat layer followed by a fully connected
        network. The wrapper output is compared with the output of the DQN
        called directly on the preprocessed float features and on the id-list
        features re-keyed by feature name.
        """
        norm_params = {fid: _cont_norm() for fid in range(1, 5)}
        preprocessor = Preprocessor(norm_params, False)
        action_dim = 2

        float_infos = [
            rlt.FloatFeatureInfo(name=str(fid), feature_id=fid)
            for fid in range(1, 5)
        ]
        id_list_configs = [
            rlt.IdListFeatureConfig(name="A",
                                    feature_id=10,
                                    id_mapping_name="A_mapping")
        ]
        feature_config = rlt.ModelFeatureConfig(
            float_feature_infos=float_infos,
            id_list_feature_configs=id_list_configs,
            id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
        )

        embedding_layer = models.EmbeddingBagConcat(
            state_dim=len(norm_params),
            model_feature_config=feature_config,
            embedding_dim=8,
        )
        q_head = models.FullyConnectedDQN(
            embedding_layer.output_dim,
            action_dim=action_dim,
            sizes=[16],
            activations=["relu"],
        )
        q_net = models.Sequential(embedding_layer, rlt.TensorFeatureData(),
                                  q_head)

        preprocessed_dqn = DiscreteDqnWithPreprocessor(q_net, preprocessor,
                                                       feature_config)
        action_names = ["L", "R"]
        wrapper = DiscreteDqnPredictorWrapper(preprocessed_dqn, action_names,
                                              feature_config)
        prototype = preprocessed_dqn.input_prototype()[0]
        returned_names, q_values = wrapper(prototype)
        self.assertEqual(action_names, returned_names)
        self.assertEqual(q_values.shape, (1, 2))

        # The prototype keys id-list features by feature id; re-key them by
        # the configured feature name before calling the DQN directly.
        name_by_feature_id = {}
        for id_list_config in feature_config.id_list_feature_configs:
            name_by_feature_id[id_list_config.feature_id] = id_list_config.name
        id_lists_by_name = {}
        for feature_id, id_list in prototype.id_list_features.items():
            id_lists_by_name[name_by_feature_id[feature_id]] = id_list

        dense_state = preprocessor(*prototype.float_features_with_presence)
        expected_q_values = q_net(
            rlt.FeatureData(
                float_features=dense_state,
                id_list_features=id_lists_by_name,
            ))
        self.assertTrue((expected_q_values == q_values).all())
示例#6
0
def get_feature_config(
        float_features: Optional[List[Tuple[int,
                                            str]]]) -> rlt.ModelFeatureConfig:
    """Build a ModelFeatureConfig from ``(feature_id, name)`` pairs.

    Args:
        float_features: pairs of feature id and feature name; ``None`` or an
            empty list yields a config with no float feature infos.

    Returns:
        A ``rlt.ModelFeatureConfig`` holding one ``rlt.FloatFeatureInfo`` per
        input pair, in input order.
    """
    infos = []
    for feature_id, feature_name in float_features or []:
        infos.append(
            rlt.FloatFeatureInfo(name=feature_name, feature_id=feature_id))
    return rlt.ModelFeatureConfig(float_feature_infos=infos)
 def _test_actor_net_builder(
         self, chooser: ContinuousActorNetBuilder__Union) -> None:
     """Build an actor and its serving module from ``chooser`` and check them.

     The actor is built for 3 state features and 2 action features, run on
     its own input prototype, and checked for output shapes. The serving
     module must be an ``ActorPredictorWrapper``.
     """
     builder = chooser.value
     state_dim, action_dim = 3, 2

     # State features: CONTINUOUS with mean 0 and stddev 1.
     state_params = {}
     for feature_id in range(state_dim):
         state_params[feature_id] = NormalizationParameters(
             feature_type=CONTINUOUS, mean=0.0, stddev=1.0)
     state_normalization_data = NormalizationData(
         dense_normalization_parameters=state_params)

     # Action features: the builder's default preprocessing type, in [0, 1].
     action_params = {}
     for feature_id in range(action_dim):
         action_params[feature_id] = NormalizationParameters(
             feature_type=builder.default_action_preprocessing,
             min_value=0.0,
             max_value=1.0,
         )
     action_normalization_data = NormalizationData(
         dense_normalization_parameters=action_params)

     actor_feature_config = rlt.ModelFeatureConfig()
     actor_network = builder.build_actor(actor_feature_config,
                                         state_normalization_data,
                                         action_normalization_data)
     actor_output = actor_network(actor_network.input_prototype())
     action = actor_output.action
     log_prob = actor_output.log_prob
     self.assertEqual(action.shape, (1, action_dim))
     self.assertEqual(log_prob.shape, (1, 1))

     # The serving module gets its own freshly constructed default config.
     serving_feature_config = rlt.ModelFeatureConfig()
     serving_module = builder.build_serving_module(
         actor_network,
         serving_feature_config,
         state_normalization_data,
         action_normalization_data,
     )
     self.assertIsInstance(serving_module, ActorPredictorWrapper)
示例#8
0
 def __init__(
     self,
     model: ModelBase,  # acc_reward prediction model
     state_preprocessor: Preprocessor,
     seq_len: int,
     num_action: int,
 ):
     """
     Set up the wrapper and precompute every action enumeration.

     TorchScript cannot trace control flow, so the action enumerations
     are generated once here as constants that the trace can use
     directly.
     """
     default_feature_config = rlt.ModelFeatureConfig()
     super().__init__(model, state_preprocessor, default_feature_config)
     self.seq_len, self.num_action = seq_len, num_action
     self.all_permut = gen_permutations(seq_len, num_action)
示例#9
0
 def __init__(
     self,
     model: ModelBase,
     state_preprocessor: Preprocessor,
     seq_len: int,
     num_action: int,
     state_feature_config: Optional[rlt.ModelFeatureConfig] = None,
 ):
     """
     Store the model, its preprocessors and the planning dimensions.

     A default ``rlt.ModelFeatureConfig()`` is used when
     ``state_feature_config`` is falsy. The sparse preprocessor is
     created for that config on the CPU device.
     """
     super().__init__()
     self.model = model
     self.state_preprocessor = state_preprocessor
     if not state_feature_config:
         state_feature_config = rlt.ModelFeatureConfig()
     self.state_feature_config = state_feature_config
     cpu_device = torch.device("cpu")
     self.sparse_preprocessor = make_sparse_preprocessor(
         self.state_feature_config, device=cpu_device)
     self.seq_len = seq_len
     self.num_action = num_action
示例#10
0
 def __init__(
     self,
     model: ModelBase,  # acc_reward prediction model
     step_model: ModelBase,  # step prediction model
     state_preprocessor: Preprocessor,
     seq_len: int,
     num_action: int,
 ):
     """
     Set up a wrapper that plans over several look-ahead lengths.

     Unlike Seq2RewardWithPreprocessor, this wrapper plans for every
     look_ahead step between 1 and seq_len and merges the results
     according to the look_ahead step prediction probabilities.
     """
     default_feature_config = rlt.ModelFeatureConfig()
     super().__init__(model, state_preprocessor, default_feature_config)
     self.step_model = step_model
     self.seq_len, self.num_action = seq_len, num_action
     # Maps each length in 1..seq_len to all possible action sequences of
     # that length.
     self.all_permut = {
         length: gen_permutations(length, num_action)
         for length in range(1, seq_len + 1)
     }
示例#11
0
    def test_MapIDListFeatures(self, mock_make_sparse_preprocessor):
        """Check MapIDListFeatures output keys, mapped values and assertions.

        Args:
            mock_make_sparse_preprocessor: mock whose return value stands in
                for the sparse preprocessor (presumably injected by a patch
                decorator outside this excerpt -- confirm against the file).
        """
        id_list_col = InputColumn.STATE_ID_LIST_FEATURES
        next_id_list_col = InputColumn.NEXT_STATE_ID_LIST_FEATURES
        id_score_list_col = InputColumn.STATE_ID_SCORE_LIST_FEATURES
        next_id_score_list_col = InputColumn.NEXT_STATE_ID_SCORE_LIST_FEATURES

        data = {
            id_list_col: {
                0: [torch.tensor(1), torch.tensor(2)]
            },
            id_score_list_col: {
                1: [torch.tensor(v) for v in (1, 2, 3)]
            },
        }

        # Canned results for the two preprocessing calls on the mock.
        sparse_preprocessor = mock_make_sparse_preprocessor.return_value
        sparse_preprocessor.preprocess_id_list.return_value = {
            id_list_col: [torch.tensor(2), torch.tensor(3)]
        }
        sparse_preprocessor.preprocess_id_score_list.return_value = {
            id_score_list_col: [torch.tensor(v) for v in (4, 5, 6)]
        }

        id_list_keys = [id_list_col, next_id_list_col]
        id_score_list_keys = [id_score_list_col, next_id_score_list_col]

        id_list_feature = rlt.IdListFeatureConfig(
            name=id_list_col,
            feature_id=0,
            id_mapping_name="state_id_list_features_mapping",
        )
        id_score_list_feature = rlt.IdScoreListFeatureConfig(
            name=id_score_list_col,
            feature_id=1,
            id_mapping_name="state_id_score_list_features_mapping",
        )
        id_mappings = {
            "state_id_list_features_mapping":
            rlt.IdMappingUnion(explicit_mapping=rlt.ExplicitMapping(
                ids=[0, 1, 2])),
            "state_id_score_list_features_mapping":
            rlt.IdMappingUnion(explicit_mapping=rlt.ExplicitMapping(
                ids=[3, 4, 5])),
        }
        state_feature_config = rlt.ModelFeatureConfig(
            id_list_feature_configs=[id_list_feature],
            id_score_list_feature_configs=[id_score_list_feature],
            id_mapping_config=id_mappings,
        )

        transform = transforms.MapIDListFeatures(
            id_list_keys=id_list_keys,
            id_score_list_keys=id_score_list_keys,
            feature_config=state_feature_config,
            device=torch.device("cpu"),
        )
        out = transform(data)

        # Every key from id_list_keys and id_score_list_keys must be present.
        self.assertEqual(len(out), 4)
        # A key that is absent from the input data maps to None.
        self.assertIsNone(out[next_id_list_col], "It should be filtered out")
        # Present keys carry the value produced by the sparse preprocessor.
        expected_id_list = {id_list_col: [torch.tensor(2), torch.tensor(3)]}
        self.assertEqual(out[id_list_col], expected_id_list)

        # Assertion inside __call__: values here are plain lists instead of
        # the dicts used in ``data`` above.
        wrong_data = {
            id_list_col: [torch.tensor(1), torch.tensor(2)],
            id_score_list_col: [torch.tensor(v) for v in (1, 2, 3)],
        }
        with self.assertRaises(AssertionError):
            transform(wrong_data)

        # Assertion inside the constructor: both key lists name the same
        # columns.
        overlapping_id_list_keys = [id_list_col, next_id_list_col]
        overlapping_id_score_list_keys = [id_list_col, next_id_list_col]
        with self.assertRaises(AssertionError):
            transforms.MapIDListFeatures(
                id_list_keys=overlapping_id_list_keys,
                id_score_list_keys=overlapping_id_score_list_keys,
                feature_config=state_feature_config,
                device=torch.device("cpu"),
            )