def test_fully_connected_with_embedding(self):
    """Run the shared DQN net-builder check for FullyConnectedWithEmbedding.

    The builder is checked three times: with the default feature config,
    with float features plus one id-list feature, and with float features
    plus one id-list and one id-score-list feature.
    """

    def make_float_infos():
        # Float features with ids 1..4, each named after its id.
        return [
            rlt.FloatFeatureInfo(name=str(fid), feature_id=fid)
            for fid in range(1, 5)
        ]

    def make_id_list_configs():
        return [
            rlt.IdListFeatureConfig(
                name="A", feature_id=10, id_mapping_name="A_mapping"
            )
        ]

    def make_id_mapping_config():
        explicit = rlt.ExplicitMapping(ids=[0, 1, 2])
        return {"A_mapping": rlt.IdMappingUnion(explicit_mapping=explicit)}

    # Intentionally used this long path to make sure we included it in __init__.py
    net_builder = (
        discrete_dqn.fully_connected_with_embedding.FullyConnectedWithEmbedding()
    )
    chooser = DiscreteDQNNetBuilder__Union(FullyConnectedWithEmbedding=net_builder)
    self._test_discrete_dqn_net_builder(chooser)

    # only id_list
    id_list_only_config = rlt.ModelFeatureConfig(
        float_feature_infos=make_float_infos(),
        id_list_feature_configs=make_id_list_configs(),
        id_mapping_config=make_id_mapping_config(),
    )
    self._test_discrete_dqn_net_builder(
        chooser, state_feature_config=id_list_only_config
    )

    # with id_score_list
    id_score_list_config = rlt.ModelFeatureConfig(
        float_feature_infos=make_float_infos(),
        id_list_feature_configs=make_id_list_configs(),
        id_score_list_feature_configs=[
            rlt.IdScoreListFeatureConfig(
                name="B", feature_id=100, id_mapping_name="A_mapping"
            )
        ],
        id_mapping_config=make_id_mapping_config(),
    )
    self._test_discrete_dqn_net_builder(
        chooser, state_feature_config=id_score_list_config
    )
def _test_discrete_dqn_net_builder(
    self,
    chooser: DiscreteDQNNetBuilder__Union,
    state_feature_config: Optional[rlt.ModelFeatureConfig] = None,
    serving_module_class=DiscreteDqnPredictorWrapper,
) -> None:
    """Build a Q-network and a serving module from ``chooser`` and check them.

    Args:
        chooser: Union whose ``value`` is the net builder under test.
        state_feature_config: Feature config handed to the builder. When it
            is omitted (or falsy), a config with three float features
            (ids 0..2, named ``f0``..``f2``) is used instead.
        serving_module_class: Class the built serving module is expected to
            be an instance of.
    """
    builder = chooser.value
    default_state_dim = 3
    state_feature_config = state_feature_config or rlt.ModelFeatureConfig(
        float_feature_infos=[
            rlt.FloatFeatureInfo(name=f"f{i}", feature_id=i)
            for i in range(default_state_dim)
        ]
    )
    # One CONTINUOUS normalization entry (mean 0.0, stddev 1.0) per float
    # feature declared in the config, keyed by feature id.
    state_normalization_data = NormalizationData(
        dense_normalization_parameters={
            fi.feature_id: NormalizationParameters(
                feature_type=CONTINUOUS, mean=0.0, stddev=1.0
            )
            for fi in state_feature_config.float_feature_infos
        }
    )
    action_names = ["L", "R"]
    q_network = builder.build_q_network(
        state_feature_config, state_normalization_data, len(action_names)
    )
    x = q_network.input_prototype()
    y = q_network(x)
    # Expected shape is tied to the action list rather than a literal 2, so
    # the assertion stays correct if the action list changes.
    self.assertEqual(y.shape, (1, len(action_names)))
    serving_module = builder.build_serving_module(
        q_network, state_normalization_data, action_names, state_feature_config
    )
    self.assertIsInstance(serving_module, serving_module_class)
def test_discrete_wrapper(self):
    """Compare DiscreteDqnPredictorWrapper output with a direct DQN call.

    The wrapper is fed the input prototype; its action names and Q-values
    are compared with running the preprocessor and the DQN by hand.
    """
    feature_ids = range(1, 5)
    norm_params = {fid: _cont_norm() for fid in feature_ids}
    preprocessor = Preprocessor(norm_params, False)
    num_actions = 2
    q_net = models.FullyConnectedDQN(
        state_dim=len(norm_params),
        action_dim=num_actions,
        sizes=[16],
        activations=["relu"],
    )
    float_infos = [
        rlt.FloatFeatureInfo(feature_id=fid, name=f"feat_{fid}")
        for fid in feature_ids
    ]
    feature_config = rlt.ModelFeatureConfig(float_feature_infos=float_infos)
    q_net_with_preprocessor = DiscreteDqnWithPreprocessor(
        q_net, preprocessor, feature_config
    )
    action_names = ["L", "R"]
    predictor = DiscreteDqnPredictorWrapper(
        q_net_with_preprocessor, action_names, feature_config
    )

    prototype = q_net_with_preprocessor.input_prototype()[0]
    returned_names, q_values = predictor(prototype)
    self.assertEqual(action_names, returned_names)
    self.assertEqual(q_values.shape, (1, 2))

    # Recompute the Q-values without the wrapper.
    preprocessed = preprocessor(*prototype.float_features_with_presence)
    expected_q_values = q_net(rlt.FeatureData(preprocessed))
    elementwise_match = expected_q_values == q_values
    self.assertTrue(elementwise_match.all())
def test_actor_wrapper(self):
    """Compare ActorPredictorWrapper output with a direct actor call.

    The action returned by the wrapper for the input prototype is compared
    with running the preprocessor, actor and postprocessor by hand.
    """
    state_norm_params = {fid: _cont_norm() for fid in range(1, 5)}
    action_norm_params = {fid: _cont_action_norm() for fid in range(101, 105)}
    preprocessor = Preprocessor(state_norm_params, False)
    action_postprocessor = Postprocessor(action_norm_params, False)

    # Test with FullyConnectedActor to make behavior deterministic
    actor_net = models.FullyConnectedActor(
        state_dim=len(state_norm_params),
        action_dim=len(action_norm_params),
        sizes=[16],
        activations=["relu"],
    )
    feature_config = rlt.ModelFeatureConfig()
    actor_with_preprocessor = ActorWithPreprocessor(
        actor_net, preprocessor, feature_config, action_postprocessor
    )
    predictor = ActorPredictorWrapper(actor_with_preprocessor, feature_config)

    prototype = actor_with_preprocessor.input_prototype()[0]
    action, _log_prob = predictor(prototype)
    self.assertEqual(action.shape, (1, len(action_norm_params)))

    # Recompute the action without the wrapper.
    preprocessed = preprocessor(*prototype[0])
    actor_output = actor_net(rlt.FeatureData(preprocessed))
    expected_action = action_postprocessor(actor_output.action)
    elementwise_match = expected_action == action
    self.assertTrue(elementwise_match.all())
def test_discrete_wrapper_with_id_list(self):
    """Compare DiscreteDqnPredictorWrapper output when an id-list feature is present.

    The network is an EmbeddingBagConcat followed by a FullyConnectedDQN.
    The wrapper's Q-values for the input prototype are compared with a
    direct call that receives the id-list features keyed by feature name.
    """
    norm_params = {fid: _cont_norm() for fid in range(1, 5)}
    preprocessor = Preprocessor(norm_params, False)
    num_actions = 2
    feature_config = rlt.ModelFeatureConfig(
        float_feature_infos=[
            rlt.FloatFeatureInfo(name=str(fid), feature_id=fid)
            for fid in range(1, 5)
        ],
        id_list_feature_configs=[
            rlt.IdListFeatureConfig(
                name="A", feature_id=10, id_mapping_name="A_mapping"
            )
        ],
        id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
    )
    embedding_layer = models.EmbeddingBagConcat(
        state_dim=len(norm_params),
        model_feature_config=feature_config,
        embedding_dim=8,
    )
    q_net = models.Sequential(
        embedding_layer,
        rlt.TensorFeatureData(),
        models.FullyConnectedDQN(
            embedding_layer.output_dim,
            action_dim=num_actions,
            sizes=[16],
            activations=["relu"],
        ),
    )
    q_net_with_preprocessor = DiscreteDqnWithPreprocessor(
        q_net, preprocessor, feature_config
    )
    action_names = ["L", "R"]
    predictor = DiscreteDqnPredictorWrapper(
        q_net_with_preprocessor, action_names, feature_config
    )

    prototype = q_net_with_preprocessor.input_prototype()[0]
    returned_names, q_values = predictor(prototype)
    self.assertEqual(action_names, returned_names)
    self.assertEqual(q_values.shape, (1, 2))

    # The prototype keys id-list features by feature id; re-key them by the
    # configured feature name for the direct call below.
    name_by_feature_id = {
        cfg.feature_id: cfg.name for cfg in feature_config.id_list_feature_configs
    }
    id_list_by_name = {
        name_by_feature_id[feature_id]: value
        for feature_id, value in prototype.id_list_features.items()
    }
    preprocessed = preprocessor(*prototype.float_features_with_presence)
    expected_q_values = q_net(
        rlt.FeatureData(
            float_features=preprocessed,
            id_list_features=id_list_by_name,
        )
    )
    elementwise_match = expected_q_values == q_values
    self.assertTrue(elementwise_match.all())
def get_feature_config(
    float_features: Optional[List[Tuple[int, str]]],
) -> rlt.ModelFeatureConfig:
    """Build a ModelFeatureConfig from ``(feature_id, name)`` pairs.

    Args:
        float_features: Pairs of feature id and feature name. ``None`` (or
            any other falsy value) is treated as an empty list.

    Returns:
        A config whose ``float_feature_infos`` holds one FloatFeatureInfo
        per pair, in input order.
    """
    pairs = float_features if float_features else []
    infos = []
    for feature_id, feature_name in pairs:
        infos.append(rlt.FloatFeatureInfo(name=feature_name, feature_id=feature_id))
    return rlt.ModelFeatureConfig(float_feature_infos=infos)
def _test_actor_net_builder(
    self, chooser: ContinuousActorNetBuilder__Union
) -> None:
    """Build an actor network and a serving module from ``chooser`` and check them.

    Checks the shapes of the actor's ``action`` and ``log_prob`` outputs
    for the input prototype, and the type of the built serving module.
    """
    builder = chooser.value

    state_dim = 3
    state_params = {
        idx: NormalizationParameters(feature_type=CONTINUOUS, mean=0.0, stddev=1.0)
        for idx in range(state_dim)
    }
    state_normalization_data = NormalizationData(
        dense_normalization_parameters=state_params
    )

    action_dim = 2
    action_params = {
        idx: NormalizationParameters(
            feature_type=builder.default_action_preprocessing,
            min_value=0.0,
            max_value=1.0,
        )
        for idx in range(action_dim)
    }
    action_normalization_data = NormalizationData(
        dense_normalization_parameters=action_params
    )

    build_feature_config = rlt.ModelFeatureConfig()
    actor_network = builder.build_actor(
        build_feature_config, state_normalization_data, action_normalization_data
    )
    prototype = actor_network.input_prototype()
    output = actor_network(prototype)
    self.assertEqual(output.action.shape, (1, action_dim))
    self.assertEqual(output.log_prob.shape, (1, 1))

    # The serving module gets its own, freshly built feature config.
    serving_feature_config = rlt.ModelFeatureConfig()
    serving_module = builder.build_serving_module(
        actor_network,
        serving_feature_config,
        state_normalization_data,
        action_normalization_data,
    )
    self.assertIsInstance(serving_module, ActorPredictorWrapper)
def __init__(
    self,
    model: ModelBase,  # acc_reward prediction model
    state_preprocessor: Preprocessor,
    seq_len: int,
    num_action: int,
):
    """
    TorchScript tracing cannot capture control flow, so the action
    enumerations are generated here once, as constants the trace can
    use directly.
    """
    default_feature_config = rlt.ModelFeatureConfig()
    super().__init__(model, state_preprocessor, default_feature_config)
    self.num_action = num_action
    self.seq_len = seq_len
    # Precomputed by gen_permutations from seq_len and num_action.
    self.all_permut = gen_permutations(seq_len, num_action)
def __init__(
    self,
    model: ModelBase,
    state_preprocessor: Preprocessor,
    seq_len: int,
    num_action: int,
    state_feature_config: Optional[rlt.ModelFeatureConfig] = None,
):
    """Store the model, preprocessors and planning sizes.

    Args:
        model: Model to wrap.
        state_preprocessor: Preprocessor stored alongside the model.
        seq_len: Stored as ``self.seq_len``.
        num_action: Stored as ``self.num_action``.
        state_feature_config: Feature config used to build the sparse
            preprocessor; an empty ``ModelFeatureConfig`` is used when it
            is omitted (or falsy).
    """
    super().__init__()
    if not state_feature_config:
        state_feature_config = rlt.ModelFeatureConfig()
    cpu_device = torch.device("cpu")

    self.model = model
    self.state_preprocessor = state_preprocessor
    self.state_feature_config = state_feature_config
    # The sparse preprocessor is always created for the CPU device.
    self.sparse_preprocessor = make_sparse_preprocessor(
        self.state_feature_config, device=cpu_device
    )
    self.seq_len = seq_len
    self.num_action = num_action
def __init__(
    self,
    model: ModelBase,  # acc_reward prediction model
    step_model: ModelBase,  # step prediction model
    state_preprocessor: Preprocessor,
    seq_len: int,
    num_action: int,
):
    """
    Unlike Seq2RewardWithPreprocessor, this wrapper plans for several
    look_ahead steps (between 1 and seq_len) and merges the results
    according to the look_ahead step prediction probabilities.
    """
    default_feature_config = rlt.ModelFeatureConfig()
    super().__init__(model, state_preprocessor, default_feature_config)
    self.step_model = step_model
    self.num_action = num_action
    self.seq_len = seq_len
    # key: seq_len, value: all possible action sequences of length seq_len
    self.all_permut = {
        plan_len: gen_permutations(plan_len, num_action)
        for plan_len in range(1, seq_len + 1)
    }
def test_MapIDListFeatures(self, mock_make_sparse_preprocessor):
    """Check MapIDListFeatures output keys, mapped values and its assertions.

    ``mock_make_sparse_preprocessor`` is a mock whose return value supplies
    canned results for ``preprocess_id_list`` and
    ``preprocess_id_score_list``.
    """
    id_list_col = InputColumn.STATE_ID_LIST_FEATURES
    next_id_list_col = InputColumn.NEXT_STATE_ID_LIST_FEATURES
    id_score_list_col = InputColumn.STATE_ID_SCORE_LIST_FEATURES
    next_id_score_list_col = InputColumn.NEXT_STATE_ID_SCORE_LIST_FEATURES

    data = {
        id_list_col: {0: [torch.tensor(v) for v in (1, 2)]},
        id_score_list_col: {1: [torch.tensor(v) for v in (1, 2, 3)]},
    }

    # Canned outputs of the mocked sparse preprocessor.
    sparse_preprocessor = mock_make_sparse_preprocessor.return_value
    sparse_preprocessor.preprocess_id_list.return_value = {
        id_list_col: [torch.tensor(2), torch.tensor(3)]
    }
    sparse_preprocessor.preprocess_id_score_list.return_value = {
        id_score_list_col: [torch.tensor(4), torch.tensor(5), torch.tensor(6)]
    }

    state_id_list_columns: List[str] = [id_list_col, next_id_list_col]
    state_id_score_list_columns: List[str] = [
        id_score_list_col,
        next_id_score_list_col,
    ]
    state_feature_config = rlt.ModelFeatureConfig(
        id_list_feature_configs=[
            rlt.IdListFeatureConfig(
                name=id_list_col,
                feature_id=0,
                id_mapping_name="state_id_list_features_mapping",
            )
        ],
        id_score_list_feature_configs=[
            rlt.IdScoreListFeatureConfig(
                name=id_score_list_col,
                feature_id=1,
                id_mapping_name="state_id_score_list_features_mapping",
            )
        ],
        id_mapping_config={
            "state_id_list_features_mapping": rlt.IdMappingUnion(
                explicit_mapping=rlt.ExplicitMapping(ids=[0, 1, 2])
            ),
            "state_id_score_list_features_mapping": rlt.IdMappingUnion(
                explicit_mapping=rlt.ExplicitMapping(ids=[3, 4, 5])
            ),
        },
    )
    cpu_device = torch.device("cpu")
    map_id_list_features = transforms.MapIDListFeatures(
        id_list_keys=state_id_list_columns,
        id_score_list_keys=state_id_score_list_columns,
        feature_config=state_feature_config,
        device=cpu_device,
    )
    out = map_id_list_features(data)

    # output should contain all k in id_list_keys & id_score_list_keys
    self.assertEqual(len(out), 4)
    # The key should contain none if data don't have it
    self.assertIsNone(out[next_id_list_col], "It should be filtered out")
    # The value of data changed based on sparse-preprocess mapping
    expected_id_list = {id_list_col: [torch.tensor(2), torch.tensor(3)]}
    self.assertEqual(out[id_list_col], expected_id_list)

    # Testing assertion in the call method
    wrong_data = {
        id_list_col: [torch.tensor(1), torch.tensor(2)],
        id_score_list_col: [torch.tensor(1), torch.tensor(2), torch.tensor(3)],
    }
    with self.assertRaises(AssertionError):
        map_id_list_features(wrong_data)

    # Testing assertion in the constructor: the same keys are passed as both
    # id_list_keys and id_score_list_keys.
    overlapping_id_list_keys: List[str] = [id_list_col, next_id_list_col]
    overlapping_id_score_list_keys: List[str] = [id_list_col, next_id_list_col]
    with self.assertRaises(AssertionError):
        transforms.MapIDListFeatures(
            id_list_keys=overlapping_id_list_keys,
            id_score_list_keys=overlapping_id_score_list_keys,
            feature_config=state_feature_config,
            device=torch.device("cpu"),
        )