def __init__(
    self,
    model: ModelBase,
    state_preprocessor: Preprocessor,
    seq_len: int,
    num_action: int,
):
    """
    Since TorchScript is unable to trace control flow, we have to generate
    the action enumerations as constants here so that trace can use them
    directly.
    """
    super().__init__(model, state_preprocessor, rlt.ModelFeatureConfig())
    self.seq_len = seq_len
    self.num_action = num_action

    def gen_permutations(seq_len: int, num_action: int) -> torch.Tensor:
        """
        Generate all seq_len permutations for a given action set.
        The return shape is (SEQ_LEN, PERM_NUM, ACTION_DIM).
        """
        all_permut = torch.cartesian_prod(*[torch.arange(num_action)] * seq_len)
        if seq_len == 1:
            # cartesian_prod of a single tensor is 1-D; add the permutation
            # dimension so the documented 3-D shape also holds for seq_len == 1
            all_permut = all_permut.unsqueeze(1)
        all_permut = F.one_hot(all_permut, num_action).transpose(0, 1)
        return all_permut.float()

    self.all_permut = gen_permutations(seq_len, num_action)
    self.num_permut = self.all_permut.size(1)
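# A minimal sanity sketch (illustrative only, not part of the wrapper; the
# helper below just restates the nested logic above so it can run standalone):
# with seq_len=2 and num_action=2 all 2**2 = 4 one-hot action sequences are
# enumerated, giving shape (SEQ_LEN, PERM_NUM, ACTION_DIM) = (2, 4, 2);
# in general PERM_NUM == num_action ** seq_len.
import torch
import torch.nn.functional as F

def _gen_permutations_sketch(seq_len: int, num_action: int) -> torch.Tensor:
    all_permut = torch.cartesian_prod(*[torch.arange(num_action)] * seq_len)
    if seq_len == 1:
        all_permut = all_permut.unsqueeze(1)
    all_permut = F.one_hot(all_permut, num_action).transpose(0, 1)
    return all_permut.float()

assert _gen_permutations_sketch(2, 2).shape == (2, 4, 2)
assert _gen_permutations_sketch(1, 3).shape == (1, 3, 3)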
def _test_discrete_dqn_net_builder(
    self,
    chooser: DiscreteDQNNetBuilder__Union,
    state_feature_config: Optional[rlt.ModelFeatureConfig] = None,
    serving_module_class=DiscreteDqnPredictorWrapper,
) -> None:
    builder = chooser.value
    state_dim = 3
    state_feature_config = state_feature_config or rlt.ModelFeatureConfig(
        float_feature_infos=[
            rlt.FloatFeatureInfo(name=f"f{i}", feature_id=i)
            for i in range(state_dim)
        ]
    )
    state_dim = len(state_feature_config.float_feature_infos)
    state_norm_params = {
        fi.feature_id: NormalizationParameters(
            feature_type=CONTINUOUS, mean=0.0, stddev=1.0
        )
        for fi in state_feature_config.float_feature_infos
    }
    action_names = ["L", "R"]
    q_network = builder.build_q_network(
        state_feature_config, state_norm_params, len(action_names)
    )
    x = q_network.input_prototype()
    y = q_network(x).q_values
    self.assertEqual(y.shape, (1, 2))
    serving_module = builder.build_serving_module(
        q_network, state_norm_params, action_names, state_feature_config
    )
    self.assertIsInstance(serving_module, serving_module_class)
def test_discrete_wrapper(self):
    ids = range(1, 5)
    state_normalization_parameters = {i: _cont_norm() for i in ids}
    state_preprocessor = Preprocessor(state_normalization_parameters, False)
    action_dim = 2
    dqn = models.FullyConnectedDQN(
        state_dim=len(state_normalization_parameters),
        action_dim=action_dim,
        sizes=[16],
        activations=["relu"],
    )
    state_feature_config = rlt.ModelFeatureConfig(
        float_feature_infos=[
            rlt.FloatFeatureInfo(feature_id=i, name=f"feat_{i}") for i in ids
        ]
    )
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
        dqn, state_preprocessor, state_feature_config
    )
    action_names = ["L", "R"]
    wrapper = DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names, state_feature_config
    )
    input_prototype = dqn_with_preprocessor.input_prototype()[0]
    output_action_names, q_values = wrapper(input_prototype)
    self.assertEqual(action_names, output_action_names)
    self.assertEqual(q_values.shape, (1, 2))

    state_with_presence = input_prototype.float_features_with_presence
    expected_output = dqn(
        rlt.FeatureData(state_preprocessor(*state_with_presence))
    )
    self.assertTrue((expected_output == q_values).all())
def test_discrete_wrapper_with_id_list(self):
    state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
    state_preprocessor = Preprocessor(state_normalization_parameters, False)
    action_dim = 2
    state_feature_config = rlt.ModelFeatureConfig(
        float_feature_infos=[
            rlt.FloatFeatureInfo(name=str(i), feature_id=i) for i in range(1, 5)
        ],
        id_list_feature_configs=[
            rlt.IdListFeatureConfig(
                name="A", feature_id=10, id_mapping_name="A_mapping"
            )
        ],
        id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
    )
    embedding_concat = models.EmbeddingBagConcat(
        state_dim=len(state_normalization_parameters),
        model_feature_config=state_feature_config,
        embedding_dim=8,
    )
    dqn = models.Sequential(
        embedding_concat,
        rlt.TensorFeatureData(),
        models.FullyConnectedDQN(
            embedding_concat.output_dim,
            action_dim=action_dim,
            sizes=[16],
            activations=["relu"],
        ),
    )
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
        dqn, state_preprocessor, state_feature_config
    )
    action_names = ["L", "R"]
    wrapper = DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names, state_feature_config
    )
    input_prototype = dqn_with_preprocessor.input_prototype()[0]
    output_action_names, q_values = wrapper(input_prototype)
    self.assertEqual(action_names, output_action_names)
    self.assertEqual(q_values.shape, (1, 2))

    feature_id_to_name = {
        config.feature_id: config.name
        for config in state_feature_config.id_list_feature_configs
    }
    state_id_list_features = {
        feature_id_to_name[k]: v
        for k, v in input_prototype.id_list_features.items()
    }
    state_with_presence = input_prototype.float_features_with_presence
    expected_output = dqn(
        rlt.FeatureData(
            float_features=state_preprocessor(*state_with_presence),
            id_list_features=state_id_list_features,
        )
    )
    self.assertTrue((expected_output == q_values).all())
def get_feature_config(
    float_features: Optional[List[Tuple[int, str]]]
) -> rlt.ModelFeatureConfig:
    float_features = float_features or []
    float_feature_infos = [
        rlt.FloatFeatureInfo(name=f_name, feature_id=f_id)
        for f_id, f_name in float_features
    ]
    return rlt.ModelFeatureConfig(float_feature_infos=float_feature_infos)
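# A minimal usage sketch of get_feature_config; the feature names here
# ("x_pos", "y_pos") are hypothetical.
config = get_feature_config([(1, "x_pos"), (2, "y_pos")])
assert [fi.feature_id for fi in config.float_feature_infos] == [1, 2]
assert config.float_feature_infos[0].name == "x_pos"
# Passing None yields an empty config:
assert get_feature_config(None).float_feature_infos == []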
def test_fully_connected_with_embedding(self):
    # Intentionally used this long path to make sure we included it in __init__.py
    chooser = DiscreteDQNNetBuilder__Union(
        FullyConnectedWithEmbedding=(
            discrete_dqn.fully_connected_with_embedding.FullyConnectedWithEmbedding()
        )
    )
    self._test_discrete_dqn_net_builder(chooser)

    # only id_list
    state_feature_config = rlt.ModelFeatureConfig(
        float_feature_infos=[
            rlt.FloatFeatureInfo(name=str(i), feature_id=i) for i in range(1, 5)
        ],
        id_list_feature_configs=[
            rlt.IdListFeatureConfig(
                name="A", feature_id=10, id_mapping_name="A_mapping"
            )
        ],
        id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
    )
    self._test_discrete_dqn_net_builder(
        chooser, state_feature_config=state_feature_config
    )

    # with id_score_list
    state_feature_config = rlt.ModelFeatureConfig(
        float_feature_infos=[
            rlt.FloatFeatureInfo(name=str(i), feature_id=i) for i in range(1, 5)
        ],
        id_list_feature_configs=[
            rlt.IdListFeatureConfig(
                name="A", feature_id=10, id_mapping_name="A_mapping"
            )
        ],
        id_score_list_feature_configs=[
            rlt.IdScoreListFeatureConfig(
                name="B", feature_id=100, id_mapping_name="A_mapping"
            )
        ],
        id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
    )
    self._test_discrete_dqn_net_builder(
        chooser, state_feature_config=state_feature_config
    )
def feature_config(self):
    return rlt.ModelFeatureConfig(
        id_mapping_config={
            "page": rlt.IdMapping(
                ids=list(range(100, 100 + self.embedding_size))
            )
        },
        id_list_feature_configs=[
            rlt.IdFeatureConfig(
                name="page_id", feature_id=2002, id_mapping_name="page"
            )
        ],
    )
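# An illustrative check (an embedding_size of 4 is hypothetical): the config
# above remaps the raw "page_id" feature (feature_id 2002) into the
# contiguous id space [100, 104) via the "page" IdMapping.
#
#     cfg = self.feature_config()  # with self.embedding_size == 4
#     assert cfg.id_mapping_config["page"].ids == [100, 101, 102, 103]
#     assert cfg.id_list_feature_configs[0].feature_id == 2002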
def __init__(
    self,
    model: ModelBase,
    state_preprocessor: Preprocessor,
    state_feature_config: Optional[rlt.ModelFeatureConfig] = None,
):
    super().__init__()
    self.model = model
    self.state_preprocessor = state_preprocessor
    self.state_feature_config = (
        state_feature_config or rlt.ModelFeatureConfig()
    )
    self.sparse_preprocessor = make_sparse_preprocessor(
        self.state_feature_config, device=torch.device("cpu")
    )
def __init__(
    self,
    model: ModelBase,  # acc_reward prediction model
    state_preprocessor: Preprocessor,
    seq_len: int,
    num_action: int,
):
    """
    Since TorchScript is unable to trace control flow, we have to generate
    the action enumerations as constants here so that trace can use them
    directly.
    """
    super().__init__(model, state_preprocessor, rlt.ModelFeatureConfig())
    self.seq_len = seq_len
    self.num_action = num_action
    self.all_permut = gen_permutations(seq_len, num_action)
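# A toy sketch of the tracing constraint this wrapper works around (the toy
# module and random input are assumptions, not ReAgent API): torch.jit.trace
# records a single execution path, so the action enumeration must be
# precomputed and stored as a tensor attribute rather than built with Python
# control flow inside forward().
import torch

class _ToyScorer(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # analogous to self.all_permut: a constant fixed at construction time
        self.register_buffer("all_permut", torch.eye(3))

    def forward(self, q: torch.Tensor) -> torch.Tensor:
        # no data-dependent Python branching here, so the trace is faithful
        return self.all_permut @ q

traced = torch.jit.trace(_ToyScorer(), torch.randn(3))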
def __init__(
    self,
    model: ModelBase,  # acc_reward prediction model
    step_model: ModelBase,  # step prediction model
    state_preprocessor: Preprocessor,
    seq_len: int,
    num_action: int,
):
    """
    The difference from Seq2RewardWithPreprocessor: this wrapper plans for
    different look_ahead steps (between 1 and seq_len) and merges the
    results according to the predicted look_ahead step probabilities.
    """
    super().__init__(model, state_preprocessor, rlt.ModelFeatureConfig())
    self.step_model = step_model
    self.seq_len = seq_len
    self.num_action = num_action
    # key: seq_len, value: all possible action sequences of that length
    self.all_permut = {
        s + 1: gen_permutations(s + 1, num_action) for s in range(seq_len)
    }
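# An illustrative view of self.all_permut for seq_len=3, num_action=2
# (hypothetical sizes, assuming gen_permutations returns the documented
# (SEQ_LEN, PERM_NUM, ACTION_DIM) shape): each key s maps to every one-hot
# action sequence of length s, i.e. shape (s, 2**s, 2).
#
#     self.all_permut[1].shape == (1, 2, 2)   # 2 sequences of length 1
#     self.all_permut[2].shape == (2, 4, 2)   # 4 sequences of length 2
#     self.all_permut[3].shape == (3, 8, 2)   # 8 sequences of length 3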
class DiscreteDQNBase(ModelManager):
    target_action_distribution: Optional[List[float]] = None
    state_feature_config: rlt.ModelFeatureConfig = field(
        default_factory=lambda: rlt.ModelFeatureConfig(float_feature_infos=[])
    )
    preprocessing_options: Optional[PreprocessingOptions] = None
    reader_options: Optional[ReaderOptions] = None

    def __post_init_post_parse__(self):
        super().__init__()
        self._metrics_to_score = None
        self._q_network: Optional[ModelBase] = None

    @classmethod
    def normalization_key(cls) -> str:
        return NormalizationKey.STATE

    def create_policy(self, serving: bool) -> Policy:
        """Create an online DiscreteDQN Policy from env."""
        if serving:
            sampler = GreedyActionSampler()
            scorer = discrete_dqn_serving_scorer(
                DiscreteDqnPredictorUnwrapper(self.build_serving_module())
            )
        else:
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `rl_parameters`.
            sampler = SoftmaxActionSampler(
                temperature=self.rl_parameters.temperature
            )
            # pyre-fixme[16]: `RLTrainer` has no attribute `q_network`.
            scorer = discrete_dqn_scorer(self.trainer.q_network)
        return Policy(scorer=scorer, sampler=sampler)

    @property
    def metrics_to_score(self) -> List[str]:
        assert self._reward_options is not None
        # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `_metrics_to_score`.
        if self._metrics_to_score is None:
            self._metrics_to_score = get_metrics_to_score(
                self._reward_options.metric_reward_values
            )
        return self._metrics_to_score

    @property
    def should_generate_eval_dataset(self) -> bool:
        # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `eval_parameters`.
        return self.eval_parameters.calc_cpe_in_training

    def _set_normalization_parameters(
        self, normalization_data_map: Dict[str, NormalizationData]
    ) -> None:
        """Set normalization parameters on the current instance."""
        state_norm_data = normalization_data_map.get(self.normalization_key(), None)
        assert state_norm_data is not None
        assert state_norm_data.dense_normalization_parameters is not None
        # pyre-fixme[8]: Attribute has type `Dict[int,
        #  reagent.parameters.NormalizationParameters]`; used as `Optional[Dict[int,
        #  reagent.parameters.NormalizationParameters]]`.
        self.state_normalization_parameters = (
            state_norm_data.dense_normalization_parameters
        )
        self.set_normalization_data_map(normalization_data_map)

    def run_feature_identification(
        self, input_table_spec: TableSpec
    ) -> Dict[str, NormalizationData]:
        preprocessing_options = self.preprocessing_options or PreprocessingOptions()
        logger.info("Overriding whitelist_features")
        state_features = [
            ffi.feature_id
            for ffi in self.state_feature_config.float_feature_infos
        ]
        preprocessing_options = preprocessing_options._replace(
            whitelist_features=state_features
        )
        state_normalization_parameters = identify_normalization_parameters(
            input_table_spec, InputColumn.STATE_FEATURES, preprocessing_options
        )
        return {
            NormalizationKey.STATE: NormalizationData(
                dense_normalization_parameters=state_normalization_parameters
            )
        }

    def query_data(
        self,
        input_table_spec: TableSpec,
        sample_range: Optional[Tuple[float, float]],
        reward_options: RewardOptions,
    ) -> Dataset:
        return query_data(
            input_table_spec=input_table_spec,
            discrete_action=True,
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `action_names`.
            actions=self.action_names,
            include_possible_actions=True,
            sample_range=sample_range,
            custom_reward_expression=reward_options.custom_reward_expression,
            multi_steps=self.multi_steps,
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `rl_parameters`.
            gamma=self.rl_parameters.gamma,
        )

    @property
    def multi_steps(self) -> Optional[int]:
        # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `rl_parameters`.
        return self.rl_parameters.multi_steps

    def build_batch_preprocessor(self) -> BatchPreprocessor:
        return DiscreteDqnBatchPreprocessor(
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `action_names`.
            num_actions=len(self.action_names),
            state_preprocessor=Preprocessor(
                normalization_parameters=self.state_normalization_parameters,
                use_gpu=self.use_gpu,
            ),
            use_gpu=self.use_gpu,
        )

    def train(
        self,
        train_dataset: Dataset,
        eval_dataset: Optional[Dataset],
        num_epochs: int,
    ) -> RLTrainingOutput:
        """
        Train the model.

        Returns a partially filled RLTrainingOutput. The field that should
        not be filled is:
        - output_path
        """
        reporter = DiscreteDQNReporter(
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `trainer_param`.
            self.trainer_param.actions,
            target_action_distribution=self.target_action_distribution,
        )
        # pyre-fixme[16]: `RLTrainer` has no attribute `add_observer`.
        self.trainer.add_observer(reporter)

        evaluator = Evaluator(
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `action_names`.
            self.action_names,
            # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `rl_parameters`.
            self.rl_parameters.gamma,
            self.trainer,
            metrics_to_score=self.metrics_to_score,
        )
        # pyre-fixme[16]: `Evaluator` has no attribute `add_observer`.
        evaluator.add_observer(reporter)

        batch_preprocessor = self.build_batch_preprocessor()
        train_and_evaluate_generic(
            train_dataset,
            eval_dataset,
            self.trainer,
            num_epochs,
            self.use_gpu,
            batch_preprocessor,
            reporter,
            evaluator,
            reader_options=self.reader_options,
        )
        # pyre-fixme[16]: `RLTrainingReport` has no attribute `make_union_instance`.
        training_report = RLTrainingReport.make_union_instance(
            reporter.generate_training_report()
        )
        return RLTrainingOutput(training_report=training_report)
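# A minimal usage sketch (the concrete manager subclass and observation are
# hypothetical): once normalization data and a trainer are set up, the
# manager hands back either a serving-side or an online Policy.
#
#     manager = MyDQNManager(...)           # hypothetical DiscreteDQNBase subclass
#     policy = manager.create_policy(serving=False)
#     action = policy.act(obs)              # SoftmaxActionSampler draws the action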
class DiscreteDQNBase(ModelManager):
    target_action_distribution: Optional[List[float]] = None
    state_feature_config: rlt.ModelFeatureConfig = field(
        default_factory=lambda: rlt.ModelFeatureConfig(float_feature_infos=[])
    )
    preprocessing_options: Optional[PreprocessingOptions] = None
    reader_options: Optional[ReaderOptions] = None

    def __post_init_post_parse__(self):
        super().__init__()
        self._metrics_to_score = None
        self._q_network: Optional[ModelBase] = None

    @classmethod
    def normalization_key(cls) -> str:
        return DiscreteNormalizationParameterKeys.STATE

    def create_policy(self, serving: bool) -> Policy:
        """Create an online DiscreteDQN Policy from env."""
        from reagent.gym.policies.samplers.discrete_sampler import (
            SoftmaxActionSampler,
        )
        from reagent.gym.policies.scorers.discrete_scorer import (
            discrete_dqn_scorer,
            discrete_dqn_serving_scorer,
        )

        sampler = SoftmaxActionSampler(temperature=self.rl_parameters.temperature)
        if serving:
            scorer = discrete_dqn_serving_scorer(
                DiscreteDqnPredictorUnwrapper(self.build_serving_module())
            )
        else:
            scorer = discrete_dqn_scorer(self.trainer.q_network)
        return Policy(scorer=scorer, sampler=sampler)

    @property
    def metrics_to_score(self) -> List[str]:
        assert self.reward_options is not None
        if self._metrics_to_score is None:
            self._metrics_to_score = get_metrics_to_score(
                self._reward_options.metric_reward_values
            )
        return self._metrics_to_score

    @property
    def should_generate_eval_dataset(self) -> bool:
        return self.eval_parameters.calc_cpe_in_training

    def _set_normalization_parameters(
        self, normalization_data_map: Dict[str, NormalizationData]
    ):
        """Set normalization parameters on the current instance."""
        state_norm_data = normalization_data_map.get(self.normalization_key(), None)
        assert state_norm_data is not None
        assert state_norm_data.dense_normalization_parameters is not None
        self.state_normalization_parameters = (
            state_norm_data.dense_normalization_parameters
        )

    def run_feature_identification(
        self, input_table_spec: TableSpec
    ) -> Dict[str, NormalizationData]:
        preprocessing_options = self.preprocessing_options or PreprocessingOptions()
        logger.info("Overriding whitelist_features")
        state_features = [
            ffi.feature_id
            for ffi in self.state_feature_config.float_feature_infos
        ]
        preprocessing_options = preprocessing_options._replace(
            whitelist_features=state_features
        )
        state_normalization_parameters = identify_normalization_parameters(
            input_table_spec, "state_features", preprocessing_options
        )
        return {
            DiscreteNormalizationParameterKeys.STATE: NormalizationData(
                dense_normalization_parameters=state_normalization_parameters
            )
        }

    def query_data(
        self,
        input_table_spec: TableSpec,
        sample_range: Optional[Tuple[float, float]],
        reward_options: RewardOptions,
    ) -> Dataset:
        return query_data(
            input_table_spec=input_table_spec,
            actions=self.action_names,
            sample_range=sample_range,
            custom_reward_expression=reward_options.custom_reward_expression,
            multi_steps=self.multi_steps,
            gamma=self.rl_parameters.gamma,
        )

    @property
    def multi_steps(self) -> Optional[int]:
        return self.rl_parameters.multi_steps

    def build_batch_preprocessor(self) -> BatchPreprocessor:
        return DiscreteDqnBatchPreprocessor(
            num_actions=len(self.action_names),
            state_preprocessor=Preprocessor(
                normalization_parameters=self.state_normalization_parameters,
                use_gpu=self.use_gpu,
            ),
            use_gpu=self.use_gpu,
        )

    def train(
        self, train_dataset: Dataset, eval_dataset: Optional[Dataset], num_epochs: int
    ) -> RLTrainingOutput:
        """
        Train the model.

        Returns a partially filled RLTrainingOutput.
        The fields that should not be filled are:
        - output_path
        - warmstart_output_path
        - vis_metrics
        - validation_output
        """
        logger.info("Creating reporter")
        reporter = DiscreteDQNReporter(
            self.trainer_param.actions,
            target_action_distribution=self.target_action_distribution,
        )
        logger.info("Adding reporter to trainer")
        self.trainer.add_observer(reporter)

        training_page_handler = TrainingPageHandler(self.trainer)
        training_page_handler.add_observer(reporter)

        evaluator = Evaluator(
            self.action_names,
            self.rl_parameters.gamma,
            self.trainer,
            metrics_to_score=self.metrics_to_score,
        )
        logger.info("Adding reporter to evaluator")
        evaluator.add_observer(reporter)
        evaluation_page_handler = EvaluationPageHandler(
            self.trainer, evaluator, reporter
        )

        batch_preprocessor = self.build_batch_preprocessor()
        train_and_evaluate_generic(
            train_dataset,
            eval_dataset,
            self.trainer,
            num_epochs,
            self.use_gpu,
            batch_preprocessor,
            training_page_handler,
            evaluation_page_handler,
            reader_options=self.reader_options,
        )
        training_report = RLTrainingReport.make_union_instance(
            reporter.generate_training_report()
        )
        return RLTrainingOutput(training_report=training_report)