def test_discrete_wrapper(self):
    """The predictor wrapper must return the configured action names and
    Q-values identical to running the preprocessor + model by hand."""
    feature_ids = list(range(1, 5))
    norm_params = {fid: _cont_norm() for fid in feature_ids}
    preprocessor = Preprocessor(norm_params, False)
    num_actions = 2
    q_network = models.FullyConnectedDQN(
        state_dim=len(norm_params),
        action_dim=num_actions,
        sizes=[16],
        activations=["relu"],
    )
    feature_config = rlt.ModelFeatureConfig(
        float_feature_infos=[
            rlt.FloatFeatureInfo(feature_id=i, name=f"feat_{i}")
            for i in feature_ids
        ]
    )
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
        q_network, preprocessor, feature_config
    )
    action_names = ["L", "R"]
    wrapper = DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names, feature_config
    )
    prototype = dqn_with_preprocessor.input_prototype()[0]
    returned_names, q_values = wrapper(prototype)
    self.assertEqual(action_names, returned_names)
    self.assertEqual(q_values.shape, (1, num_actions))
    # Recompute expected Q-values directly through the unwrapped model.
    expected = q_network(
        rlt.FeatureData(preprocessor(*prototype.float_features_with_presence))
    )
    self.assertTrue((expected == q_values).all())
def test_discrete_wrapper_with_id_list(self):
    """Same contract as the plain wrapper test, but the model also consumes
    an id-list (sparse) feature routed through an embedding bag."""
    feature_ids = list(range(1, 5))
    norm_params = {fid: _cont_norm() for fid in feature_ids}
    preprocessor = Preprocessor(norm_params, False)
    num_actions = 2
    feature_config = rlt.ModelFeatureConfig(
        float_feature_infos=[
            rlt.FloatFeatureInfo(name=str(fid), feature_id=fid)
            for fid in feature_ids
        ],
        id_list_feature_configs=[
            rlt.IdListFeatureConfig(
                name="A", feature_id=10, id_mapping_name="A_mapping"
            )
        ],
        id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
    )
    embedding_concat = models.EmbeddingBagConcat(
        state_dim=len(norm_params),
        model_feature_config=feature_config,
        embedding_dim=8,
    )
    q_network = models.Sequential(
        embedding_concat,
        rlt.TensorFeatureData(),
        models.FullyConnectedDQN(
            embedding_concat.output_dim,
            action_dim=num_actions,
            sizes=[16],
            activations=["relu"],
        ),
    )
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
        q_network, preprocessor, feature_config
    )
    action_names = ["L", "R"]
    wrapper = DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names, feature_config
    )
    prototype = dqn_with_preprocessor.input_prototype()[0]
    returned_names, q_values = wrapper(prototype)
    self.assertEqual(action_names, returned_names)
    self.assertEqual(q_values.shape, (1, num_actions))
    # The raw input keys id-list features by feature id; the model expects
    # them keyed by configured name, so translate before the manual forward.
    id_to_name = {
        cfg.feature_id: cfg.name
        for cfg in feature_config.id_list_feature_configs
    }
    id_list_by_name = {
        id_to_name[fid]: value
        for fid, value in prototype.id_list_features.items()
    }
    expected = q_network(
        rlt.FeatureData(
            float_features=preprocessor(
                *prototype.float_features_with_presence
            ),
            id_list_features=id_list_by_name,
        )
    )
    self.assertTrue((expected == q_values).all())
def build_q_network(
    self,
    state_feature_config: rlt.ModelFeatureConfig,
    state_normalization_data: NormalizationData,
    output_dim: int,
) -> models.ModelBase:
    """Build a Q-network: embedding-bag feature encoder feeding a
    fully connected DQN head.

    Args:
        state_feature_config: float and id-list feature layout.
        state_normalization_data: used to size the dense-state input.
        output_dim: number of actions (Q-value outputs).

    Returns:
        A Sequential model: EmbeddingBagConcat -> TensorFeatureData -> DQN.
    """
    embedding_bag = models.EmbeddingBagConcat(
        state_dim=self._get_input_dim(state_normalization_data),
        model_feature_config=state_feature_config,
        embedding_dim=self.embedding_dim,
    )
    fc_head = models.FullyConnectedDQN(
        embedding_bag.output_dim,
        action_dim=output_dim,
        sizes=self.sizes,
        activations=self.activations,
        dropout_ratio=self.dropout_ratio,
    )
    return models.Sequential(  # type: ignore
        embedding_bag, rlt.TensorFeatureData(), fc_head
    )
def test_discrete_wrapper_with_id_list_none(self):
    """Id-list-capable wrapper must behave like the plain one when the
    model takes no id-list features."""
    norm_params = {fid: _cont_norm() for fid in range(1, 5)}
    preprocessor = Preprocessor(norm_params, False)
    num_actions = 2
    q_network = models.FullyConnectedDQN(
        state_dim=len(norm_params),
        action_dim=num_actions,
        sizes=[16],
        activations=["relu"],
    )
    dqn_with_preprocessor = DiscreteDqnWithPreprocessorWithIdList(
        q_network, preprocessor
    )
    action_names = ["L", "R"]
    wrapper = DiscreteDqnPredictorWrapperWithIdList(
        dqn_with_preprocessor, action_names
    )
    prototype = dqn_with_preprocessor.input_prototype()
    returned_names, q_values = wrapper(*prototype)
    self.assertEqual(action_names, returned_names)
    self.assertEqual(q_values.shape, (1, num_actions))
    # Recompute expected Q-values directly through the unwrapped model.
    expected = q_network(rlt.FeatureData(preprocessor(*prototype[0])))
    self.assertTrue((expected == q_values).all())
def get_modular_sarsa_trainer_reward_boost(
    self,
    environment,
    reward_shape,
    dueling,
    categorical,
    quantile,
    use_gpu=False,
    use_all_avail_gpus=False,
    clip_grad_norm=None,
):
    """Build a SARSA-style trainer (DQN, C51, or QR-DQN) for the test env.

    Args:
        environment: test environment exposing `normalization` and `ACTIONS`.
        reward_shape: reward boost spec forwarded to the model parameters.
        dueling: use a dueling Q-network head.
        categorical: build a C51 (categorical distributional) trainer.
        quantile: build a QR-DQN trainer; mutually exclusive with `categorical`.
        use_gpu: move networks to CUDA.
        use_all_avail_gpus: wrap networks with DistributedDataParallel
            (skipped for categorical, which doesn't support it here).
        clip_grad_norm: optional gradient-norm clipping threshold.

    Returns:
        A DQNTrainer, C51Trainer, or QRDQNTrainer.
    """
    assert not quantile or not categorical
    parameters = self.get_sarsa_parameters(
        environment, reward_shape, dueling, categorical, quantile, clip_grad_norm
    )

    # Shared sizing for every network built below.
    state_dim = get_num_output_features(environment.normalization)
    action_dim = len(environment.ACTIONS)
    sizes = parameters.training.layers[1:-1]
    activations = parameters.training.activations[:-1]

    def make_dueling_dqn(num_atoms=None):
        # One-line purpose: dueling-head Q-network with the shared sizing.
        return models.DuelingQNetwork.make_fully_connected(
            state_dim=state_dim,
            action_dim=action_dim,
            layers=sizes,
            activations=activations,
            num_atoms=num_atoms,
        )

    def make_fc_dqn(num_atoms=None):
        # One-line purpose: plain fully connected Q-network with shared sizing.
        # Only forward num_atoms when given, so calls without it match the
        # original constructor invocations exactly.
        extra = {"num_atoms": num_atoms} if num_atoms is not None else {}
        return models.FullyConnectedDQN(
            state_dim=state_dim,
            action_dim=action_dim,
            sizes=sizes,
            activations=activations,
            **extra,
        )

    if quantile:
        q_network = (
            make_dueling_dqn(num_atoms=parameters.rainbow.num_atoms)
            if dueling
            else make_fc_dqn(num_atoms=parameters.rainbow.num_atoms)
        )
    elif categorical:
        assert not dueling
        q_network = models.CategoricalDQN(
            make_fc_dqn(num_atoms=parameters.rainbow.num_atoms),
            qmin=-100,
            qmax=200,
            num_atoms=parameters.rainbow.num_atoms,
        )
    else:
        q_network = make_dueling_dqn() if dueling else make_fc_dqn()

    # Single source of truth for "CPE is enabled", used by every branch below.
    calc_cpe = bool(
        parameters.evaluation and parameters.evaluation.calc_cpe_in_training
    )
    q_network_cpe, q_network_cpe_target, reward_network = None, None, None
    if calc_cpe:
        q_network_cpe = make_fc_dqn()
        q_network_cpe_target = q_network_cpe.get_target_network()
        reward_network = make_fc_dqn()

    if use_gpu:
        q_network = q_network.cuda()
        # Fix: the original read parameters.evaluation.calc_cpe_in_training
        # here without the None-guard applied above, crashing when
        # evaluation is None and use_gpu is True.
        if calc_cpe:
            reward_network = reward_network.cuda()
            q_network_cpe = q_network_cpe.cuda()
            q_network_cpe_target = q_network_cpe_target.cuda()

    if use_all_avail_gpus and not categorical:
        q_network = q_network.get_distributed_data_parallel_model()
        # Fix: only wrap the CPE networks when they exist; the original
        # called the method on None when CPE was disabled.
        if calc_cpe:
            reward_network = reward_network.get_distributed_data_parallel_model()
            q_network_cpe = q_network_cpe.get_distributed_data_parallel_model()
            q_network_cpe_target = (
                q_network_cpe_target.get_distributed_data_parallel_model()
            )

    if quantile:
        parameters = QRDQNTrainerParameters.from_discrete_action_model_parameters(
            parameters
        )
        trainer = QRDQNTrainer(
            q_network,
            q_network.get_target_network(),
            parameters,
            use_gpu,
            reward_network=reward_network,
            q_network_cpe=q_network_cpe,
            q_network_cpe_target=q_network_cpe_target,
        )
    elif categorical:
        parameters = C51TrainerParameters.from_discrete_action_model_parameters(
            parameters
        )
        trainer = C51Trainer(
            q_network, q_network.get_target_network(), parameters, use_gpu
        )
    else:
        parameters = DQNTrainerParameters.from_discrete_action_model_parameters(
            parameters
        )
        trainer = DQNTrainer(
            q_network,
            q_network.get_target_network(),
            reward_network,
            parameters,
            use_gpu,
            q_network_cpe=q_network_cpe,
            q_network_cpe_target=q_network_cpe_target,
        )
    return trainer