Example #1
 def test_discrete_action(self):
     state_dim = 8
     action_dim = 4
     model = DuelingQNetwork(
         layers=[state_dim, 8, 4, action_dim],
         activations=["relu", "relu", "linear"],
         use_batch_norm=True,
     )
     input = model.input_prototype()
     self.assertEqual((1, state_dim), input.state.float_features.shape)
     # Using batch norm requires more than 1 example in training, avoid that
     model.eval()
     q_values = model(input)
     self.assertEqual((1, action_dim), q_values.q_values.shape)
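For context, DuelingQNetwork implements the dueling architecture (Wang et al., 2016): a shared trunk feeds a state-value stream and an advantage stream, which are recombined into Q-values. A minimal sketch of the idea in plain PyTorch (illustrative only; TinyDuelingQNet is a hypothetical stand-in, not ReAgent's implementation):

import torch
import torch.nn as nn

class TinyDuelingQNet(nn.Module):
    """Toy dueling head: shared trunk, then value and advantage streams."""

    def __init__(self, state_dim: int, action_dim: int, hidden: int = 8):
        super().__init__()
        self.trunk = nn.Sequential(nn.Linear(state_dim, hidden), nn.ReLU())
        self.value = nn.Linear(hidden, 1)               # V(s)
        self.advantage = nn.Linear(hidden, action_dim)  # A(s, a)

    def forward(self, state: torch.Tensor) -> torch.Tensor:
        h = self.trunk(state)
        v, a = self.value(h), self.advantage(h)
        # Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a'); subtracting the mean
        # advantage keeps the value/advantage split identifiable.
        return v + a - a.mean(dim=1, keepdim=True)

q_net = TinyDuelingQNet(state_dim=8, action_dim=4)
print(q_net(torch.randn(1, 8)).shape)  # torch.Size([1, 4])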
Example #2
 def test_save_load_discrete_action_batch_norm(self):
     state_dim = 8
     action_dim = 4
     model = DuelingQNetwork(
         layers=[state_dim, 8, 4, action_dim],
         activations=["relu", "relu", "linear"],
         use_batch_norm=True,
     )
     # Freezing batch_norm
     model.eval()
     # Number of expected params is the same because DuelingQNetwork always
     # initializes batch norm layers even if it doesn't use them.
     expected_num_params, expected_num_inputs, expected_num_outputs = 22, 1, 1
     check_save_load(self, model, expected_num_params, expected_num_inputs,
                     expected_num_outputs)
Example #3
    def setUp(self):
        # preparing various components for PPO trainer initialization
        self.batch_size = 3
        self.state_dim = 10
        self.action_dim = 2
        self.num_layers = 2
        self.sizes = [20 for _ in range(self.num_layers)]
        self.activations = ["relu" for _ in range(self.num_layers)]
        self.use_layer_norm = False
        self.softmax_temperature = 1

        self.actions = [str(i) for i in range(self.action_dim)]
        self.params = PPOTrainerParameters(actions=self.actions, normalize=False)
        self.reward_options = RewardOptions()
        self.metrics_to_score = get_metrics_to_score(
            self.reward_options.metric_reward_values
        )

        self.policy_network = DuelingQNetwork.make_fully_connected(
            state_dim=self.state_dim,
            action_dim=self.action_dim,
            layers=self.sizes,
            activations=self.activations,
        )
        self.sampler = SoftmaxActionSampler(temperature=self.softmax_temperature)
        self.policy = Policy(scorer=self.policy_network, sampler=self.sampler)

        self.value_network = FloatFeatureFullyConnected(
            state_dim=self.state_dim,
            output_dim=1,
            sizes=self.sizes,
            activations=self.activations,
            use_layer_norm=self.use_layer_norm,
        )
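The SoftmaxActionSampler above turns the Q-network's scores into a stochastic policy: actions are drawn with probability proportional to exp(Q / temperature). A minimal illustration in plain torch (made-up values; this is not ReAgent's sampler API):

import torch

q_values = torch.randn(3, 2)  # hypothetical scores: batch of 3 states, 2 actions
temperature = 1.0
# Higher temperature flattens the distribution (more exploration);
# lower temperature approaches the greedy argmax policy.
dist = torch.distributions.Categorical(logits=q_values / temperature)
actions = dist.sample()             # shape (3,)
log_probs = dist.log_prob(actions)  # needed by policy-gradient trainers such as PPO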
Example #4
 def build_q_network(
     self,
     state_feature_config: rlt.ModelFeatureConfig,
     state_normalization_data: NormalizationData,
     output_dim: int,
 ) -> ModelBase:
     state_dim = self._get_input_dim(state_normalization_data)
     return DuelingQNetwork.make_fully_connected(
         state_dim, output_dim, self.sizes, self.activations
     )
Example #5
 def test_save_load_discrete_action(self):
     state_dim = 8
     action_dim = 4
     model = DuelingQNetwork.make_fully_connected(
         state_dim, action_dim, layers=[8, 4], activations=["relu", "relu"]
     )
     expected_num_params, expected_num_inputs, expected_num_outputs = 22, 1, 1
     check_save_load(
         self, model, expected_num_params, expected_num_inputs, expected_num_outputs
     )
Example #6
 def build_q_network(
     self,
     state_feature_config: rlt.ModelFeatureConfig,
     state_normalization_parameters: Dict[int, NormalizationParameters],
     output_dim: int,
 ) -> ModelBase:
     state_dim = self._get_input_dim(state_normalization_parameters)
     return DuelingQNetwork(
         layers=[state_dim] + self.sizes + [output_dim],
         activations=self.activations + ["linear"],
     )
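Examples #4 and #6 construct the same kind of network in two ways. Judging only from these snippets (an inference, not confirmed against ReAgent's source, and the two call styles may belong to different ReAgent versions), make_fully_connected looks like a convenience wrapper that appends the final linear output layer, while the raw constructor takes the complete layer and activation lists:

# Hedged sketch: two constructions the snippets above suggest are equivalent.
# Import path may differ between ReAgent versions.
from reagent.models.dueling_q_network import DuelingQNetwork

state_dim, output_dim = 10, 2
sizes, activations = [20, 20], ["relu", "relu"]

net_a = DuelingQNetwork.make_fully_connected(
    state_dim, output_dim, layers=sizes, activations=activations
)
net_b = DuelingQNetwork(
    layers=[state_dim] + sizes + [output_dim],
    activations=activations + ["linear"],
)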
Example #7
 def test_save_load_discrete_action(self):
     state_dim = 8
     action_dim = 4
     model = DuelingQNetwork(
         layers=[state_dim, 8, 4, action_dim],
         activations=["relu", "relu", "linear"],
         use_batch_norm=False,
     )
     expected_num_params, expected_num_inputs, expected_num_outputs = 22, 1, 1
     check_save_load(self, model, expected_num_params, expected_num_inputs,
                     expected_num_outputs)
Example #8
 def build_q_network(
     self,
     state_normalization_data: NormalizationData,
     output_dim: int,
     num_atoms: int,
 ) -> ModelBase:
     state_dim = self._get_input_dim(state_normalization_data)
     return DuelingQNetwork.make_fully_connected(
         state_dim,
         output_dim,
         layers=self.sizes,
         activations=self.activations,
         num_atoms=num_atoms,
     )
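In the builder above, num_atoms makes each action's value a vector of atoms (e.g. quantile estimates for QR-DQN) instead of a single scalar. A small illustration, in plain torch with made-up numbers, of how per-atom outputs are typically collapsed to action values when acting greedily (not ReAgent's internals):

import torch

batch_size, action_dim, num_atoms = 3, 2, 51
# Hypothetical per-atom outputs: one vector of atoms per (state, action) pair.
atom_q = torch.randn(batch_size, action_dim, num_atoms)
# The mean over atoms serves as the scalar action value for greedy selection.
q_values = atom_q.mean(dim=-1)            # (batch_size, action_dim)
greedy_actions = q_values.argmax(dim=-1)  # (batch_size,)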
Example #9
def create_dqn_trainer_from_params(
    model: DiscreteActionModelParameters,
    normalization_parameters: Dict[int, NormalizationParameters],
    use_gpu: bool = False,
    use_all_avail_gpus: bool = False,
    metrics_to_score=None,
):
    metrics_to_score = metrics_to_score or []

    if model.rainbow.quantile:
        q_network = QuantileDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=len(model.actions),
            num_atoms=model.rainbow.num_atoms,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )
    elif model.rainbow.categorical:
        q_network = CategoricalDQN(  # type: ignore
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=len(model.actions),
            num_atoms=model.rainbow.num_atoms,
            qmin=model.rainbow.qmin,
            qmax=model.rainbow.qmax,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
            use_gpu=use_gpu,
        )
    elif model.rainbow.dueling_architecture:
        q_network = DuelingQNetwork(  # type: ignore
            layers=[get_num_output_features(normalization_parameters)] +
            model.training.layers[1:-1] + [len(model.actions)],
            activations=model.training.activations,
        )
    else:
        q_network = FullyConnectedDQN(  # type: ignore
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=len(model.actions),
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )

    if use_gpu and torch.cuda.is_available():
        q_network = q_network.cuda()

    q_network_target = q_network.get_target_network()

    reward_network, q_network_cpe, q_network_cpe_target = None, None, None
    if model.evaluation.calc_cpe_in_training:
        # Metrics + reward
        num_output_nodes = (len(metrics_to_score) + 1) * len(model.actions)
        reward_network = FullyConnectedDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=num_output_nodes,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )
        q_network_cpe = FullyConnectedDQN(
            state_dim=get_num_output_features(normalization_parameters),
            action_dim=num_output_nodes,
            sizes=model.training.layers[1:-1],
            activations=model.training.activations[:-1],
            dropout_ratio=model.training.dropout_ratio,
        )

        if use_gpu and torch.cuda.is_available():
            reward_network.cuda()
            q_network_cpe.cuda()

        q_network_cpe_target = q_network_cpe.get_target_network()

    if (use_all_avail_gpus and not model.rainbow.categorical
            and not model.rainbow.quantile):
        q_network = q_network.get_distributed_data_parallel_model()
        reward_network = (reward_network.get_distributed_data_parallel_model()
                          if reward_network else None)
        q_network_cpe = (q_network_cpe.get_distributed_data_parallel_model()
                         if q_network_cpe else None)

    if model.rainbow.quantile:
        assert (not use_all_avail_gpus
                ), "use_all_avail_gpus not implemented for distributional RL"
        parameters = QRDQNTrainerParameters.from_discrete_action_model_parameters(
            model)
        return QRDQNTrainer(
            q_network,
            q_network_target,
            parameters,
            use_gpu,
            metrics_to_score=metrics_to_score,
            reward_network=reward_network,
            q_network_cpe=q_network_cpe,
            q_network_cpe_target=q_network_cpe_target,
        )

    elif model.rainbow.categorical:
        assert (not use_all_avail_gpus
                ), "use_all_avail_gpus not implemented for distributional RL"
        return C51Trainer(
            q_network,
            q_network_target,
            C51TrainerParameters.from_discrete_action_model_parameters(model),
            use_gpu,
            metrics_to_score=metrics_to_score,
        )

    else:
        parameters = DQNTrainerParameters.from_discrete_action_model_parameters(
            model)
        return DQNTrainer(
            q_network,
            q_network_target,
            reward_network,
            parameters,
            use_gpu,
            q_network_cpe=q_network_cpe,
            q_network_cpe_target=q_network_cpe_target,
            metrics_to_score=metrics_to_score,
        )