    def test_lstm_synthetic_reward(self):
        state_dim = 10
        action_dim = 2
        last_layer_activation = "leaky_relu"
        net = SequenceSyntheticRewardNet(
            state_dim=state_dim,
            action_dim=action_dim,
            lstm_hidden_size=128,
            lstm_num_layers=2,
            lstm_bidirectional=True,
            last_layer_activation=last_layer_activation,
        )
        reward_net = SyntheticRewardNet(net)
        lstm = reward_net.export_mlp().lstm
        assert lstm.bidirectional
        assert lstm.input_size == 12  # state_dim + action_dim
        assert lstm.hidden_size == 128
        assert lstm.num_layers == 2

        dnn = reward_net.export_mlp().fc_out
        assert dnn.in_features == 128 * 2  # bidirectional doubles the hidden size
        assert dnn.out_features == 1

        output_activation = reward_net.export_mlp().output_activation
        assert output_activation._get_name() == "LeakyReLU"
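        # Hedged sanity sketch with plain torch (independent of the class
        # under test): a bidirectional LSTM emits hidden_size * 2 features
        # per step, which is why fc_out.in_features must be 128 * 2 above.
        probe = torch.nn.LSTM(
            input_size=12, hidden_size=128, num_layers=2, bidirectional=True
        )
        out, _ = probe(torch.randn(5, 3, 12))  # (seq_len, batch, input_size)
        assert out.shape == (5, 3, 256)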

    def test_ngram_conv_net_synthetic_reward(self):
        state_dim = 10
        action_dim = 2
        sizes = [256, 128]
        activations = ["sigmoid", "relu"]
        last_layer_activation = "leaky_relu"
        context_size = 3

        conv_net_params = rlp.ConvNetParameters(
            conv_dims=[256, 128],
            conv_height_kernels=[1, 1],
            pool_types=["max", "max"],
            pool_kernel_sizes=[1, 1],
        )
        net = NGramConvolutionalNetwork(
            state_dim=state_dim,
            action_dim=action_dim,
            sizes=sizes,
            activations=activations,
            last_layer_activation=last_layer_activation,
            context_size=context_size,
            conv_net_params=conv_net_params,
        )

        reward_net = SyntheticRewardNet(net)
        conv_net = reward_net.export_mlp().conv_net

        assert conv_net.conv_dims == [1, 256, 128]
        assert conv_net.conv_height_kernels == [1, 1]
        # first kernel spans the full state_dim + action_dim width
        assert conv_net.conv_width_kernels == [12, 1]

        assert conv_net.conv_layers[0].in_channels == 1
        assert conv_net.conv_layers[0].out_channels == 256
        assert conv_net.conv_layers[0].kernel_size == (1, 12)
        assert conv_net.conv_layers[0].stride == (1, 1)
        assert conv_net.conv_layers[1].in_channels == 256
        assert conv_net.conv_layers[1].out_channels == 128
        assert conv_net.conv_layers[1].kernel_size == (1, 1)
        assert conv_net.conv_layers[1].stride == (1, 1)

        dnn = reward_net.export_mlp().conv_net.feed_forward.dnn
        assert dnn[0].in_features == 384  # 128 channels * context_size 3
        assert dnn[0].out_features == 256
        assert dnn[1]._get_name() == "Sigmoid"
        assert dnn[2].in_features == 256
        assert dnn[2].out_features == 128
        assert dnn[3]._get_name() == "ReLU"
        assert dnn[4].in_features == 128
        assert dnn[4].out_features == 1
        assert dnn[5]._get_name() == "LeakyReLU"
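        # Hedged shape trace with plain torch layers (an illustration, not the
        # library's code) of why dnn[0].in_features == 384: input is
        # (batch, 1, context_size=3, state_dim + action_dim=12); kernel (1, 12)
        # collapses the width, kernel (1, 1) keeps height 3, and max-pooling
        # with kernel size 1 is a no-op, so flattening gives 128 * 3 * 1 = 384.
        x = torch.randn(3, 1, 3, 12)
        x = torch.nn.Conv2d(1, 256, kernel_size=(1, 12))(x)   # -> (3, 256, 3, 1)
        x = torch.nn.Conv2d(256, 128, kernel_size=(1, 1))(x)  # -> (3, 128, 3, 1)
        assert x.flatten(start_dim=1).shape == (3, 384)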

    def test_transformer_synthetic_reward(self):
        state_dim = 10
        action_dim = 2
        d_model = 64
        nhead = 8
        num_encoder_layers = 2
        dim_feedforward = 64
        dropout = 0.0
        activation = "relu"
        last_layer_activation = "leaky_relu"
        layer_norm_eps = 1e-5
        max_len = 10

        net = TransformerSyntheticRewardNet(
            state_dim=state_dim,
            action_dim=action_dim,
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            last_layer_activation=last_layer_activation,
            layer_norm_eps=layer_norm_eps,
            max_len=max_len,
        )

        reward_net = SyntheticRewardNet(net)
        export_net = reward_net.export_mlp()
        transformer = export_net.transformer
        assert export_net.state_dim == state_dim
        assert export_net.action_dim == action_dim
        assert export_net.d_model == d_model
        assert export_net.nhead == nhead
        assert export_net.dim_feedforward == dim_feedforward
        assert export_net.dropout == dropout
        assert export_net.activation == activation
        assert export_net.layer_norm_eps == layer_norm_eps

        assert transformer.num_layers == num_encoder_layers
        dnn_out = export_net.fc_out
        assert dnn_out.in_features == d_model
        assert dnn_out.out_features == 1

        output_activation = export_net.output_activation
        assert output_activation._get_name() == "LeakyReLU"
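        # Hedged analogue built from plain torch modules (an assumption about
        # the internal layout, not this class's code): stacked encoder layers
        # preserve d_model, so the d_model -> 1 head (fc_out above) yields one
        # scalar reward per step.
        layer = torch.nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
        )
        encoder = torch.nn.TransformerEncoder(layer, num_layers=num_encoder_layers)
        out = encoder(torch.randn(max_len, 3, d_model))  # (seq_len, batch, d_model)
        assert out.shape == (max_len, 3, d_model)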

    def test_ngram_fc_synthetic_reward(self):
        state_dim = 10
        action_dim = 2
        sizes = [256, 128]
        activations = ["sigmoid", "relu"]
        last_layer_activation = "leaky_relu"
        context_size = 3

        net = NGramFullyConnectedNetwork(
            state_dim=state_dim,
            action_dim=action_dim,
            sizes=sizes,
            activations=activations,
            last_layer_activation=last_layer_activation,
            context_size=context_size,
        )
        reward_net = SyntheticRewardNet(net)

        dnn = reward_net.export_mlp().fc.dnn
        assert dnn[0].in_features == (state_dim + action_dim) * context_size
        assert dnn[0].out_features == 256
        assert dnn[1]._get_name() == "Sigmoid"
        assert dnn[2].in_features == 256
        assert dnn[2].out_features == 128
        assert dnn[3]._get_name() == "ReLU"
        assert dnn[4].in_features == 128
        assert dnn[4].out_features == 1
        assert dnn[5]._get_name() == "LeakyReLU"

        valid_step = torch.tensor([[1], [2], [3]])
        batch_size = 3
        seq_len = 4
        mask = _gen_mask(valid_step, batch_size, seq_len)
        assert torch.all(
            mask
            == torch.tensor(
                [[0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 1.0, 1.0], [0.0, 1.0, 1.0, 1.0]]
            )
        )
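        # Minimal re-derivation of the masking rule (hypothetical, for
        # illustration): row i is 1.0 exactly on its last valid_step[i] steps.
        expected = torch.zeros(batch_size, seq_len)
        for i in range(batch_size):
            expected[i, seq_len - int(valid_step[i]) :] = 1.0
        assert torch.all(mask == expected)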

    def test_single_step_synthetic_reward(self):
        state_dim = 10
        action_dim = 2
        sizes = [256, 128]
        activations = ["sigmoid", "relu"]
        last_layer_activation = "leaky_relu"
        reward_net = SyntheticRewardNet(
            SingleStepSyntheticRewardNet(
                state_dim=state_dim,
                action_dim=action_dim,
                sizes=sizes,
                activations=activations,
                last_layer_activation=last_layer_activation,
            )
        )
        dnn = reward_net.export_mlp().dnn
        # dnn[0] is a concat layer
        assert dnn[1].in_features == state_dim + action_dim
        assert dnn[1].out_features == 256
        assert dnn[2]._get_name() == "Sigmoid"
        assert dnn[3].in_features == 256
        assert dnn[3].out_features == 128
        assert dnn[4]._get_name() == "ReLU"
        assert dnn[5].in_features == 128
        assert dnn[5].out_features == 1
        assert dnn[6]._get_name() == "LeakyReLU"

        valid_step = torch.tensor([[1], [2], [3]])
        batch_size = 3
        seq_len = 4
        mask = _gen_mask(valid_step, batch_size, seq_len)
        assert torch.all(
            mask
            == torch.tensor(
                [[0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 1.0, 1.0], [0.0, 1.0, 1.0, 1.0]]
            )
        )
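        # Sketch of how such a mask is typically consumed (an assumption about
        # the training setup, not the trainer's code): zero out invalid steps,
        # then sum per-step predictions into one scalar per sequence.
        per_step = torch.ones(batch_size, seq_len)  # stand-in predictions
        masked_sum = (per_step * mask).sum(dim=1)
        assert torch.all(masked_sum == valid_step.squeeze(1).float())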