示例#1
0
 def _get_unmasked_q_values(self, q_network, state: rlt.FeatureData,
                            slate: rlt.DocList) -> torch.Tensor:
     """Evaluate ``q_network`` on every (state, slate item) pair.

     The state is repeated ``slate_size`` times along dim 0 so that it
     lines up with the slate converted via ``slate.as_feature_data()``.
     The network output is then reshaped to ``(batch_size, slate_size)``,
     where both sizes are read from the first two dimensions of
     ``slate.float_features``.

     Args:
         q_network: Callable invoked as ``q_network(state, action)``.
         state: State features; must support ``repeat_interleave``.
         slate: Slate whose ``float_features`` is a 3-D tensor.

     Returns:
         The network output viewed as ``(batch_size, slate_size)``.
     """
     num_rows, num_items, _ = slate.float_features.shape
     # TODO: Probably should create a new model type
     tiled_state = state.repeat_interleave(num_items, dim=0)
     item_features = slate.as_feature_data()
     flat_q_values = q_network(tiled_state, item_features)
     return flat_q_values.view(num_rows, num_items)
示例#2
0
    def score(state: rlt.FeatureData) -> torch.Tensor:
        """Score each candidate as softmax(candidate value) * Q-value.

        The state is repeated ``num_candidates`` times along axis 0 and fed,
        together with the candidate docs converted via ``as_feature_data()``,
        through ``q_network`` in eval mode. The output is reshaped to
        ``(-1, num_candidates)`` and multiplied elementwise by the softmax
        (over dim 1) of ``candidate_docs.value``.

        ``q_network`` and ``num_candidates`` are taken from the enclosing
        scope.

        Args:
            state: State features; ``state.candidate_docs`` must not be None.

        Returns:
            Tensor with the same shape as the reshaped Q-values, holding
            ``select_prob * scores``.

        Raises:
            AssertionError: If ``candidate_docs`` is None or the softmax
                output shape differs from the reshaped Q-value shape.
        """
        tiled_state = state.repeat_interleave(repeats=num_candidates, axis=0)
        candidate_docs = state.candidate_docs
        assert candidate_docs is not None
        actions = candidate_docs.as_feature_data()

        # Remember the mode the network was in so scoring does not flip an
        # eval-mode network into training mode as a side effect. Objects
        # without a ``training`` flag keep the previous behaviour (train()
        # is called afterwards).
        was_training = getattr(q_network, "training", True)
        q_network.eval()
        try:
            scores = q_network(tiled_state, actions).view(-1, num_candidates)
        finally:
            # Restore even if the forward pass raised, so the network is
            # never left stuck in eval mode.
            if was_training:
                q_network.train()

        select_prob = F.softmax(candidate_docs.value, dim=1)
        assert select_prob.shape == scores.shape

        return select_prob * scores