Пример #1
0
    def __init__(
        self,
        experiment_name: str,
        vocab: Vocabulary,
        question_embedder: TextFieldEmbedder,
        schema_encoder: Seq2SeqEncoder,
        beam_encoder: Seq2SeqEncoder,
        tree_rep_transformer: Seq2SeqEncoder,
        utterance_augmenter: Seq2SeqEncoder,
        beam_summarizer: Seq2SeqEncoder,
        decoder_timesteps=9,
        beam_size=30,
        misc_params=None,
        dropout: float = 0.1,
    ) -> None:
        """Build the model's sub-modules, losses and metric trackers.

        Args:
            experiment_name: Stored as ``_experiment_name``.
            vocab: Passed to the parent constructor and also stored as
                ``vocab``.
            question_embedder: Stored as ``_question_embedder``; its output
                dimension becomes ``q_emb_dim``, the input size of
                ``_emb_to_action_dim``.
            schema_encoder: Stored as ``_schema_encoder``.
            beam_encoder: Stored as ``_beam_encoder``; its output dimension
                becomes ``_action_dim``, from which every layer size created
                here is derived.
            tree_rep_transformer: Stored as ``_tree_rep_transformer``.
            utterance_augmenter: Stored as ``_utterance_augmenter``.
            beam_summarizer: Stored as ``_beam_summarizer``.
            decoder_timesteps: Stored as ``_decoder_timesteps``.
            beam_size: Stored as ``_beam_size``.
            misc_params: Stored as ``_misc_params`` before ``set_flags()``
                runs (presumably the source of the feature flags tested
                below -- confirm in ``set_flags``).
            dropout: Probability used by every ``Dropout`` layer created
                here; also stored as ``dropout_prob``.

        Raises:
            AssertionError: If the beam encoder's output dimension is odd.
        """
        # NOTE: sub-modules are created and assigned in a fixed order on
        # purpose. That order determines parameter registration order and the
        # order in which the RNG is consumed for weight initialisation.
        super().__init__(vocab)
        self._experiment_name = experiment_name
        self._misc_params = misc_params
        # The flags `cntx_reranker`, `utt_aug`, `lin_after_cntx` and
        # `cntx_rep` are not assigned anywhere in this constructor, so they
        # are expected to exist once `set_flags()` has returned.
        self.set_flags()
        self._utterance_augmenter = utterance_augmenter

        # Validate the dimension up front, before the tokenizer is loaded and
        # any layer is allocated.
        action_dim = beam_encoder.get_output_dim()
        assert (action_dim % 2) == 0, (
            "beam_encoder output dimension must be even, got {}".format(
                action_dim))
        self._action_dim = action_dim
        self._beam_size = beam_size
        self._n_schema_leafs = 15
        self._num_values = 10

        def feed_forward(in_dim, hidden_dim, out_dim, activation_cls):
            """Linear -> Dropout -> LayerNorm -> activation -> Linear."""
            return torch.nn.Sequential(
                torch.nn.Linear(in_dim, hidden_dim),
                torch.nn.Dropout(p=dropout),
                torch.nn.LayerNorm(hidden_dim),
                activation_cls(),
                torch.nn.Linear(hidden_dim, out_dim),
            )

        # Reaches into the indexer's private `_allennlp_tokenizer` attribute
        # to get at the underlying tokenizer object.
        self.tokenizer = TokenIndexer.by_name("pretrained_transformer")(
            model_name="Salesforce/grappa_large_jnt"
        )._allennlp_tokenizer.tokenizer

        # Optional projections from `action_dim` to `2 * action_dim`, each
        # guarded by its own flag.
        if not self.cntx_reranker:
            self._noreranker_cntx_linear = torch.nn.Linear(
                in_features=action_dim,
                out_features=2 * action_dim)
        if not self.utt_aug:
            self._nobeam_cntx_linear = torch.nn.Linear(
                in_features=action_dim,
                out_features=2 * action_dim)
        # Stored as a class (not an instance); it is instantiated once per
        # stack that uses it.
        self.activation_func = torch.nn.ReLU
        if self.lin_after_cntx:
            self.cntx_linear = feed_forward(
                2 * action_dim,
                4 * action_dim,
                2 * action_dim,
                self.activation_func,
            )
        if self.cntx_rep:
            self._cntx_rep_linear = torch.nn.Linear(
                in_features=action_dim,
                out_features=2 * action_dim)

        # `binary_op_count` / `unary_op_count` are read right after this call
        # and are not assigned in this constructor, so the helper is expected
        # to define them.
        self._create_action_dicts()
        self.op_count = self.binary_op_count + self.unary_op_count
        self.xent = torch.nn.CrossEntropyLoss()

        self.type_embedding = torch.nn.Embedding(self.op_count, action_dim)
        # Attribute name (sic) is kept as-is: it is the registered sub-module
        # name and therefore part of the parameter names.
        self.summrize_vec = torch.nn.Embedding(num_embeddings=1,
                                               embedding_dim=action_dim)

        frontier_dim = 2 * action_dim
        self.d_frontier = frontier_dim
        self.left_emb = torch.nn.Linear(in_features=frontier_dim,
                                        out_features=frontier_dim)
        self.right_emb = torch.nn.Linear(in_features=frontier_dim,
                                         out_features=frontier_dim)
        self.after_add = feed_forward(
            frontier_dim, frontier_dim, frontier_dim, self.activation_func)
        self._unary_frontier_embedder = feed_forward(
            frontier_dim, frontier_dim, frontier_dim, self.activation_func)

        self.op_linear = torch.nn.Linear(in_features=frontier_dim,
                                         out_features=self.op_count)
        # Same stack as `feed_forward` but without the trailing Linear.
        self.pre_op_linear = torch.nn.Sequential(
            torch.nn.Linear(frontier_dim, frontier_dim),
            torch.nn.Dropout(p=dropout),
            torch.nn.LayerNorm(frontier_dim),
            self.activation_func(),
        )

        self.vocab = vocab
        self._question_embedder = question_embedder
        self._schema_encoder = schema_encoder
        self._beam_encoder = beam_encoder
        self._beam_summarizer = beam_summarizer

        self._tree_rep_transformer = tree_rep_transformer

        self._decoder_timesteps = decoder_timesteps
        self.q_emb_dim = question_embedder.get_output_dim()

        self.dropout_prob = dropout
        self._span_score_func = torch.nn.Linear(action_dim, 2)
        self._pooler = BagOfEmbeddingsEncoder(embedding_dim=action_dim)

        # Scoring heads: both end in a single-output Linear and use Tanh
        # rather than `activation_func`.
        self._rank_schema = feed_forward(
            action_dim, action_dim, 1, torch.nn.Tanh)
        self._rank_beam = feed_forward(
            2 * action_dim, 2 * action_dim, 1, torch.nn.Tanh)
        self._emb_to_action_dim = torch.nn.Linear(
            in_features=self.q_emb_dim,
            out_features=action_dim,
        )

        self._create_type_tensor()

        self._bce_loss = torch.nn.BCEWithLogitsLoss(reduction="none")

        self._softmax = torch.nn.Softmax(dim=1)
        self._final_beam_acc = Average()
        self._reranker_acc = Average()
        self._spider_acc = Average()

        self._leafs_acc = Average()
        # Placeholder values; presumably overwritten elsewhere in the class
        # -- confirm.
        self._batch_size = -1
        self._device = None
        # Both paths are relative, so they resolve against the process's
        # working directory when the evaluator is invoked.
        self._evaluate_func = partial(
            evaluate_single,
            db_dir=os.path.join("dataset", "database"),
            table_file=os.path.join("dataset", "tables.json"),
        )
Пример #2
0
 def test_read_from_file(self, lazy):
     """Build a ``LevenshteinReader`` and hand it to ``_check_outputs``.

     Args:
         lazy: Forwarded to the reader's ``lazy`` argument (presumably
             supplied by a test parametrization that is not visible here
             -- confirm). It was previously accepted but ignored, so the
             reader was always built with ``lazy=False``.
     """
     token_indexers = {"tokens": TokenIndexer.by_name("single_id")()}
     reader = LevenshteinReader(token_indexers=token_indexers, lazy=lazy)
     self._check_outputs(reader)