Example #1
    def __init__(
        self,
        vocab: Vocabulary,
        embed: TextFieldEmbedder,
        encoder_size: int,
        decoder_size: int,
        num_layers: int,
        beam_size: int,
        max_decoding_steps: int,
        use_bleu: bool = True,
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:
        super().__init__(vocab)

        self.START, self.END = self.vocab.get_token_index(
            START_SYMBOL), self.vocab.get_token_index(END_SYMBOL)
        self.OOV = self.vocab.get_token_index(self.vocab._oov_token)  # pylint: disable=protected-access
        self.PAD = self.vocab.get_token_index(self.vocab._padding_token)  # pylint: disable=protected-access
        self.COPY = self.vocab.get_token_index("@@COPY@@")
        self.KEEP = self.vocab.get_token_index("@@KEEP@@")
        self.DROP = self.vocab.get_token_index("@@DROP@@")

        self.SYMBOL = (self.START, self.END, self.PAD, self.KEEP, self.DROP)
        self.vocab_size = vocab.get_vocab_size()
        self.EMB = embed

        self.emb_size = self.EMB.token_embedder_tokens.output_dim
        self.encoder_size, self.decoder_size = encoder_size, decoder_size
        self.FACT_ENCODER = FeedForward(3 * self.emb_size, 1, encoder_size,
                                        nn.Tanh())
        self.ATTN = AdditiveAttention(encoder_size + decoder_size,
                                      encoder_size)
        self.COPY_ATTN = AdditiveAttention(decoder_size, encoder_size)
        module = nn.LSTM(self.emb_size,
                         encoder_size // 2,
                         num_layers,
                         bidirectional=True,
                         batch_first=True)
        self.BUFFER = PytorchSeq2SeqWrapper(
            module)  # BiLSTM to encode draft text
        self.STREAM = nn.LSTMCell(2 * encoder_size,
                                  decoder_size)  # Store revised text

        self.BEAM = BeamSearch(self.END,
                               max_steps=max_decoding_steps,
                               beam_size=beam_size)

        self.U = nn.Sequential(nn.Linear(2 * encoder_size, decoder_size),
                               nn.Tanh())
        self.ADD = nn.Sequential(nn.Linear(self.emb_size, encoder_size),
                                 nn.Tanh())

        self.P = nn.Sequential(
            nn.Linear(encoder_size + decoder_size, decoder_size), nn.Tanh())
        self.W = nn.Linear(decoder_size, self.vocab_size)
        self.G = nn.Sequential(nn.Linear(decoder_size, 1), nn.Sigmoid())

        initializer(self)
        self._bleu = BLEU(
            exclude_indices=set(self.SYMBOL)) if use_bleu else None
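
The two attention modules above score encoder states against a query whose size must equal their vector_dim. A minimal sketch of how ATTN might be queried during decoding follows; the dimension values and tensor names are illustrative assumptions, not part of the original model:

import torch
from allennlp.modules.attention import AdditiveAttention

encoder_size, decoder_size, seq_len, batch = 8, 6, 5, 2

# Same wiring as self.ATTN: the query is the concatenated [encoder; decoder] state.
attn = AdditiveAttention(vector_dim=encoder_size + decoder_size, matrix_dim=encoder_size)

query = torch.randn(batch, encoder_size + decoder_size)
encoder_states = torch.randn(batch, seq_len, encoder_size)    # e.g. BUFFER outputs

weights = attn(query, encoder_states)                          # (batch, seq_len), softmax-normalized
context = weights.unsqueeze(1).bmm(encoder_states).squeeze(1)  # (batch, encoder_size)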
Example #2
def get_masked_copynet_with_attention(vocab: Vocabulary,
                                      max_decoding_steps: int = 20,
                                      beam_size: int = 1) -> MaskedCopyNet:

    word_embeddings = Embedding(
        num_embeddings=vocab.get_vocab_size("tokens"),
        embedding_dim=EMB_DIM
    )
    word_embeddings = BasicTextFieldEmbedder({"tokens": word_embeddings})

    masker_embeddings = Embedding(
        num_embeddings=vocab.get_vocab_size("mask_tokens"),
        embedding_dim=MASK_EMB_DIM
    )
    masker_embeddings = BasicTextFieldEmbedder({"tokens": masker_embeddings})

    attention = AdditiveAttention(vector_dim=HID_DIM * 2, matrix_dim=HID_DIM * 2)
    mask_attention = AdditiveAttention(vector_dim=HID_DIM * 2, matrix_dim=MASK_EMB_DIM)
    lstm = PytorchSeq2SeqWrapper(nn.LSTM(EMB_DIM, HID_DIM, batch_first=True, bidirectional=True))

    return MaskedCopyNet(
        vocab=vocab,
        embedder=word_embeddings,
        encoder=lstm,
        max_decoding_steps=max_decoding_steps,
        attention=attention,
        mask_embedder=masker_embeddings,
        mask_attention=mask_attention,
        beam_size=beam_size
    )
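
Both attention modules are sized to match what they will attend over: the bidirectional LSTM yields 2 * HID_DIM-dimensional encoder states, while the mask attention attends over MASK_EMB_DIM-dimensional mask embeddings. A quick shape check, using illustrative constants (the real EMB_DIM/HID_DIM values are module-level settings in the original project):

import torch
import torch.nn as nn
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

EMB_DIM, HID_DIM = 16, 8   # illustrative values only

lstm = PytorchSeq2SeqWrapper(nn.LSTM(EMB_DIM, HID_DIM, batch_first=True, bidirectional=True))
tokens = torch.randn(2, 7, EMB_DIM)
mask = torch.ones(2, 7, dtype=torch.bool)
encoded = lstm(tokens, mask)
assert encoded.shape == (2, 7, 2 * HID_DIM)   # matches the attentions' vector_dim / matrix_dim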
Example #3
    def __init__(self,
                 encoder_output_dim: int,
                 action_embedding_dim: int,
                 input_attention: Attention,
                 past_attention: Attention,
                 activation: Activation = Activation.by_name('relu')(),
                 enable_gating: bool = True,
                 ablation_mode: str = None,
                 predict_start_type_separately: bool = True,
                 num_start_types: int = None,
                 add_action_bias: bool = True,
                 dropout: float = 0.0,
                 num_layers: int = 1) -> None:
        super().__init__(encoder_output_dim=encoder_output_dim,
                         action_embedding_dim=action_embedding_dim,
                         input_attention=input_attention,
                         activation=activation,
                         add_action_bias=add_action_bias,
                         dropout=dropout,
                         num_layers=num_layers)

        self.enable_gating = enable_gating
        self.ablation_mode = ablation_mode

        self._decoder_dim = encoder_output_dim
        self._input_projection_layer = Linear(
            encoder_output_dim + action_embedding_dim, encoder_output_dim)

        if add_action_bias:
            action_embedding_dim = action_embedding_dim + 1
        self._past_attention = AdditiveAttention(action_embedding_dim,
                                                 action_embedding_dim, True)
        self._action2gate = Linear(action_embedding_dim, 1)
        if self.ablation_mode != "wo_copy":
            self._past_copy_attention = AdditiveAttention(
                action_embedding_dim, action_embedding_dim, False)
            self._action2copygate = Linear(action_embedding_dim, 1)
        if self.ablation_mode == "wo_reuse_emb":
            self._output_projection_layer2 = Linear(encoder_output_dim * 2,
                                                    action_embedding_dim)
        self._ent2ent_ff = FeedForward(1, 1, 1, Activation.by_name('linear')())
        self._large_dropout = Dropout(0.3)
Example #4
def get_deep_levenshtein_copynet(
        masked_copynet: MaskedCopyNet) -> DeepLevenshtein:
    masked_copynet.eval()
    for p in masked_copynet.parameters():
        p.requires_grad = False

    hidden_dim = masked_copynet._encoder_output_dim
    body = BoWMaxAndMeanEncoder(embedding_dim=hidden_dim, hidden_dim=[64, 32])
    attention = AdditiveAttention(vector_dim=body.get_output_dim(),
                                  matrix_dim=HID_DIM * 2)

    model = DeepLevenshtein(vocab=masked_copynet.vocab,
                            text_field_embedder=masked_copynet._embedder,
                            seq2seq_encoder=masked_copynet._encoder,
                            seq2vec_encoder=body,
                            attention=attention)
    return model
Example #5
def get_deep_levenshtein_attention(vocab: Vocabulary) -> DeepLevenshtein:

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMB_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(EMB_DIM, HID_DIM, batch_first=True, bidirectional=True))
    body = BoWMaxAndMeanEncoder(embedding_dim=HID_DIM * 2, hidden_dim=[64, 32])
    attention = AdditiveAttention(vector_dim=body.get_output_dim(),
                                  matrix_dim=HID_DIM * 2)

    model = DeepLevenshtein(vocab=vocab,
                            text_field_embedder=word_embeddings,
                            seq2seq_encoder=lstm,
                            seq2vec_encoder=body,
                            attention=attention)
    return model
Example #6
    def test_forward_does_an_additive_product(self):
        params = Params({
                'vector_dim': 2,
                'matrix_dim': 3,
                'normalize': False,
                })
        additive = AdditiveAttention.from_params(params)
        additive._w_matrix = Parameter(torch.Tensor([[-0.2, 0.3], [-0.5, 0.5]]))
        additive._u_matrix = Parameter(torch.Tensor([[0., 1.], [1., 1.], [1., -1.]]))
        additive._v_vector = Parameter(torch.Tensor([[1.], [-1.]]))
        vectors = torch.FloatTensor([[0.7, -0.8], [0.4, 0.9]])
        matrices = torch.FloatTensor([
                [[1., -1., 3.], [0.5, -0.3, 0.], [0.2, -1., 1.], [0.7, 0.8, -1.]],
                [[-2., 3., -3.], [0.6, 0.2, 2.], [0.5, -0.4, -1.], [0.2, 0.2, 0.]]])
        result = additive(vectors, matrices).detach().numpy()
        assert result.shape == (2, 4)
        assert_almost_equal(result, [
                [1.975072, -0.04997836, 1.2176098, -0.9205586],
                [-1.4851665, 1.489604, -1.890285, -1.0672251]])
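
The expected values in this test follow directly from the additive (Bahdanau-style) scoring function score(q, k) = vᵀ tanh(qW + kU); with normalize=False the module returns the raw scores. Reproducing the first entry by hand with plain PyTorch, using only the parameters the test fixes:

import torch

W = torch.tensor([[-0.2, 0.3], [-0.5, 0.5]])
U = torch.tensor([[0., 1.], [1., 1.], [1., -1.]])
v = torch.tensor([[1.], [-1.]])

q = torch.tensor([0.7, -0.8])      # first query vector
k = torch.tensor([1., -1., 3.])    # first key (row of the first matrix)

score = torch.tanh(q @ W + k @ U) @ v
print(score)  # ≈ 1.975072, matching result[0][0] above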
Example #7
    def __init__(
        self,
        vocab: Vocabulary,
        encoder: Seq2SeqEncoder,
        freeze_encoder: bool = False,
        dropout: float = 0.0,
        tokens_namespace: str = "tokens",
        rule_namespace: str = "rule_labels",
        denotation_namespace: str = "labels",
        num_parse_only_batches: int = 0,
        use_gold_program_for_eval: bool = True,
        load_weights: str = None,
        use_modules: bool = True,
        positive_iou_threshold: float = 0.5,
        negative_iou_threshold: float = 0.5,
        nmn_settings: Dict = None,
    ) -> None:
        super().__init__(vocab)
        self._encoder = encoder
        self._max_decoding_steps = 10
        self._add_action_bias = True
        self._dropout = torch.nn.Dropout(p=dropout)
        self._tokens_namespace = tokens_namespace
        self._rule_namespace = rule_namespace
        self._denotation_namespace = denotation_namespace
        self._num_parse_only_batches = num_parse_only_batches
        self._use_gold_program_for_eval = use_gold_program_for_eval
        self._nmn_settings = nmn_settings
        self._use_modules = use_modules
        self._training_batches_so_far = 0

        self._denotation_accuracy = CategoricalAccuracy()
        self._box_f1_score = ClassificationModuleScore(
            positive_iou_threshold=positive_iou_threshold,
            negative_iou_threshold=negative_iou_threshold,
        )
        self._best_box_f1_score = ClassificationModuleScore(
            positive_iou_threshold=positive_iou_threshold,
            negative_iou_threshold=negative_iou_threshold,
        )
        # TODO(mattg): use FullSequenceMatch instead of this.
        self._program_accuracy = Average()
        self._program_similarity = Average()
        self.loss = torch.nn.BCELoss()
        self.loss_with_logits = torch.nn.BCEWithLogitsLoss()

        self._action_padding_index = -1  # the padding value used by IndexField
        num_actions = vocab.get_vocab_size(self._rule_namespace)
        action_embedding_dim = 100
        if self._add_action_bias:
            input_action_dim = action_embedding_dim + 1
        else:
            input_action_dim = action_embedding_dim
        self._action_embedder = Embedding(num_embeddings=num_actions,
                                          embedding_dim=input_action_dim)
        self._output_action_embedder = Embedding(
            num_embeddings=num_actions, embedding_dim=action_embedding_dim)

        if self._use_modules:
            self._language_parameters = VisualReasoningNlvr2Parameters(
                hidden_dim=self._encoder.get_output_dim(),
                initializer=self._encoder.encoder.model.init_bert_weights,
                max_boxes=self._nmn_settings["max_boxes"],
                dropout=dropout,
                nmn_settings=nmn_settings,
            )
        else:
            hid_dim = self._encoder.get_output_dim()
            self.logit_fc = torch.nn.Sequential(
                torch.nn.Linear(hid_dim * 2, hid_dim * 2),
                GeLU(),
                BertLayerNorm(hid_dim * 2, eps=1e-12),
                torch.nn.Linear(hid_dim * 2, 1),
            )
            self.logit_fc.apply(self._encoder.encoder.model.init_bert_weights)

        # This is what we pass as input in the first step of decoding, when we don't have a
        # previous action, or a previous utterance attention.
        encoder_output_dim = self._encoder.get_output_dim()

        self._decoder_num_layers = 1

        self._beam_search = BeamSearch(beam_size=10)
        self._decoder_trainer = MaximumMarginalLikelihood()
        self._first_action_embedding = torch.nn.Parameter(
            torch.FloatTensor(action_embedding_dim))
        self._first_attended_utterance = torch.nn.Parameter(
            torch.FloatTensor(encoder_output_dim))
        torch.nn.init.normal_(self._first_action_embedding)
        torch.nn.init.normal_(self._first_attended_utterance)
        self._transition_function = BasicTransitionFunction(
            encoder_output_dim=encoder_output_dim,
            action_embedding_dim=action_embedding_dim,
            input_attention=AdditiveAttention(vector_dim=encoder_output_dim,
                                              matrix_dim=encoder_output_dim),
            add_action_bias=self._add_action_bias,
            dropout=dropout,
            num_layers=self._decoder_num_layers,
        )

        # Our language is constant across instances, so we just create one up front that we can
        # re-use to construct the `GrammarStatelet`.
        self._world = VisualReasoningNlvr2Language(None, None, None, None,
                                                   None, None)

        if load_weights is not None:
            if not os.path.exists(load_weights):
                print('Could not find weights path: ' + load_weights +
                      '. Continuing without loading weights.')
            else:
                if torch.cuda.is_available():
                    state = torch.load(load_weights)
                else:
                    state = torch.load(load_weights, map_location="cpu")
                encoder_prefix = "_encoder"
                lang_params_prefix = "_language_parameters"
                for key in list(state.keys()):
                    if (key[:len(encoder_prefix)] != encoder_prefix
                            and key[:len(lang_params_prefix)] !=
                            lang_params_prefix):
                        del state[key]
                    if "relate_layer" in key:
                        del state[key]
                self.load_state_dict(state, strict=False)

        if freeze_encoder:
            for param in self._encoder.parameters():
                param.requires_grad = False

        self.consistency_group_map = {}
Example #8
    def __init__(self,
                 vocab: Vocabulary,
                 source_embedder_1: TextFieldEmbedder,
                 source_encoder_1: Seq2SeqEncoder,
                 beam_size: int,
                 max_decoding_steps: int,
                 decoder_output_dim: int,
                 target_embedding_dim: int = 30,
                 namespace: str = "tokens",
                 tensor_based_metric: Metric = None,
                 align_embeddings: bool = True,
                 source_embedder_2: TextFieldEmbedder = None,
                 source_encoder_2: Seq2SeqEncoder = None) -> None:
        super().__init__(vocab)
        self._source_embedder_1 = source_embedder_1
        self._source_embedder_2 = source_embedder_2 or self._source_embedder_1
        self._source_encoder_1 = source_encoder_1
        self._source_encoder_2 = source_encoder_2 or self._source_encoder_1

        self._source_namespace = namespace
        self._target_namespace = namespace

        self.encoder_output_dim_1 = self._source_encoder_1.get_output_dim()
        self.encoder_output_dim_2 = self._source_encoder_2.get_output_dim()
        self.cated_encoder_out_dim = self.encoder_output_dim_1 + self.encoder_output_dim_2
        self.decoder_output_dim = decoder_output_dim

        # TODO: AllenNLP's AdditiveAttention implementation may not have a bias term
        self._attention_1 = AdditiveAttention(self.decoder_output_dim,
                                              self.encoder_output_dim_1)
        self._attention_2 = AdditiveAttention(self.decoder_output_dim,
                                              self.encoder_output_dim_2)

        if not align_embeddings:
            self.target_embedding_dim = target_embedding_dim
            self._target_vocab_size = self.vocab.get_vocab_size(
                namespace=self._target_namespace)
            self._target_embedder = Embedding(self._target_vocab_size,
                                              target_embedding_dim)
        else:
            self._target_embedder = self._source_embedder_1._token_embedders[
                "tokens"]
            self._target_vocab_size = self.vocab.get_vocab_size(
                namespace=self._target_namespace)
            self.target_embedding_dim = self._target_embedder.get_output_dim()

        self.decoder_input_dim = self.encoder_output_dim_1 + self.encoder_output_dim_2 + \
                                 self.target_embedding_dim

        self._decoder_cell = LSTMCell(self.decoder_input_dim,
                                      self.decoder_output_dim)

        # Project the concatenated final hidden states of the two encoders to the decoder's initial state
        self._encoder_out_projection_layer = torch.nn.Linear(
            in_features=self.cated_encoder_out_dim,
            out_features=self.decoder_output_dim
        )  # TODO: bias - true or false?

        # Soft gating parameters, used to compute lambda
        self._gate_projection_layer = torch.nn.Linear(
            in_features=self.decoder_output_dim + self.decoder_input_dim,
            out_features=1,
            bias=False)

        self._start_index = self.vocab.get_token_index(START_SYMBOL, namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL, namespace)
        self._pad_index = self.vocab.get_token_index(self.vocab._padding_token,
                                                     namespace)
        self._beam_search = BeamSearch(self._end_index,
                                       max_steps=max_decoding_steps,
                                       beam_size=beam_size)

        self._tensor_based_metric = tensor_based_metric or \
            BLEU(exclude_indices={self._pad_index, self._end_index, self._start_index})
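
The gate projection defined above maps the concatenated decoder state and decoder input to a single scalar; per the comment, this lambda presumably interpolates between the two source attentions in the decoding step, which is not shown here. A sketch of that reading, with illustrative dimensions:

import torch

decoder_output_dim, decoder_input_dim, batch = 8, 20, 2   # illustrative sizes

gate = torch.nn.Linear(decoder_output_dim + decoder_input_dim, 1, bias=False)

decoder_hidden = torch.randn(batch, decoder_output_dim)
decoder_input = torch.randn(batch, decoder_input_dim)     # [context_1; context_2; prev-token embedding]

lam = torch.sigmoid(gate(torch.cat([decoder_hidden, decoder_input], dim=-1)))  # (batch, 1)
# e.g. mixed = lam * attended_source_1 + (1 - lam) * attended_source_2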
Example #9
    def __init__(
        self,
        vocab: Vocabulary,
        encoder: Seq2SeqEncoder,
        dropout: float = 0.0,
        object_loss_multiplier: float = 0.0,
        denotation_loss_multiplier: float = 1.0,
        tokens_namespace: str = "tokens",
        rule_namespace: str = "rule_labels",
        denotation_namespace: str = "labels",
        num_parse_only_batches: int = 0,
        use_gold_program_for_eval: bool = False,
        nmn_settings: Dict = None,
    ) -> None:
        # Atis semantic parser init
        super().__init__(vocab)
        self._encoder = encoder
        self._dropout = torch.nn.Dropout(p=dropout)
        self._obj_loss_multiplier = object_loss_multiplier
        self._denotation_loss_multiplier = denotation_loss_multiplier
        self._tokens_namespace = tokens_namespace
        self._rule_namespace = rule_namespace
        self._denotation_namespace = denotation_namespace
        self._num_parse_only_batches = num_parse_only_batches
        self._use_gold_program_for_eval = use_gold_program_for_eval
        self._nmn_settings = nmn_settings
        self._training_batches_so_far = 0

        self._denotation_accuracy = CategoricalAccuracy()
        self._proposal_accuracy = CategoricalAccuracy()
        # TODO(mattg): use FullSequenceMatch instead of this.
        self._program_accuracy = Average()
        self.loss = torch.nn.BCELoss()

        self._action_padding_index = -1  # the padding value used by IndexField
        num_actions = vocab.get_vocab_size(self._rule_namespace)
        action_embedding_dim = 100
        self._add_action_bias = True
        if self._add_action_bias:
            input_action_dim = action_embedding_dim + 1
        else:
            input_action_dim = action_embedding_dim
        self._action_embedder = Embedding(num_embeddings=num_actions,
                                          embedding_dim=input_action_dim)
        self._output_action_embedder = Embedding(
            num_embeddings=num_actions, embedding_dim=action_embedding_dim)

        self._language_parameters = VisualReasoningGqaParameters(
            hidden_dim=self._encoder.get_output_dim(),
            initializer=self._encoder.encoder.model.init_bert_weights,
        )

        # This is what we pass as input in the first step of decoding, when we don't have a
        # previous action, or a previous utterance attention.
        self._first_action_embedding = torch.nn.Parameter(
            torch.FloatTensor(action_embedding_dim))
        # encoder_output_dim = self._lxrt_encoder.get_output_dim()
        encoder_output_dim = self._encoder.get_output_dim()
        self._first_attended_utterance = torch.nn.Parameter(
            torch.FloatTensor(encoder_output_dim))
        torch.nn.init.normal_(self._first_action_embedding)
        torch.nn.init.normal_(self._first_attended_utterance)

        self._decoder_num_layers = 1

        self._beam_search = BeamSearch(beam_size=10)
        self._decoder_trainer = MaximumMarginalLikelihood()
        self._transition_function = BasicTransitionFunction(
            encoder_output_dim=encoder_output_dim,
            action_embedding_dim=action_embedding_dim,
            input_attention=AdditiveAttention(vector_dim=encoder_output_dim,
                                              matrix_dim=encoder_output_dim),
            add_action_bias=self._add_action_bias,
            dropout=dropout,
            num_layers=self._decoder_num_layers,
        )
        self._language_parameters.apply(
            self._encoder.encoder.model.init_bert_weights)
        # attention.apply(self._lxrt_encoder.encoder.model.init_bert_weights)
        # self._transition_function.apply(self._lxrt_encoder.encoder.model.init_bert_weights)

        # Our language is constant across instances, so we just create one up front that we can
        # re-use to construct the `GrammarStatelet`.
        self._world = VisualReasoningGqaLanguage(None, None, None, None, None)