def __init__(
    self,
    vocab: Vocabulary,
    embed: TextFieldEmbedder,
    encoder_size: int,
    decoder_size: int,
    num_layers: int,
    beam_size: int,
    max_decoding_steps: int,
    use_bleu: bool = True,
    initializer: InitializerApplicator = InitializerApplicator()
) -> None:
    super().__init__(vocab)
    self.START, self.END = (self.vocab.get_token_index(START_SYMBOL),
                            self.vocab.get_token_index(END_SYMBOL))
    self.OOV = self.vocab.get_token_index(self.vocab._oov_token)  # pylint: disable=protected-access
    self.PAD = self.vocab.get_token_index(self.vocab._padding_token)  # pylint: disable=protected-access
    self.COPY = self.vocab.get_token_index("@@COPY@@")
    self.KEEP = self.vocab.get_token_index("@@KEEP@@")
    self.DROP = self.vocab.get_token_index("@@DROP@@")
    self.SYMBOL = (self.START, self.END, self.PAD, self.KEEP, self.DROP)
    self.vocab_size = vocab.get_vocab_size()

    self.EMB = embed
    self.emb_size = self.EMB.token_embedder_tokens.output_dim
    self.encoder_size, self.decoder_size = encoder_size, decoder_size

    self.FACT_ENCODER = FeedForward(3 * self.emb_size, 1, encoder_size, nn.Tanh())
    self.ATTN = AdditiveAttention(encoder_size + decoder_size, encoder_size)
    self.COPY_ATTN = AdditiveAttention(decoder_size, encoder_size)

    module = nn.LSTM(self.emb_size, encoder_size // 2, num_layers,
                     bidirectional=True, batch_first=True)
    self.BUFFER = PytorchSeq2SeqWrapper(module)  # BiLSTM to encode draft text
    self.STREAM = nn.LSTMCell(2 * encoder_size, decoder_size)  # Store revised text
    self.BEAM = BeamSearch(self.END, max_steps=max_decoding_steps, beam_size=beam_size)

    self.U = nn.Sequential(nn.Linear(2 * encoder_size, decoder_size), nn.Tanh())
    self.ADD = nn.Sequential(nn.Linear(self.emb_size, encoder_size), nn.Tanh())
    self.P = nn.Sequential(
        nn.Linear(encoder_size + decoder_size, decoder_size), nn.Tanh())
    self.W = nn.Linear(decoder_size, self.vocab_size)  # projects decoder state to vocabulary logits
    self.G = nn.Sequential(nn.Linear(decoder_size, 1), nn.Sigmoid())  # scalar gate in [0, 1]

    initializer(self)
    self._bleu = BLEU(exclude_indices=set(self.SYMBOL)) if use_bleu else None
def get_masked_copynet_with_attention(vocab: Vocabulary,
                                      max_decoding_steps: int = 20,
                                      beam_size: int = 1) -> MaskedCopyNet:
    word_embeddings = Embedding(
        num_embeddings=vocab.get_vocab_size("tokens"),
        embedding_dim=EMB_DIM
    )
    word_embeddings = BasicTextFieldEmbedder({"tokens": word_embeddings})

    masker_embeddings = Embedding(
        num_embeddings=vocab.get_vocab_size("mask_tokens"),
        embedding_dim=MASK_EMB_DIM
    )
    masker_embeddings = BasicTextFieldEmbedder({"tokens": masker_embeddings})

    attention = AdditiveAttention(vector_dim=HID_DIM * 2, matrix_dim=HID_DIM * 2)
    mask_attention = AdditiveAttention(vector_dim=HID_DIM * 2, matrix_dim=MASK_EMB_DIM)
    lstm = PytorchSeq2SeqWrapper(
        nn.LSTM(EMB_DIM, HID_DIM, batch_first=True, bidirectional=True))

    return MaskedCopyNet(
        vocab=vocab,
        embedder=word_embeddings,
        encoder=lstm,
        max_decoding_steps=max_decoding_steps,
        attention=attention,
        mask_embedder=masker_embeddings,
        mask_attention=mask_attention,
        beam_size=beam_size
    )
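# Hedged usage sketch for the factory above (names such as `train_instances` are
# illustrative, not from the original code). The vocabulary must contain both the
# "tokens" and "mask_tokens" namespaces that the two embedders index into.
#
# vocab = Vocabulary.from_instances(train_instances)
# model = get_masked_copynet_with_attention(vocab, max_decoding_steps=25, beam_size=5)
# predictions = model.forward_on_instance(train_instances[0])  # standard AllenNLP Model API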
def __init__(self,
             encoder_output_dim: int,
             action_embedding_dim: int,
             input_attention: Attention,
             past_attention: Attention,
             activation: Activation = Activation.by_name('relu')(),
             enable_gating: bool = True,
             ablation_mode: str = None,
             predict_start_type_separately: bool = True,
             num_start_types: int = None,
             add_action_bias: bool = True,
             dropout: float = 0.0,
             num_layers: int = 1) -> None:
    super().__init__(encoder_output_dim=encoder_output_dim,
                     action_embedding_dim=action_embedding_dim,
                     input_attention=input_attention,
                     activation=activation,
                     add_action_bias=add_action_bias,
                     dropout=dropout,
                     num_layers=num_layers)
    self.enable_gating = enable_gating
    self.ablation_mode = ablation_mode
    self._decoder_dim = encoder_output_dim
    self._input_projection_layer = Linear(
        encoder_output_dim + action_embedding_dim, encoder_output_dim)
    if add_action_bias:
        action_embedding_dim = action_embedding_dim + 1
    self._past_attention = AdditiveAttention(action_embedding_dim, action_embedding_dim, True)
    self._action2gate = Linear(action_embedding_dim, 1)
    if self.ablation_mode != "wo_copy":
        self._past_copy_attention = AdditiveAttention(
            action_embedding_dim, action_embedding_dim, False)
        self._action2copygate = Linear(action_embedding_dim, 1)
    if self.ablation_mode == "wo_reuse_emb":
        self._output_projection_layer2 = Linear(encoder_output_dim * 2, action_embedding_dim)
    self._ent2ent_ff = FeedForward(1, 1, 1, Activation.by_name('linear')())
    self._large_dropout = Dropout(0.3)
def get_deep_levenshtein_copynet(masked_copynet: MaskedCopyNet) -> DeepLevenshtein:
    masked_copynet.eval()
    for p in masked_copynet.parameters():
        p.requires_grad = False

    hidden_dim = masked_copynet._encoder_output_dim
    body = BoWMaxAndMeanEncoder(embedding_dim=hidden_dim, hidden_dim=[64, 32])
    attention = AdditiveAttention(vector_dim=body.get_output_dim(), matrix_dim=HID_DIM * 2)

    model = DeepLevenshtein(vocab=masked_copynet.vocab,
                            text_field_embedder=masked_copynet._embedder,
                            seq2seq_encoder=masked_copynet._encoder,
                            seq2vec_encoder=body,
                            attention=attention)
    return model
def get_deep_levenshtein_attention(vocab: Vocabulary) -> DeepLevenshtein:
    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMB_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(EMB_DIM, HID_DIM, batch_first=True, bidirectional=True))
    body = BoWMaxAndMeanEncoder(embedding_dim=HID_DIM * 2, hidden_dim=[64, 32])
    attention = AdditiveAttention(vector_dim=body.get_output_dim(), matrix_dim=HID_DIM * 2)

    model = DeepLevenshtein(vocab=vocab,
                            text_field_embedder=word_embeddings,
                            seq2seq_encoder=lstm,
                            seq2vec_encoder=body,
                            attention=attention)
    return model
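# Note on the two DeepLevenshtein factories above: get_deep_levenshtein_copynet reuses
# (and freezes) the embedder and encoder of an already-trained MaskedCopyNet, while
# get_deep_levenshtein_attention builds a fresh embedding + BiLSTM stack of the same
# shape. Both share the BoWMaxAndMeanEncoder body and the additive attention head.
#
# Hedged usage sketch (names below are illustrative, not from the original code):
# vocab = Vocabulary.from_instances(train_instances)
# deep_lev = get_deep_levenshtein_attention(vocab)
# # or, to reuse a trained copynet's representations:
# deep_lev = get_deep_levenshtein_copynet(trained_masked_copynet)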
def test_forward_does_an_additive_product(self):
    params = Params({
        'vector_dim': 2,
        'matrix_dim': 3,
        'normalize': False,
    })
    additive = AdditiveAttention.from_params(params)
    additive._w_matrix = Parameter(torch.Tensor([[-0.2, 0.3], [-0.5, 0.5]]))
    additive._u_matrix = Parameter(torch.Tensor([[0., 1.], [1., 1.], [1., -1.]]))
    additive._v_vector = Parameter(torch.Tensor([[1.], [-1.]]))
    vectors = torch.FloatTensor([[0.7, -0.8], [0.4, 0.9]])
    matrices = torch.FloatTensor([
        [[1., -1., 3.], [0.5, -0.3, 0.], [0.2, -1., 1.], [0.7, 0.8, -1.]],
        [[-2., 3., -3.], [0.6, 0.2, 2.], [0.5, -0.4, -1.], [0.2, 0.2, 0.]]])
    result = additive(vectors, matrices).detach().numpy()
    assert result.shape == (2, 4)
    assert_almost_equal(result, [
        [1.975072, -0.04997836, 1.2176098, -0.9205586],
        [-1.4851665, 1.489604, -1.890285, -1.0672251]])
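# Hand check of the first expected value in the test above, as a sketch of what
# AdditiveAttention computes when normalize=False: score_j = v^T tanh(q W + k_j U).
# The variable names below (q, k, W, U, v) are illustrative, not from the test.
import torch

q = torch.tensor([0.7, -0.8])                  # first query vector
k = torch.tensor([1., -1., 3.])                # first key row of the first matrix
W = torch.tensor([[-0.2, 0.3], [-0.5, 0.5]])   # _w_matrix
U = torch.tensor([[0., 1.], [1., 1.], [1., -1.]])  # _u_matrix
v = torch.tensor([1., -1.])                    # _v_vector, flattened
score = torch.tanh(q @ W + k @ U) @ v          # q @ W = [0.26, -0.19], k @ U = [2., -3.]
assert abs(score.item() - 1.975072) < 1e-4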
def __init__(
    self,
    vocab: Vocabulary,
    encoder: Seq2SeqEncoder,
    freeze_encoder: bool = False,
    dropout: float = 0.0,
    tokens_namespace: str = "tokens",
    rule_namespace: str = "rule_labels",
    denotation_namespace: str = "labels",
    num_parse_only_batches: int = 0,
    use_gold_program_for_eval: bool = True,
    load_weights: str = None,
    use_modules: bool = True,
    positive_iou_threshold: float = 0.5,
    negative_iou_threshold: float = 0.5,
    nmn_settings: Dict = None,
) -> None:
    super().__init__(vocab)
    self._encoder = encoder
    self._max_decoding_steps = 10
    self._add_action_bias = True
    self._dropout = torch.nn.Dropout(p=dropout)
    self._tokens_namespace = tokens_namespace
    self._rule_namespace = rule_namespace
    self._denotation_namespace = denotation_namespace
    self._num_parse_only_batches = num_parse_only_batches
    self._use_gold_program_for_eval = use_gold_program_for_eval
    self._nmn_settings = nmn_settings
    self._use_modules = use_modules
    self._training_batches_so_far = 0

    self._denotation_accuracy = CategoricalAccuracy()
    self._box_f1_score = ClassificationModuleScore(
        positive_iou_threshold=positive_iou_threshold,
        negative_iou_threshold=negative_iou_threshold,
    )
    self._best_box_f1_score = ClassificationModuleScore(
        positive_iou_threshold=positive_iou_threshold,
        negative_iou_threshold=negative_iou_threshold,
    )
    # TODO(mattg): use FullSequenceMatch instead of this.
    self._program_accuracy = Average()
    self._program_similarity = Average()

    self.loss = torch.nn.BCELoss()
    self.loss_with_logits = torch.nn.BCEWithLogitsLoss()

    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    action_embedding_dim = 100
    if self._add_action_bias:
        input_action_dim = action_embedding_dim + 1
    else:
        input_action_dim = action_embedding_dim
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(
        num_embeddings=num_actions, embedding_dim=action_embedding_dim)

    if self._use_modules:
        self._language_parameters = VisualReasoningNlvr2Parameters(
            hidden_dim=self._encoder.get_output_dim(),
            initializer=self._encoder.encoder.model.init_bert_weights,
            max_boxes=self._nmn_settings["max_boxes"],
            dropout=dropout,
            nmn_settings=nmn_settings,
        )
    else:
        hid_dim = self._encoder.get_output_dim()
        self.logit_fc = torch.nn.Sequential(
            torch.nn.Linear(hid_dim * 2, hid_dim * 2),
            GeLU(),
            BertLayerNorm(hid_dim * 2, eps=1e-12),
            torch.nn.Linear(hid_dim * 2, 1),
        )
        self.logit_fc.apply(self._encoder.encoder.model.init_bert_weights)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    encoder_output_dim = self._encoder.get_output_dim()
    self._decoder_num_layers = 1
    self._beam_search = BeamSearch(beam_size=10)
    self._decoder_trainer = MaximumMarginalLikelihood()
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(
        torch.FloatTensor(encoder_output_dim))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._transition_function = BasicTransitionFunction(
        encoder_output_dim=encoder_output_dim,
        action_embedding_dim=action_embedding_dim,
        input_attention=AdditiveAttention(vector_dim=encoder_output_dim,
                                          matrix_dim=encoder_output_dim),
        add_action_bias=self._add_action_bias,
        dropout=dropout,
        num_layers=self._decoder_num_layers,
    )

    # Our language is constant across instances, so we just create one up front that we can
    # re-use to construct the `GrammarStatelet`.
    self._world = VisualReasoningNlvr2Language(None, None, None, None, None, None)

    if load_weights is not None:
        if not os.path.exists(load_weights):
            print('Could not find weights path: ' + load_weights +
                  '. Continuing without loading weights.')
        else:
            if torch.cuda.is_available():
                state = torch.load(load_weights)
            else:
                state = torch.load(load_weights, map_location="cpu")
            encoder_prefix = "_encoder"
            lang_params_prefix = "_language_parameters"
            # Keep only encoder and language-parameter weights, and drop any
            # "relate_layer" weights; `elif` avoids deleting the same key twice.
            for key in list(state.keys()):
                if (key[:len(encoder_prefix)] != encoder_prefix
                        and key[:len(lang_params_prefix)] != lang_params_prefix):
                    del state[key]
                elif "relate_layer" in key:
                    del state[key]
            self.load_state_dict(state, strict=False)

    if freeze_encoder:
        for param in self._encoder.parameters():
            param.requires_grad = False

    self.consistency_group_map = {}
def __init__(self,
             vocab: Vocabulary,
             source_embedder_1: TextFieldEmbedder,
             source_encoder_1: Seq2SeqEncoder,
             beam_size: int,
             max_decoding_steps: int,
             decoder_output_dim: int,
             target_embedding_dim: int = 30,
             namespace: str = "tokens",
             tensor_based_metric: Metric = None,
             align_embeddings: bool = True,
             source_embedder_2: TextFieldEmbedder = None,
             source_encoder_2: Seq2SeqEncoder = None) -> None:
    super().__init__(vocab)
    self._source_embedder_1 = source_embedder_1
    self._source_embedder_2 = source_embedder_2 or self._source_embedder_1
    self._source_encoder_1 = source_encoder_1
    self._source_encoder_2 = source_encoder_2 or self._source_encoder_1
    self._source_namespace = namespace
    self._target_namespace = namespace

    self.encoder_output_dim_1 = self._source_encoder_1.get_output_dim()
    self.encoder_output_dim_2 = self._source_encoder_2.get_output_dim()
    self.cated_encoder_out_dim = self.encoder_output_dim_1 + self.encoder_output_dim_2
    self.decoder_output_dim = decoder_output_dim

    # TODO: AllenNLP's AdditiveAttention implementation may not have a bias term
    self._attention_1 = AdditiveAttention(self.decoder_output_dim, self.encoder_output_dim_1)
    self._attention_2 = AdditiveAttention(self.decoder_output_dim, self.encoder_output_dim_2)

    if not align_embeddings:
        self.target_embedding_dim = target_embedding_dim
        self._target_vocab_size = self.vocab.get_vocab_size(
            namespace=self._target_namespace)
        self._target_embedder = Embedding(self._target_vocab_size, target_embedding_dim)
    else:
        self._target_embedder = self._source_embedder_1._token_embedders["tokens"]
        self._target_vocab_size = self.vocab.get_vocab_size(
            namespace=self._target_namespace)
        self.target_embedding_dim = self._target_embedder.get_output_dim()

    self.decoder_input_dim = (self.encoder_output_dim_1 + self.encoder_output_dim_2
                              + self.target_embedding_dim)
    self._decoder_cell = LSTMCell(self.decoder_input_dim, self.decoder_output_dim)

    # Maps the concatenated final hidden states of the two encoders to the decoder's
    # initial state.
    self._encoder_out_projection_layer = torch.nn.Linear(
        in_features=self.cated_encoder_out_dim,
        out_features=self.decoder_output_dim
    )  # TODO: bias - true or false?

    # Soft gating parameters, used to compute lambda.
    self._gate_projection_layer = torch.nn.Linear(
        in_features=self.decoder_output_dim + self.decoder_input_dim,
        out_features=1,
        bias=False)

    self._start_index = self.vocab.get_token_index(START_SYMBOL, namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, namespace)
    self._pad_index = self.vocab.get_token_index(self.vocab._padding_token, namespace)
    self._beam_search = BeamSearch(self._end_index,
                                   max_steps=max_decoding_steps,
                                   beam_size=beam_size)
    self._tensor_based_metric = tensor_based_metric or \
        BLEU(exclude_indices={self._pad_index, self._end_index, self._start_index})
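# Sketch of how the gate above is presumably applied at each decoding step. The forward
# pass is not shown in this listing, so this is an assumption, and `decoder_state`,
# `decoder_input`, `attended_source_1/2` are illustrative names: lambda_t weights the
# contribution of the two attended sources.
#
# lambda_t = torch.sigmoid(
#     self._gate_projection_layer(torch.cat([decoder_state, decoder_input], dim=-1)))
# context = lambda_t * attended_source_1 + (1 - lambda_t) * attended_source_2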
def __init__(
    self,
    vocab: Vocabulary,
    encoder: Seq2SeqEncoder,
    dropout: float = 0.0,
    object_loss_multiplier: float = 0.0,
    denotation_loss_multiplier: float = 1.0,
    tokens_namespace: str = "tokens",
    rule_namespace: str = "rule_labels",
    denotation_namespace: str = "labels",
    num_parse_only_batches: int = 0,
    use_gold_program_for_eval: bool = False,
    nmn_settings: Dict = None,
) -> None:
    # Atis semantic parser init
    super().__init__(vocab)
    self._encoder = encoder
    self._dropout = torch.nn.Dropout(p=dropout)
    self._obj_loss_multiplier = object_loss_multiplier
    self._denotation_loss_multiplier = denotation_loss_multiplier
    self._tokens_namespace = tokens_namespace
    self._rule_namespace = rule_namespace
    self._denotation_namespace = denotation_namespace
    self._num_parse_only_batches = num_parse_only_batches
    self._use_gold_program_for_eval = use_gold_program_for_eval
    self._nmn_settings = nmn_settings
    self._training_batches_so_far = 0

    self._denotation_accuracy = CategoricalAccuracy()
    self._proposal_accuracy = CategoricalAccuracy()
    # TODO(mattg): use FullSequenceMatch instead of this.
    self._program_accuracy = Average()

    self.loss = torch.nn.BCELoss()

    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    action_embedding_dim = 100
    self._add_action_bias = True
    if self._add_action_bias:
        input_action_dim = action_embedding_dim + 1
    else:
        input_action_dim = action_embedding_dim
    self._action_embedder = Embedding(num_embeddings=num_actions,
                                      embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(
        num_embeddings=num_actions, embedding_dim=action_embedding_dim)

    self._language_parameters = VisualReasoningGqaParameters(
        hidden_dim=self._encoder.get_output_dim(),
        initializer=self._encoder.encoder.model.init_bert_weights,
    )

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim))
    # encoder_output_dim = self._lxrt_encoder.get_output_dim()
    encoder_output_dim = self._encoder.get_output_dim()
    self._first_attended_utterance = torch.nn.Parameter(
        torch.FloatTensor(encoder_output_dim))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._decoder_num_layers = 1
    self._beam_search = BeamSearch(beam_size=10)
    self._decoder_trainer = MaximumMarginalLikelihood()
    self._transition_function = BasicTransitionFunction(
        encoder_output_dim=encoder_output_dim,
        action_embedding_dim=action_embedding_dim,
        input_attention=AdditiveAttention(vector_dim=encoder_output_dim,
                                          matrix_dim=encoder_output_dim),
        add_action_bias=self._add_action_bias,
        dropout=dropout,
        num_layers=self._decoder_num_layers,
    )
    self._language_parameters.apply(
        self._encoder.encoder.model.init_bert_weights)
    # attention.apply(self._lxrt_encoder.encoder.model.init_bert_weights)
    # self._transition_function.apply(self._lxrt_encoder.encoder.model.init_bert_weights)

    # Our language is constant across instances, so we just create one up front that we can
    # re-use to construct the `GrammarStatelet`.
    self._world = VisualReasoningGqaLanguage(None, None, None, None, None)