Example #1
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 vocab: Vocabulary,
                 positive_label: int = 4) -> None:
        super().__init__(vocab)
        # We need the embeddings to convert word IDs to their vector representations
        self.word_embeddings = word_embeddings

        # bottle-neck
        self.linear_bn = torch.nn.Linear(
            in_features=word_embeddings.get_output_dim(),
            out_features=encoder.get_input_dim())

        self.encoder = encoder

        # After converting a sequence of vectors to a single vector, we feed it into
        # a fully-connected linear layer to reduce the dimension to the total number of labels.
        self.linear = torch.nn.Linear(
            in_features=encoder.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))

        # Monitor the metrics - we use accuracy, as well as precision, recall and F1 for label 4 (very positive)
        self.accuracy = CategoricalAccuracy()
        self.f1_measure = F1Measure(positive_label)

        # We use the cross entropy loss because this is a classification task.
        # Note that PyTorch's CrossEntropyLoss combines log-softmax and negative log likelihood loss,
        # which makes it unnecessary to add a separate softmax layer.
        self.loss_function = torch.nn.CrossEntropyLoss()
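Since the comment above leans on CrossEntropyLoss subsuming the softmax, here is a minimal standalone check (plain PyTorch, tensor sizes invented) that cross entropy on raw logits equals negative log likelihood on log-softmax outputs:

import torch

logits = torch.randn(3, 5)              # batch of 3, 5 candidate labels
targets = torch.tensor([0, 2, 4])

ce = torch.nn.CrossEntropyLoss()(logits, targets)
nll = torch.nn.NLLLoss()(torch.log_softmax(logits, dim=-1), targets)
assert torch.allclose(ce, nll)          # no separate softmax layer needed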
Example #2
    def __init__(self, word_embedder: TextFieldEmbedder,
                 position_embedder: TextFieldEmbedder, polarities: list,
                 vocab: Vocabulary, configuration: dict):
        super().__init__(vocab)
        self.configuration = configuration
        self.word_embedder = word_embedder
        self.position_embedder = position_embedder
        self.polarities = polarities
        self.polarity_num = len(polarities)
        self.sentiment_loss = nn.CrossEntropyLoss()
        self._accuracy = metrics.CategoricalAccuracy()

        word_embedding_dim = word_embedder.get_output_dim()
        lstm_input_size = word_embedding_dim
        num_layers = 3
        self.lstm = torch.nn.LSTM(lstm_input_size,
                                  int(word_embedding_dim / 2),
                                  batch_first=True,
                                  bidirectional=True,
                                  num_layers=num_layers,
                                  dropout=0.5)
        sentiment_fc_input_size = word_embedding_dim
        self.sentiment_fc = nn.Sequential(
            nn.Linear(sentiment_fc_input_size, sentiment_fc_input_size),
            nn.ReLU(), nn.Linear(sentiment_fc_input_size, self.polarity_num))
        self.dropout_after_embedding_layer = nn.Dropout(0.5)
        self.dropout_after_lstm_layer = nn.Dropout(0.5)
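The `int(word_embedding_dim / 2)` hidden size is what keeps the LSTM output dimension equal to the embedding dimension: a bidirectional LSTM concatenates both directions. A standalone shape check (the sizes are assumptions, not from the source):

import torch

word_embedding_dim = 300
lstm = torch.nn.LSTM(word_embedding_dim, word_embedding_dim // 2,
                     batch_first=True, bidirectional=True,
                     num_layers=3, dropout=0.5)
x = torch.randn(2, 7, word_embedding_dim)   # (batch, seq_len, embedding_dim)
out, _ = lstm(x)
assert out.shape == (2, 7, word_embedding_dim)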
Example #3
    def __init__(self, word_embeddings: TextFieldEmbedder, n_grams: int,
                 n_kernels: int, conv_out_dim: int):

        super(Conv_KNRM, self).__init__()

        self.word_embeddings = word_embeddings

        # static - kernel size & magnitude variables
        self.mu = Variable(torch.cuda.FloatTensor(self.kernel_mus(n_kernels)),
                           requires_grad=False).view(1, 1, 1, n_kernels)
        self.sigma = Variable(torch.cuda.FloatTensor(
            self.kernel_sigmas(n_kernels)),
                              requires_grad=False).view(1, 1, 1, n_kernels)

        self.convolutions = []
        for i in range(1, n_grams + 1):
            self.convolutions.append(
                nn.Sequential(
                    nn.ConstantPad1d((0, i - 1), 0),
                    nn.Conv1d(kernel_size=i,
                              in_channels=word_embeddings.get_output_dim(),
                              out_channels=conv_out_dim), nn.ReLU()))
        self.convolutions = nn.ModuleList(
            self.convolutions)  # register conv as part of the model

        # this does not really do "attention" - just a plain cosine matrix calculation (without learnable weights)
        self.cosine_module = CosineMatrixAttention()

        # n_kernels * n_grams * n_grams because we concatenate every n-gram pair's kernel match sums (e.g. 3x3 = 9) before the dense layer
        self.dense = nn.Linear(n_kernels * n_grams * n_grams, 1, bias=False)

        # init with small weights, otherwise the dense output is way too high
        torch.nn.init.uniform_(self.dense.weight, -0.014,
                               0.014)  # inits taken from matchzoo
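The `(1, 1, 1, n_kernels)` shapes of mu and sigma exist so they broadcast against a (batch, query_len, doc_len, 1) cosine match matrix during kernel pooling. A minimal sketch of that broadcast (shapes and values are illustrative assumptions):

import torch

n_kernels = 11
match = torch.rand(2, 10, 30, 1)            # cosine similarities, unsqueezed
mu = torch.linspace(-1, 1, n_kernels).view(1, 1, 1, n_kernels)
sigma = torch.full((1, 1, 1, n_kernels), 0.1)

kernel_scores = torch.exp(-0.5 * (match - mu) ** 2 / sigma ** 2)
assert kernel_scores.shape == (2, 10, 30, n_kernels)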
Example #4
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.word_embeddings = word_embeddings

        self.out = torch.nn.Linear(
            in_features=self.word_embeddings.get_output_dim() * 4,
            out_features=vocab.get_vocab_size('labels')
        )
        self.accuracy = CategoricalAccuracy()
        self.f_score_0 = F1Measure(positive_label=0)
        self.f_score_1 = F1Measure(positive_label=1)
        self.f_score_2 = F1Measure(positive_label=2)
        self.loss = CrossEntropyLoss()
        self.attention = BilinearAttention(word_embeddings.get_output_dim() * 3, word_embeddings.get_output_dim())
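For orientation, a hedged usage sketch of the attention module above, following AllenNLP's BilinearAttention interface (tensor sizes are invented; how the model builds the 3x-width vector is not visible from this __init__ alone):

import torch
from allennlp.modules.attention import BilinearAttention

dim = 16
attn = BilinearAttention(vector_dim=dim * 3, matrix_dim=dim)
vector = torch.randn(2, dim * 3)      # e.g. a concatenated query vector
matrix = torch.randn(2, 9, dim)       # e.g. embedded tokens
weights = attn(vector, matrix)        # (2, 9), normalized by default
assert weights.shape == (2, 9)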
Example #5
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder, dropout_p: float,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)

        self.word_embeddings = word_embeddings

        self.embedding2input = FeedForward(
            input_dim=word_embeddings.get_output_dim(),
            num_layers=1,
            hidden_dims=encoder.get_input_dim(),
            activations=Activation.by_name('relu')(),
            dropout=dropout_p)

        self.encoder = encoder

        self.hidden2intermediate = FeedForward(
            input_dim=encoder.get_output_dim(),
            num_layers=1,
            hidden_dims=int(encoder.get_output_dim() / 2),
            activations=Activation.by_name('relu')(),
            dropout=dropout_p)

        self.intermediate2tag = nn.Linear(
            in_features=int(encoder.get_output_dim() / 2),
            out_features=vocab.get_vocab_size('labels'))

        self.accuracy = CategoricalAccuracy()
        self.loss_function = torch.nn.CrossEntropyLoss()
Example #6
    def __init__(self, word_embeddings: TextFieldEmbedder, encoder: Seq2VecEncoder, vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.word_embedding = word_embeddings
        self.encoder = encoder
        self.hidden2out = torch.nn.Linear(in_features=encoder.get_output_dim(), out_features=vocab.get_vocab_size("labels"))
        self.accuracy = MicroMetrics(vocab)
        self.lstm = nn.LSTM(input_size=word_embeddings.get_output_dim(), hidden_size=128, num_layers=1, batch_first=True)
        self.label_index_to_label = self.vocab.get_index_to_token_vocabulary('labels')
Example #7
    def __init__(
        self,
        word_embedder: TextFieldEmbedder,
        attribute_embedder: Embedding,
        content_encoder: Seq2SeqEncoder,
        vocab: Vocabulary,
        max_decoding_steps: int = 20,
        beam_size: int = None,
        scheduled_sampling_ratio: float = 0.,
    ) -> None:
        super().__init__(vocab)

        self.scheduled_sampling_ratio = scheduled_sampling_ratio

        # We need the start symbol to provide as the input at the first timestep of decoding, and
        # end symbol as a way to indicate the end of the decoded sequence.
        self.start_index = self.vocab.get_token_index(START_SYMBOL, 'tokens')
        self.end_index = self.vocab.get_token_index(END_SYMBOL, 'tokens')

        # TODO: not sure if we need this
        self.bleu = None

        # At prediction time, we use a beam search to find the most likely sequence of target tokens.
        beam_size = beam_size or 1
        self.max_decoding_steps = max_decoding_steps
        self.beam_search = BeamSearch(self.end_index,
                                      max_steps=max_decoding_steps,
                                      beam_size=beam_size)

        # Dense embedding of source and target vocab tokens and attribute.
        self.word_embedder = word_embedder
        self.attribute_embedder = attribute_embedder

        # Encodes the sequence of source embeddings into a sequence of hidden states.
        self.content_encoder = content_encoder

        num_classes = self.vocab.get_vocab_size('tokens')

        # TODO: not sure if we need this
        self.attention = None

        # Dense embedding of vocab words in the target space.
        embedding_dim = word_embedder.get_output_dim()
        self.target_embedder = Embedding(num_classes, embedding_dim)

        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with the final hidden state of the encoder.
        self.encoder_output_dim = self.content_encoder.get_output_dim() + embedding_dim
        self.decoder_output_dim = self.encoder_output_dim

        self.decoder_input_dim = embedding_dim

        self.decoder_cell = LSTMCell(self.decoder_input_dim,
                                     self.decoder_output_dim)

        self.output_projection_layer = Linear(self.decoder_output_dim,
                                              num_classes)
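The scheduled_sampling_ratio stored above is usually consumed in the decode loop: with that probability the decoder is fed its own last prediction instead of the gold token. A standalone sketch of that choice (names and shapes are hypothetical, not from the source):

import torch

def choose_decoder_input(gold, predicted, scheduled_sampling_ratio):
    """Per-token coin flip between teacher forcing and the model's own output."""
    use_prediction = torch.rand(gold.shape) < scheduled_sampling_ratio
    return torch.where(use_prediction, predicted, gold)

gold = torch.tensor([5, 8, 3])
predicted = torch.tensor([5, 2, 3])
next_input = choose_decoder_input(gold, predicted, 0.25)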
Example #8
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.word_embeddings = word_embeddings

        self.bert_seq_encoder = PytorchSeq2VecWrapper(LSTM(word_embeddings.get_output_dim(),
                                                      int(word_embeddings.get_output_dim()/2),
                                                      batch_first=True,
                                                      bidirectional=True))

        self.out = torch.nn.Linear(
            in_features=word_embeddings.get_output_dim()*4,
            out_features=vocab.get_vocab_size('labels')
        )
        self.accuracy = CategoricalAccuracy()
        self.f_score_0 = F1Measure(positive_label=0)
        self.f_score_1 = F1Measure(positive_label=1)
        self.f_score_2 = F1Measure(positive_label=2)
        self.loss = CrossEntropyLoss()
Example #9
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 p_encoder: Seq2SeqEncoder,
                 q_encoder: Seq2SeqEncoder,
                 a_encoder: Seq2SeqEncoder,
                 vocab: Vocabulary,
                 embedding_dropout: float = 0.0,
                 encoder_dropout: float = 0.0) -> None:
        # We have to pass the vocabulary to the constructor.
        super().__init__(vocab)

        self.word_embeddings = word_embeddings

        if embedding_dropout > 0:
            self.embedding_dropout = torch.nn.Dropout(p=embedding_dropout)
        else:
            self.embedding_dropout = lambda x: x

        if encoder_dropout > 0:
            self.encoder_dropout = torch.nn.Dropout(p=encoder_dropout)
        else:
            self.encoder_dropout = lambda x: x

        embedding_dim = word_embeddings.get_output_dim()
        self.p_q_match = SequenceAttention(input_dim=embedding_dim)
        self.a_p_match = SequenceAttention(input_dim=embedding_dim)
        self.a_q_match = SequenceAttention(input_dim=embedding_dim)

        # Our model has different encoders for each of the fields (passage,
        # answer and question).
        self.p_encoder = p_encoder
        self.q_encoder = q_encoder
        self.a_encoder = a_encoder

        # Attention layers: passage-question, question-self, answer-self
        self.p_q_attn = BilinearAttention(
            vector_dim=self.q_encoder.get_output_dim(),
            matrix_dim=self.p_encoder.get_output_dim(),
        )
        self.q_self_attn = LinearSelfAttention(
            input_dim=self.q_encoder.get_output_dim()
        )
        self.a_self_attn = LinearSelfAttention(
            input_dim=self.a_encoder.get_output_dim()
        )
        self.p_a_bilinear = torch.nn.Linear(
            in_features=self.p_encoder.get_output_dim(),
            out_features=self.a_encoder.get_output_dim()
        )
        self.q_a_bilinear = torch.nn.Linear(
            in_features=self.q_encoder.get_output_dim(),
            out_features=self.a_encoder.get_output_dim()
        )
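The `lambda x: x` fallbacks above work, but a module-based no-op keeps the model's submodule tree uniform. A small sketch of the alternative (torch.nn.Identity is standard PyTorch):

import torch

def make_dropout(p: float) -> torch.nn.Module:
    """Return real dropout, or a no-op module when p == 0."""
    return torch.nn.Dropout(p=p) if p > 0 else torch.nn.Identity()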
Example #10
    def __init__(self, word_embedder: TextFieldEmbedder,
                 aspect_embedder: TextFieldEmbedder, categories: list,
                 polarities: list, vocab: Vocabulary, configuration: dict):
        super().__init__(vocab)
        self.configuration = configuration
        self.word_embedder = word_embedder
        self.aspect_embedder = aspect_embedder
        self.categories = categories
        self.polarities = polarities
        self.category_num = len(categories)
        self.polarity_num = len(polarities)
        self.sentiment_loss = nn.CrossEntropyLoss()
        self._accuracy = metrics.CategoricalAccuracy()

        word_embedding_dim = word_embedder.get_output_dim()
        aspect_word_embedding_dim = aspect_embedder.get_output_dim()
        if self.configuration['model_name'] in ['ae-lstm', 'atae-lstm']:
            lstm_input_size = word_embedding_dim + aspect_word_embedding_dim
        else:
            lstm_input_size = word_embedding_dim
        num_layers = 1
        hidden_size = 300
        self.lstm = torch.nn.LSTM(lstm_input_size,
                                  hidden_size,
                                  batch_first=True,
                                  bidirectional=False,
                                  num_layers=num_layers)
        if self.configuration['model_name'] in ['at-lstm', 'atae-lstm']:
            attention_input_size = word_embedding_dim + aspect_word_embedding_dim
            self.sentiment_attention = AttentionInHtt(attention_input_size,
                                                      lstm_input_size)
            self.sentiment_fc = nn.Sequential(
                nn.Linear(hidden_size * 2, self.polarity_num))
        else:
            self.sentiment_attention = None
            self.sentiment_fc = nn.Sequential(
                nn.Linear(hidden_size, self.polarity_num))
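The `lstm_input_size = word_embedding_dim + aspect_word_embedding_dim` branch reflects how 'ae-lstm'/'atae-lstm' build their input: the aspect embedding is repeated over the sequence and concatenated to each word embedding. A standalone shape sketch (the sizes are assumptions):

import torch

words = torch.randn(2, 7, 300)       # (batch, seq_len, word_dim)
aspect = torch.randn(2, 100)         # (batch, aspect_dim)
aspect_rep = aspect.unsqueeze(1).expand(-1, words.size(1), -1)
lstm_input = torch.cat([words, aspect_rep], dim=-1)
assert lstm_input.shape == (2, 7, 400)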
Example #11
    def __init__(self, word_embeddings: TextFieldEmbedder, vocab: Vocabulary,
                 loss: str, hinge_margin: float) -> None:
        super().__init__(vocab)
        self.word_embeddings = word_embeddings

        self.out = torch.nn.Linear(
            in_features=word_embeddings.get_output_dim(), out_features=1)
        self.accuracy = BooleanAccuracy()
        self.loss_name = loss
        if loss == 'hinge':
            self.loss = MarginRankingLoss(margin=hinge_margin,
                                          reduction='mean')
        else:
            self.loss = BCEWithLogitsLoss(reduction='mean')
        self.sigmoid = torch.nn.Sigmoid()
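The two loss branches above expect different inputs: MarginRankingLoss compares pairs of scores, while BCEWithLogitsLoss scores single logits against binary targets. A standalone sketch with invented scores:

import torch

pos = torch.randn(4)                 # scores that should rank higher
neg = torch.randn(4)                 # scores that should rank lower
target = torch.ones(4)               # +1: pos is expected above neg
hinge = torch.nn.MarginRankingLoss(margin=1.0, reduction='mean')(pos, neg, target)

logits = torch.randn(4)
labels = torch.randint(0, 2, (4,)).float()
bce = torch.nn.BCEWithLogitsLoss(reduction='mean')(logits, labels)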
Example #12
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 sentence_encoder: Seq2VecEncoder,
                 claim_encoder: Seq2SeqEncoder,
                 attention: Attention,
                 max_steps: int = 100,
                 beam_size: int = 5,
                 beta: float = 1.0) -> None:
        super(Seq2SeqClaimRank, self).__init__(vocab)

        self.text_field_embedder = text_field_embedder
        self.sentence_encoder = sentence_encoder
        self.claim_encoder = TimeDistributed(claim_encoder)  # Handles additional sequence dim
        self.claim_encoder_dim = claim_encoder.get_output_dim()
        self.attention = attention
        self.decoder_embedding_dim = text_field_embedder.get_output_dim()
        self.max_steps = max_steps
        self.beam_size = beam_size
        self.beta = beta

        # self.target_embedder = torch.nn.Embedding(vocab.get_vocab_size(), decoder_embedding_dim)

        # Since we are using the sentence encoding as the initial hidden state to the decoder, the
        # decoder hidden dim must match the sentence encoder hidden dim.
        self.decoder_output_dim = sentence_encoder.get_output_dim()
        self.decoder_0_cell = torch.nn.LSTMCell(self.decoder_embedding_dim + self.claim_encoder_dim,
                                                self.decoder_output_dim)
        self.decoder_1_cell = torch.nn.LSTMCell(self.decoder_output_dim,
                                                self.decoder_output_dim)

        # When projecting out we will use attention to combine claim embeddings into a single
        # context embedding, this will be concatenated with the decoder cell output before being
        # fed to the projection layer. Hence the expected input size is:
        #   decoder output dim + claim encoder output dim
        projection_input_dim = self.decoder_output_dim + self.claim_encoder_dim
        self.output_projection_layer = torch.nn.Linear(projection_input_dim,
                                                       vocab.get_vocab_size())

        self._start_index = self.vocab.get_token_index('<s>')
        self._end_index = self.vocab.get_token_index('</s>')

        self.beam_search = BeamSearch(self._end_index, max_steps=max_steps, beam_size=beam_size)
        pad_index = vocab.get_token_index(vocab._padding_token)
        self.bleu = BLEU(exclude_indices={pad_index, self._start_index, self._end_index})
        self.avg_reconstruction_loss = Average()
        self.avg_claim_scoring_loss = Average()
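TimeDistributed, used above to handle the extra claims dimension, folds that dimension into the batch, applies the wrapped module, and unfolds the result. A standalone sketch of the reshaping with a stand-in module (all sizes invented):

import torch

batch, num_claims, claim_len, dim = 2, 5, 11, 32
encoder = torch.nn.Linear(dim, 8)    # stand-in for the wrapped claim encoder
claims = torch.randn(batch, num_claims, claim_len, dim)

folded = encoder(claims.view(batch * num_claims, claim_len, dim))
unfolded = folded.view(batch, num_claims, claim_len, 8)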
Example #13
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 n_grams: int,
                 n_kernels: int,
                 conv_out_dim: int):

        super(Conv_KNRM, self).__init__()

        self.word_embeddings = word_embeddings

        # static - kernel size & magnitude variables
        self.mu = Variable(torch.FloatTensor(self.kernel_mus(n_kernels)),
                           requires_grad=False).view(1, 1, 1, n_kernels)
        self.sigma = Variable(torch.FloatTensor(self.kernel_sigmas(n_kernels)),
                              requires_grad=False).view(1, 1, 1, n_kernels)

        # Implement a 1-dimensional CNN layer for each n-gram size,
        # with ReLU as the activation function
        self.convolutions = []
        for i in range(1, n_grams + 1):
            self.convolutions.append(nn.Sequential(
                nn.ConstantPad1d((0, i - 1), 0),
                # the conv kernel size matches the current i-gram (uni, bi, tri, ...)
                nn.Conv1d(kernel_size=i, in_channels=word_embeddings.get_output_dim(), out_channels=conv_out_dim),
                nn.ReLU()))
        # register the convolutions as part of the model
        self.convolutions = nn.ModuleList(self.convolutions)

        # Cosine similarity matrix
        self.cosine_module = CosineMatrixAttention()


        # Initialize the linear transformation:
        # input size: n_kernels * n_grams * n_grams, the number of soft-TF features
        # (one kernel set per combination of query and document n-gram match matrices)
        # the output is a single score
        # bias is used per the paper's formula (True by default, made explicit here)
        self.transform = nn.Linear(in_features=n_kernels * n_grams * n_grams, out_features=1, bias=True)
Example #14
    def __init__(self, word_embedder: TextFieldEmbedder,
                 position_embedder: TextFieldEmbedder, polarities: list,
                 vocab: Vocabulary, configuration: dict):
        super().__init__(vocab)
        self.configuration = configuration
        self.word_embedder = word_embedder
        self.position_embedder = position_embedder
        self.polarities = polarities
        self.polarity_num = len(polarities)
        self.sentiment_loss = nn.CrossEntropyLoss()
        self._accuracy = metrics.CategoricalAccuracy()

        word_embedding_dim = word_embedder.get_output_dim()
        lstm_input_size = word_embedding_dim
        sentiment_fc_input_size = lstm_input_size
        self.sentiment_fc = nn.Sequential(
            nn.Linear(sentiment_fc_input_size, sentiment_fc_input_size),
            nn.ReLU(), nn.Linear(sentiment_fc_input_size, self.polarity_num))
        self.dropout_after_embedding_layer = nn.Dropout(0.5)
        self.dropout_after_lstm_layer = nn.Dropout(0.5)

        self.gnn_for_sentiment = GAT(word_embedding_dim, word_embedding_dim,
                                     word_embedding_dim, 4, self.configuration)
Example #15
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder, dropout_p: float,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)

        self.word_embeddings = word_embeddings

        self.embedding2input = FeedForward(
            input_dim=word_embeddings.get_output_dim(),
            num_layers=1,
            hidden_dims=encoder.get_input_dim(),
            activations=Activation.by_name('relu')(),
            dropout=dropout_p)

        self.encoder = encoder

        self.hidden2intermediate = FeedForward(
            input_dim=encoder.get_output_dim(),
            num_layers=1,
            hidden_dims=int(encoder.get_output_dim() / 2),
            activations=Activation.by_name('relu')(),
            dropout=dropout_p)

        self.intermediate2tag = nn.Linear(
            in_features=int(encoder.get_output_dim() / 2),
            out_features=vocab.get_vocab_size('labels'))

        # self.accuracy = CategoricalAccuracy()

        label_vocab = vocab.get_token_to_index_vocabulary('labels').copy()
        # print("label_vocab: ", label_vocab)
        for x in ['O', 'OR']:
            label_vocab.pop(x)
        labels_for_metric = list(label_vocab.values())
        # print("labels_for_metric: ", labels_for_metric)
        self.accuracy = CustomFBetaMeasure(beta=1.0,
                                           average='micro',
                                           labels=labels_for_metric)
Example #16
    def __init__(
        self,
        vocab: Vocabulary,
        embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        emb_to_enc_proj: FeedForward = None,
        feedforward: FeedForward = None,
        dropout: float = 0.0,
        num_tags: int = 2,
        use_crf: bool = False,
    ):
        super().__init__(vocab)
        self.embedder = embedder
        self.emb_to_enc_proj = None
        if emb_to_enc_proj is not None:
            self.emb_to_enc_proj = emb_to_enc_proj
        self.encoder = encoder
        assert (embedder.get_output_dim() == encoder.get_input_dim()
                or emb_to_enc_proj is not None and
                emb_to_enc_proj.get_output_dim() == encoder.get_input_dim())
        self.feedforward = None
        pre_output_dim = encoder.get_output_dim()
        if feedforward is not None:
            assert feedforward.get_input_dim() == encoder.get_output_dim()
            self.feedforward = feedforward
            pre_output_dim = self.feedforward.get_output_dim()

        self.hidden2tag = torch.nn.Linear(in_features=pre_output_dim,
                                          out_features=num_tags)
        self.dropout = torch.nn.Dropout(dropout)
        self.accuracy = CategoricalAccuracy()
        self.f1 = F1Measure(1)
        self.use_crf = use_crf
        if use_crf:
            self.crf = ConditionalRandomField(
                num_tags, include_start_end_transitions=True)
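A hedged usage sketch for the optional CRF head above, following AllenNLP's ConditionalRandomField interface (sizes invented): the forward call returns the log-likelihood of the gold tag sequence, and viterbi_tags decodes the best paths.

import torch
from allennlp.modules import ConditionalRandomField

num_tags, batch, seq_len = 2, 3, 7
crf = ConditionalRandomField(num_tags, include_start_end_transitions=True)
logits = torch.randn(batch, seq_len, num_tags)
tags = torch.randint(0, num_tags, (batch, seq_len))
mask = torch.ones(batch, seq_len, dtype=torch.bool)

log_likelihood = crf(logits, tags, mask)
best_paths = crf.viterbi_tags(logits, mask)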
Example #17
    def __init__(self, word_embedder: TextFieldEmbedder,
                 aspect_embedder: TextFieldEmbedder, categories: list,
                 polarities: list, vocab: Vocabulary, configuration: dict):
        super().__init__(vocab)
        self.configuration = configuration
        self.word_embedder = word_embedder
        self.aspect_embedder = aspect_embedder
        self.categories = categories
        self.polarities = polarities
        self.category_num = len(categories)
        self.polarity_num = len(polarities)
        self.category_loss = nn.BCEWithLogitsLoss()
        self.sentiment_loss = nn.CrossEntropyLoss()
        self._accuracy = metrics.CategoricalAccuracy()

        word_embedding_dim = word_embedder.get_output_dim()
        lstm_input_size = word_embedding_dim
        num_layers = 1
        hidden_size = 32
        self.aspect_gru = torch.nn.GRU(lstm_input_size,
                                       hidden_size,
                                       batch_first=True,
                                       bidirectional=True,
                                       num_layers=num_layers)
        self.sentiment_gru = torch.nn.GRU(lstm_input_size,
                                          hidden_size,
                                          batch_first=True,
                                          bidirectional=True,
                                          num_layers=num_layers)
        self.aspect_attention = AttentionInHtt(hidden_size * 3, hidden_size)
        self.sentiment_attention = AttentionInHtt(hidden_size * 5,
                                                  hidden_size,
                                                  softmax=False)

        self.sentiment_fc = nn.Sequential(
            nn.Linear(hidden_size * 3, self.polarity_num))
Example #18
    def __init__(self,
                 vocab,
                 embedder: TextFieldEmbedder,
                 max_target_positions,
                 dropout,
                 share_decoder_input_output_embed,
                 decoder_output_dim,
                 decoder_conv_dim,
                 decoder_glu,
                 decoder_conv_type,
                 weight_softmax,
                 decoder_attention_heads,
                 weight_dropout,
                 relu_dropout,
                 input_dropout,
                 decoder_normalize_before,
                 attention_dropout,
                 decoder_ffn_embed_dim,
                 decoder_kernel_size_list,
                 adaptive_softmax_cutoff=None,
                 tie_adaptive_weights=False,
                 adaptive_softmax_dropout=0,
                 tie_adaptive_proj=False,
                 adaptive_softmax_factor=0,
                 decoder_layers=6,
                 final_norm=True,
                 padding_idx=0,
                 namespace='target_tokens',
                 vocab_size=None,
                 section_attn=False,
                 swap=False):
        super().__init__()
        self.vocab = vocab
        vocab_size = vocab_size or vocab.get_vocab_size(namespace)
        self.dropout = dropout
        self.share_input_output_embed = share_decoder_input_output_embed

        input_embed_dim = embedder.get_output_dim()
        embed_dim = input_embed_dim
        output_embed_dim = input_embed_dim

        self.max_target_positions = max_target_positions

        self.embedder = embedder

        self.project_in_dim = GehringLinear(
            input_embed_dim, embed_dim,
            bias=False) if embed_dim != input_embed_dim else None

        self.layers = nn.ModuleList([])
        self.layers.extend([
            DynamicConvDecoderLayer(embed_dim,
                                    decoder_conv_dim,
                                    decoder_glu,
                                    decoder_conv_type,
                                    weight_softmax,
                                    decoder_attention_heads,
                                    weight_dropout,
                                    dropout,
                                    relu_dropout,
                                    input_dropout,
                                    decoder_normalize_before,
                                    attention_dropout,
                                    decoder_ffn_embed_dim,
                                    swap,
                                    kernel_size=decoder_kernel_size_list[i])
            for i in range(decoder_layers)
        ])

        self.adaptive_softmax = None

        self.project_out_dim = GehringLinear(embed_dim, output_embed_dim, bias=False) \
            if embed_dim != output_embed_dim and not tie_adaptive_weights else None

        if adaptive_softmax_cutoff is not None:
            adaptive_inputs = None
            if isinstance(embedder, AdaptiveEmbedding):
                adaptive_inputs = embedder
            elif hasattr(embedder, 'token_embedder_adaptive'):
                adaptive_inputs = embedder.token_embedder_adaptive
            elif tie_adaptive_weights:
                raise ValueError('Cannot locate adaptive_inputs.')
            self.adaptive_softmax = AdaptiveSoftmax(
                vocab_size,
                output_embed_dim,
                eval_str_list(adaptive_softmax_cutoff, type=int),
                dropout=adaptive_softmax_dropout,
                adaptive_inputs=adaptive_inputs,
                factor=adaptive_softmax_factor,
                tie_proj=tie_adaptive_proj,
            )
        elif not self.share_input_output_embed:
            self.embed_out = nn.Parameter(
                torch.Tensor(vocab_size, output_embed_dim))
            nn.init.normal_(self.embed_out, mean=0, std=output_embed_dim**-0.5)
        self.register_buffer('version', torch.Tensor([2]))
        self.normalize = decoder_normalize_before and final_norm
        if self.normalize:
            self.layer_norm = nn.LayerNorm(embed_dim)
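When share_input_output_embed is set above, no embed_out parameter is created; the usual trick is to reuse the input embedding matrix as the output projection. A standalone sketch of that weight tying (toy sizes):

import torch
import torch.nn.functional as F

vocab_size, dim = 100, 16
embedding = torch.nn.Embedding(vocab_size, dim)
hidden = torch.randn(2, 7, dim)

logits = F.linear(hidden, embedding.weight)   # (2, 7, vocab_size)
assert logits.shape == (2, 7, vocab_size)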
Example #19
    def __init__(self, vocab, embedder: TextFieldEmbedder, num_layers,
                 hidden_size, dropout, share_decoder_input_output_embed,
                 vocab_size=None, adaptive_softmax_cutoff=None,
                 tie_adaptive_weights=False, adaptive_softmax_dropout=0,
                 tie_adaptive_proj=False, adaptive_softmax_factor=0,
                 article_embed_size=1024, image_embed_size=2048,
                 namespace='target_tokens'):
        super().__init__()
        self.vocab = vocab
        self.hidden_size = hidden_size
        vocab_size = vocab_size or vocab.get_vocab_size(namespace)
        self.dropout = dropout
        self.share_input_output_embed = share_decoder_input_output_embed

        input_embed_dim = embedder.get_output_dim()
        embed_dim = input_embed_dim
        output_embed_dim = input_embed_dim

        self.layers = nn.ModuleList([])
        self.h = nn.ParameterList([])
        self.c = nn.ParameterList([])
        for layer in range(num_layers):
            input_size = hidden_size + embed_dim if layer == 0 else hidden_size
            rnn = LSTMCell(input_size=input_size, hidden_size=hidden_size)
            self.layers.append(rnn)
            self.h.append(nn.Parameter(torch.zeros(1, hidden_size)))
            self.c.append(nn.Parameter(torch.zeros(1, hidden_size)))

        self.image_attention = AttentionLayer(
            hidden_size, image_embed_size, hidden_size, bias=True)

        self.article_attention = AttentionLayer(
            hidden_size, article_embed_size, hidden_size, bias=True)

        self.attn_proj = GehringLinear(hidden_size * 2, hidden_size)

        self.embedder = embedder

        self.adaptive_softmax = None

        self.project_out_dim = GehringLinear(hidden_size, output_embed_dim, bias=False) \
            if hidden_size != output_embed_dim else None

        if adaptive_softmax_cutoff is not None:
            adaptive_inputs = None
            if isinstance(embedder, AdaptiveEmbedding):
                adaptive_inputs = embedder
            elif hasattr(embedder, 'token_embedder_adaptive'):
                adaptive_inputs = embedder.token_embedder_adaptive
            elif tie_adaptive_weights:
                raise ValueError('Cannot locate adaptive_inputs.')
            self.adaptive_softmax = AdaptiveSoftmax(
                vocab_size,
                output_embed_dim,
                eval_str_list(adaptive_softmax_cutoff, type=int),
                dropout=adaptive_softmax_dropout,
                adaptive_inputs=adaptive_inputs,
                factor=adaptive_softmax_factor,
                tie_proj=tie_adaptive_proj,
            )
        elif not self.share_input_output_embed:
            self.embed_out = nn.Parameter(
                torch.Tensor(vocab_size, output_embed_dim))
            nn.init.normal_(self.embed_out, mean=0,
                            std=output_embed_dim ** -0.5)
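The per-layer h/c parameters above are shaped (1, hidden_size) so they can be expanded across the batch as each LSTMCell's initial state. A standalone sketch of that expansion (toy sizes, single layer, input size simplified):

import torch

hidden_size, batch = 32, 4
h0 = torch.nn.Parameter(torch.zeros(1, hidden_size))
c0 = torch.nn.Parameter(torch.zeros(1, hidden_size))
cell = torch.nn.LSTMCell(input_size=hidden_size, hidden_size=hidden_size)

x = torch.randn(batch, hidden_size)
h, c = cell(x, (h0.expand(batch, -1), c0.expand(batch, -1)))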