def call(self, x, sequence_length=None, axis=1):
  # score every timestep, then normalize over the sequence dimension
  logits = self.FFN(x)
  alphas = tf.nn.softmax(logits) if sequence_length is None \
      else melt.masked_softmax(logits, sequence_length)
  # attention-weighted sum over the time axis
  encoding = tf.reduce_sum(x * alphas, 1)
  # [batch_size, sequence_length, 1] -> [batch_size, sequence_length]
  self.alphas = tf.squeeze(alphas, -1)
  #self.alphas = alphas
  tf.add_to_collection('self_attention', self.alphas)
  return encoding
def call(self, outputs, sequence_length=None, axis=1):
  self.step += 1
  # lazily build the projection layer on the first call, sized to the input's last dimension
  if self.step == 0 and self.dense is None:
    self.dense = layers.Dense(melt.get_shape(outputs, -1), activation=self.activation)
  x = self.dense(outputs)
  logits = self.logits(x)
  alphas = tf.nn.softmax(logits) if sequence_length is None \
      else melt.masked_softmax(logits, sequence_length)
  # attention-weighted sum over the time axis
  encoding = tf.reduce_sum(outputs * alphas, 1)
  # [batch_size, sequence_length, 1] -> [batch_size, sequence_length]
  self.alphas = tf.squeeze(alphas, -1)
  #self.alphas = alphas
  tf.add_to_collection('self_attention', self.alphas)
  return encoding
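# Minimal sketch (not part of the original file): both call() variants above
# implement attention pooling -- score each timestep, softmax the scores
# (masking padded steps when sequence_length is given), and return the weighted
# sum over the time axis. The helper below shows that pattern with plain
# TensorFlow ops; it assumes melt.masked_softmax behaves like this
# mask-then-softmax step, which is an assumption rather than melt's actual code.
def attention_pool(x, logits, sequence_length=None):
  # x:      [batch_size, max_len, dim] sequence features
  # logits: [batch_size, max_len, 1] unnormalized attention scores
  if sequence_length is not None:
    mask = tf.sequence_mask(sequence_length, tf.shape(x)[1], dtype=tf.float32)
    # push padded positions toward zero attention weight
    logits += (1.0 - tf.expand_dims(mask, -1)) * -1e9
  alphas = tf.nn.softmax(logits, axis=1)   # [batch_size, max_len, 1]
  encoding = tf.reduce_sum(x * alphas, axis=1)
  return encoding, tf.squeeze(alphas, -1)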