def call(self, decoder_inputs, encoder_outputs, decoder_self_attention_bias, attention_bias):
    batch_size, length, hidden_size = tf.unstack(tf.shape(decoder_inputs))
    act = ACT(batch_size, length, hidden_size)
    halt_threshold = 1.0 - self.hparams.act_epsilon

    state = decoder_inputs
    previous_state = tf.zeros_like(state, name='previous_state')
    for step in range(self.hparams.act_max_step):
        # stop once every position has accumulated enough halting probability
        if not act.should_continue(halt_threshold):
            break

        # add position and timestep encodings
        state += model_utils.get_position_encoding(self.hparams.max_length, hidden_size)
        state += model_utils.get_timestep_encoding(
            step, self.hparams.act_max_step, hidden_size)

        # pondering value decides how much this step contributes per position
        pondering = self.pondering_layer(state)
        pondering = tf.squeeze(pondering, axis=-1)

        # proceed one ACT step to get the per-position update weights
        update_weights = act(pondering, halt_threshold)

        # decoder block: self-attention, encoder-decoder attention, feed-forward
        state = self.self_attention_wrapper(state, decoder_self_attention_bias)
        state = self.enc_dec_attention_wrapper(state, encoder_outputs, attention_bias)
        state = self.ffn_wrapper(state)

        # interpolate the new state with the previous state using the ACT weights
        new_state = (state * update_weights) + (previous_state * (1 - update_weights))
        previous_state = new_state

    return self.output_norm(new_state), act.n_updates, act.remainders
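# The ACT bookkeeping object used by the decoder call above is not shown in this
# section. The class below is a minimal sketch of such a helper, following the
# standard Adaptive Computation Time update from the Universal Transformer; it is
# an assumption for illustration, not the repository's actual implementation.
import tensorflow as tf


class ACT:

    def __init__(self, batch_size, length, hidden_size):
        shape = [batch_size, length]
        self.halting_probability = tf.zeros(shape)  # accumulated halting probability
        self.remainders = tf.zeros(shape)           # remainder R(t) from the ACT paper
        self.n_updates = tf.zeros(shape)            # number of steps taken per position
        self.hidden_size = hidden_size

    def should_continue(self, halt_threshold):
        # Keep iterating while any position has not accumulated enough halting mass.
        return tf.reduce_any(self.halting_probability < halt_threshold)

    def __call__(self, pondering, halt_threshold):
        # Positions still below full halting probability before this step.
        still_running = tf.cast(self.halting_probability < 1.0, tf.float32)
        # Positions that cross the threshold at this step.
        new_halted = tf.cast(
            self.halting_probability + pondering * still_running > halt_threshold,
            tf.float32) * still_running
        # Positions that remain below the threshold after this step.
        still_running = tf.cast(
            self.halting_probability + pondering * still_running <= halt_threshold,
            tf.float32) * still_running

        self.halting_probability += pondering * still_running
        self.remainders += new_halted * (1.0 - self.halting_probability)
        self.halting_probability += new_halted * self.remainders
        self.n_updates += still_running + new_halted

        # Per-position mixing weights, broadcast over the hidden dimension.
        update_weights = pondering * still_running + new_halted * self.remainders
        return tf.expand_dims(update_weights, axis=-1)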
def __init__(
    self,
    glove_path: str,
    model_params: ModelParams,
    hidden_dimension: int,
):
    super().__init__()
    self.model_params = model_params
    self.word_embedding_net = WordEmbedding(
        vocabulary_size=model_params.vocabulary_size,
        pretrained_vectors_file=glove_path,
        embedding_dimension=model_params.word_embedding_dimension,
        dropout=0.25,
    )
    self.question_embedding_net = QuestionEmbedding(
        input_dimension=model_params.word_embedding_dimension,
        number_hidden_units=hidden_dimension,
        number_of_layers=1,
    )
    self.question_projection_net = MultiLayerNet(
        dimensions=[hidden_dimension, hidden_dimension], dropout=0.5)
    self.image_projection_net = MultiLayerNet(
        dimensions=[
            model_params.object_embedding_dimension,
            hidden_dimension,
        ],
        dropout=0.5,
    )
    if self.model_params.add_self_attention:
        self.question_self_attention_net = SelfAttention(hidden_dimension, dropout=0.3)
        self.visual_self_attention_net = SelfAttention(hidden_dimension, dropout=0.3)
    self.question_attention_net = Attention(model_params.number_of_objects, dropout=0.3)
    self.visual_attention_net = Attention(
        model_params.question_sequence_length, dropout=0.3)
    if model_params.fusion_method == FusionMethod.CONCAT:
        # With self-attention enabled, one extra branch is concatenated, so the
        # classifier input grows by a factor of 3 instead of 2.
        factor = 3 if self.model_params.add_self_attention else 2
        self.classifier = Classifier(
            input_dimension=hidden_dimension * factor,
            hidden_dimension=hidden_dimension * 4,
            output_dimension=model_params.num_ans_candidates,
            dropout=0.5,
        )
    elif model_params.fusion_method == FusionMethod.HADAMARD:
        self.classifier = Classifier(
            input_dimension=hidden_dimension,
            hidden_dimension=hidden_dimension * 2,
            output_dimension=model_params.num_ans_candidates,
            dropout=0.5,
        )
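# Hypothetical construction of the inputs to the __init__ above. ModelParams is
# assumed to be a simple container (e.g. a dataclass) exposing the fields read in
# the constructor; FusionMethod.CONCAT is one of the two branches handled above.
# The model class name (VqaModel) and every value are illustrative placeholders.
model_params = ModelParams(
    vocabulary_size=20000,
    word_embedding_dimension=300,
    object_embedding_dimension=2048,
    number_of_objects=36,
    question_sequence_length=14,
    num_ans_candidates=3000,
    add_self_attention=True,
    fusion_method=FusionMethod.CONCAT,
)
model = VqaModel(
    glove_path='path/to/glove_vectors',
    model_params=model_params,
    hidden_dimension=1024,
)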
class Dense_Self_Attention(nn.Module):

    def __init__(self, wordEmbedding, hidden_size, self_att_hidden, n_hops,
                 paddingId, updateEmbedding, dropout=None):
        super(Dense_Self_Attention, self).__init__()
        embeddingSize = wordEmbedding.getEmbeddingSize()
        self.embedding = nn.Embedding(
            wordEmbedding.getNumberOfVectors(), embeddingSize,
            padding_idx=wordEmbedding.getPaddingIdx())
        self.embedding.weight.data.copy_(
            torch.from_numpy(wordEmbedding.getEmbeddingMatrix()))
        self.embedding.weight.requires_grad = updateEmbedding
        self.dense = nn.Linear(embeddingSize, embeddingSize) if hidden_size else None
        self.self_attention = SelfAttention(embeddingSize, self_att_hidden, n_hops)
        self.paddingId = paddingId
        self.output_size = self.self_attention.getOutputSize()
        self.dropout = nn.Dropout(dropout) if dropout else None

    def forward(self, x):
        mask = (x != self.paddingId).float()
        x = self.embedding(x)
        if self.dense is not None:
            # residual connection over the dense projection, masked at padding positions
            res = F.relu(self.dense(x)) * mask.unsqueeze(2)
            x = x + res
        x, att = self.self_attention(x, mask)
        x = x.view(x.size(0), self.output_size)
        if self.dropout:
            x = self.dropout(x)
        return x

    def getOutputSize(self):
        return self.self_attention.getOutputSize()
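# The SelfAttention module used by Dense_Self_Attention above is not defined in this
# section. Below is a minimal PyTorch sketch assuming it implements the multi-hop
# structured self-attention of Lin et al. (2017), which matches the
# (input_size, att_hidden, n_hops) constructor, the (x, mask) forward signature and
# the getOutputSize() usage above; the project's actual implementation may differ.
import torch
import torch.nn as nn


class SelfAttention(nn.Module):

    def __init__(self, input_size, att_hidden, n_hops):
        super(SelfAttention, self).__init__()
        self.ws1 = nn.Linear(input_size, att_hidden, bias=False)
        self.ws2 = nn.Linear(att_hidden, n_hops, bias=False)
        self.input_size = input_size
        self.n_hops = n_hops

    def getOutputSize(self):
        # One input_size-dimensional vector per attention hop, flattened by the caller.
        return self.n_hops * self.input_size

    def forward(self, x, mask):
        # x: (batch, seq_len, input_size); mask: (batch, seq_len), 1.0 for real tokens.
        scores = self.ws2(torch.tanh(self.ws1(x)))                  # (batch, seq_len, n_hops)
        scores = scores.masked_fill(mask.unsqueeze(2) == 0, float('-inf'))
        att = torch.softmax(scores, dim=1).transpose(1, 2)          # (batch, n_hops, seq_len)
        out = torch.bmm(att, x)                                     # (batch, n_hops, input_size)
        return out, att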
def __init__(self, hparams, is_train):
    super(DecoderStack, self).__init__()
    self.my_layers = []
    self.hparams = hparams
    self_attention_layer = SelfAttention(hparams['num_units'], hparams['num_heads'],
                                         hparams['dropout_rate'], is_train)
    enc_dec_attention_layer = MultiheadAttention(hparams['num_units'], hparams['num_heads'],
                                                 hparams['dropout_rate'], is_train)
    ffn_layer = FeedForwardNetwork(hparams['num_units'], hparams['num_filter_units'],
                                   hparams['dropout_rate'], is_train)
    self.self_attention_wrapper = LayerWrapper(self_attention_layer, hparams['num_units'],
                                               hparams['dropout_rate'], is_train)
    self.enc_dec_attention_wrapper = LayerWrapper(enc_dec_attention_layer, hparams['num_units'],
                                                  hparams['dropout_rate'], is_train)
    self.ffn_wrapper = LayerWrapper(ffn_layer, hparams['num_units'],
                                    hparams['dropout_rate'], is_train)
    self.output_norm = LayerNormalization(hparams['num_units'])
    self.pondering_layer = tf.keras.layers.Dense(
        1, activation=tf.nn.sigmoid, use_bias=True,
        bias_initializer=tf.constant_initializer(1.0))
def __init__(self, hparams, is_train):
    super(DecoderStack, self).__init__()
    self.my_layers = []
    for i in range(hparams['num_layers']):
        self_attention_layer = SelfAttention(hparams['num_units'], hparams['num_heads'],
                                             hparams['dropout_rate'], is_train)
        enc_dec_attention_layer = MultiheadAttention(
            hparams['num_units'], hparams['num_heads'], hparams['dropout_rate'], is_train)
        ffn_layer = FeedForwardNetwork(hparams['num_units'], hparams['num_filter_units'],
                                       hparams['dropout_rate'], is_train)
        self.my_layers.append([
            LayerWrapper(self_attention_layer, hparams['num_units'],
                         hparams['dropout_rate'], is_train),
            LayerWrapper(enc_dec_attention_layer, hparams['num_units'],
                         hparams['dropout_rate'], is_train),
            LayerWrapper(ffn_layer, hparams['num_units'],
                         hparams['dropout_rate'], is_train),
        ])
    self.output_norm = LayerNormalization(hparams['num_units'])
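# Illustrative hparams dictionary for the two DecoderStack constructors above, which
# read their hyperparameters by key; all values are placeholders, not tuned settings.
hparams = {
    'num_layers': 6,        # only read by the stacked (per-layer) variant
    'num_units': 256,
    'num_heads': 8,
    'num_filter_units': 1024,
    'dropout_rate': 0.1,
}
decoder_stack = DecoderStack(hparams, is_train=True)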
def __init__(self, hparams, is_train):
    super(EncoderStack, self).__init__()
    self.hparams = hparams
    self_attention_layer = SelfAttention(hparams.num_units, hparams.num_heads,
                                         hparams.dropout_rate, is_train)
    ffn_layer = FeedForwardNetwork(hparams.num_units, hparams.num_filter_units,
                                   hparams.dropout_rate, is_train)
    self.self_attention_wrapper = LayerWrapper(self_attention_layer, hparams.num_units,
                                               hparams.dropout_rate, is_train)
    self.ffn_wrapper = LayerWrapper(ffn_layer, hparams.num_units,
                                    hparams.dropout_rate, is_train)
    self.output_norm = LayerNormalization(hparams.num_units)
    self.pondering_layer = tf.keras.layers.Dense(
        1, activation=tf.nn.sigmoid, use_bias=True,
        bias_initializer=tf.constant_initializer(1.0))
    self.num_head_layer = tf.keras.layers.Dense(
        1, activation=tf.nn.sigmoid, use_bias=True,
        bias_initializer=tf.constant_initializer(1.0))
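# The EncoderStack above (and the ACT decoder call at the top of this section) read
# hparams via attribute access, so any object exposing these attributes works; a
# SimpleNamespace is used here for illustration and all values are placeholders.
from types import SimpleNamespace

hparams = SimpleNamespace(
    num_units=256,
    num_heads=8,
    num_filter_units=1024,
    dropout_rate=0.1,
    max_length=256,
    act_max_step=8,
    act_epsilon=0.01,
)
encoder_stack = EncoderStack(hparams, is_train=True)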
def self_attention(input_tensor, att_dim, att_width, dropout, masking=True,
                   layer=1, is_residual=True):
    for _ in range(layer):
        output_tensor = SelfAttention(att_dim, att_width, dropout, masking)(input_tensor)
        if is_residual:
            input_tensor = concatenate([input_tensor, output_tensor])
            input_tensor = TimeDistributed(Dense(
                att_dim, activation='relu'))(input_tensor)
        else:
            input_tensor = output_tensor
    return input_tensor
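# Hypothetical usage of the self_attention helper above: stack two residual
# self-attention layers over an embedded sequence. The Keras import path, the
# placeholder sizes (vocab_size, max_len) and all hyperparameter values are
# assumptions for illustration; SelfAttention is the custom layer this module imports.
from tensorflow.keras.layers import Input, Embedding

vocab_size, max_len = 20000, 100  # placeholder sizes
inputs = Input(shape=(max_len,))
embedded = Embedding(vocab_size, 128)(inputs)
attended = self_attention(embedded, att_dim=128, att_width=5, dropout=0.1,
                          masking=True, layer=2, is_residual=True)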
def processDescriptionParam(descOpts, bugReportDatabase, inputHandlers, preprocessors,
                            encoders, databasePath, cacheFolder, logger, paddingSym):
    # Use summary and description (concatenated) to address this problem
    logger.info("Using Description information.")

    # Loading word embedding
    lexicon, embedding = load_embedding(descOpts, paddingSym)
    logger.info("Lexicon size: %d" % (lexicon.getLen()))
    logger.info("Word Embedding size: %d" % (embedding.getEmbeddingSize()))
    paddingId = lexicon.getLexiconIndex(paddingSym)

    # Loading Filters
    filters = loadFilters(descOpts['filters'])

    # Tokenizer
    if descOpts['tokenizer'] == 'default':
        logger.info("Use default tokenizer to tokenize description information")
        tokenizer = MultiLineTokenizer()
    elif descOpts['tokenizer'] == 'white_space':
        logger.info("Use white space tokenizer to tokenize description information")
        tokenizer = WhitespaceTokenizer()
    else:
        raise ArgumentError(
            "Tokenizer value %s is invalid. You should choose one of these: default and white_space"
            % descOpts['tokenizer'])

    arguments = (
        databasePath, descOpts['word_embedding'], str(descOpts['lexicon']),
        ' '.join(sorted([fil.__class__.__name__ for fil in filters])),
        descOpts['tokenizer'], "description")

    descCache = PreprocessingCache(cacheFolder, arguments)
    descPreprocessor = DescriptionPreprocessor(lexicon, bugReportDatabase, filters,
                                               tokenizer, paddingId, descCache)
    preprocessors.append(descPreprocessor)

    if descOpts['encoder_type'] == 'rnn':
        rnnType = descOpts.get('rnn_type')
        hiddenSize = descOpts.get('hidden_size')
        bidirectional = descOpts.get('bidirectional', False)
        numLayers = descOpts.get('num_layers', 1)
        dropout = descOpts.get('dropout', 0.0)
        updateEmb = descOpts.get('update_embedding', False)
        fixedOpt = descOpts.get('fixed_opt', False)

        descRNN = SortedRNNEncoder(rnnType, embedding, hiddenSize, numLayers,
                                   bidirectional, updateEmb, dropout)

        if fixedOpt == 'self_att':
            att = SelfAttention(descRNN.getOutputSize(), descOpts['self_att_hidden'],
                                descOpts['n_hops'])
            descEncoder = RNN_Self_Attention(descRNN, att, paddingId, dropout)
        else:
            descEncoder = RNNFixedOuput(descRNN, fixedOpt, dropout)

        encoders.append(descEncoder)
        inputHandlers.append(RNNInputHandler(paddingId))
    elif descOpts['encoder_type'] == 'cnn':
        windowSizes = descOpts.get('window_sizes', [3])
        nFilters = descOpts.get('nfilters', 100)
        updateEmb = descOpts.get('update_embedding', False)
        actFunc = loadActivationFunction(descOpts.get('activation', 'relu'))
        batchNorm = descOpts.get('batch_normalization', False)
        dropout = descOpts.get('dropout', 0.0)

        descEncoder = TextCNN(windowSizes, nFilters, embedding, updateEmb, actFunc,
                              batchNorm, dropout)
        encoders.append(descEncoder)
        inputHandlers.append(TextCNNInputHandler(paddingId, max(windowSizes)))
    elif descOpts['encoder_type'] == 'cnn+dense':
        windowSizes = descOpts.get('window_sizes', [3])
        nFilters = descOpts.get('nfilters', 100)
        updateEmb = descOpts.get('update_embedding', False)
        actFunc = loadActivationFunction(descOpts.get('activation', 'relu'))
        batchNorm = descOpts.get('batch_normalization', False)
        dropout = descOpts.get('dropout', 0.0)
        hiddenSizes = descOpts.get('hidden_sizes')
        hiddenAct = loadActivationClass(descOpts.get('hidden_act'))
        hiddenDropout = descOpts.get('hidden_dropout')
        batchLast = descOpts.get("bn_last_layer", False)

        cnnEnc = TextCNN(windowSizes, nFilters, embedding, updateEmb, actFunc,
                         batchNorm, dropout)
        descEncoder = MultilayerDense(cnnEnc, hiddenSizes, hiddenAct, batchNorm,
                                      batchLast, hiddenDropout)
        encoders.append(descEncoder)
        inputHandlers.append(TextCNNInputHandler(paddingId, max(windowSizes)))
    elif descOpts['encoder_type'] == 'dense+self_att':
        dropout = descOpts.get('dropout', 0.0)
        hiddenSize = descOpts.get('hidden_size')
        self_att_hidden = descOpts['self_att_hidden']
        n_hops = descOpts['n_hops']
        updateEmb = descOpts.get('update_embedding', False)

        descEncoder = Dense_Self_Attention(embedding, hiddenSize, self_att_hidden,
                                           n_hops, paddingId, updateEmb, dropout=dropout)
        encoders.append(descEncoder)
        inputHandlers.append(TextCNNInputHandler(paddingId, -1))
    elif descOpts['encoder_type'] == 'word_mean':
        standardization = descOpts.get('standardization', False)
        dropout = descOpts.get('dropout', 0.0)
        updateEmb = descOpts.get('update_embedding', False)
        batch_normalization = descOpts.get('batch_normalization', False)
        hiddenSize = descOpts.get('hidden_size')

        descEncoder = WordMean(embedding, updateEmb, hiddenSize, standardization,
                               dropout, batch_normalization)
        encoders.append(descEncoder)
        inputHandlers.append(RNNInputHandler(paddingId))
    else:
        raise ArgumentError(
            "Encoder type of summary and description is invalid (%s). You should choose "
            "one of these: rnn, cnn, cnn+dense, dense+self_att and word_mean"
            % descOpts['encoder_type'])
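# Illustrative descOpts configuration for the 'dense+self_att' branch of
# processDescriptionParam above. The keys mirror the lookups in the function;
# the paths and numeric values are placeholders, not recommended settings.
descOpts = {
    'word_embedding': 'path/to/word_embedding.vec',
    'lexicon': None,
    'filters': [],
    'tokenizer': 'white_space',
    'encoder_type': 'dense+self_att',
    'hidden_size': 300,
    'self_att_hidden': 100,
    'n_hops': 20,
    'update_embedding': False,
    'dropout': 0.1,
}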