def sent_level_attention(self):
    """Build the sentence-level encoder: bidirectional GRU, attention, dropout.

    Reads ``self.word_outputs`` and ``self.sent_lengths`` and stores the
    dropout-wrapped attention output in ``self.sent_outputs``. All variables
    are created under the ``'sent-level'`` variable scope.
    """
    with tf.variable_scope('sent-level') as scope:
        # Regroup the word-level outputs as [-1, max_sent_length, 2 * cell_dim].
        sent_inputs = tf.reshape(
            self.word_outputs,
            [-1, self.max_sent_length, 2 * self.cell_dim])

        # sentence encoder
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        def tiled_initial_state(var_name):
            # One zero-initialised [1, cell_dim] variable, repeated along the
            # first axis to match the leading dimension of sent_inputs.
            base_state = tf.get_variable(
                var_name,
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0))
            return tf.tile(base_state,
                           multiples=[get_shape(sent_inputs)[0], 1])

        init_state_fw = tiled_initial_state('init_state_fw')
        init_state_bw = tiled_initial_state('init_state_bw')

        # Only the per-step outputs are used; the returned state is discarded.
        rnn_outputs, _ = bidirectional_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=sent_inputs,
            input_lengths=self.sent_lengths,
            initial_state_fw=init_state_fw,
            initial_state_bw=init_state_bw,
            scope=scope)

        # The attention weights are returned but not used here.
        attended, _att_weights = attention(
            inputs=rnn_outputs,
            att_dim=self.att_dim,
            sequence_lengths=self.sent_lengths)

        self.sent_outputs = tf.layers.dropout(
            attended, self.dropout_rate, training=self.is_training)
def _init_word_encoder(self):
    '''
    Build the word encoder: bidirectional GRU over words, then attention.

    Reads ``self.embedded_inputs`` and ``self.word_lengths``; stores the
    dropout-wrapped attention output in ``self.word_outputs``.
    :return: None
    '''
    with tf.variable_scope('word-encoder') as scope:
        # Collapse the leading dimensions so every row is one sentence:
        # [-1, max_word_length, emb_size]. Sentences are treated independently
        # of the document they belong to and of their position in it.
        flat_shape = [-1, self.max_word_length, self.emb_size]
        word_inputs = tf.reshape(self.embedded_inputs, flat_shape)

        # Flattened to one length entry per sentence row.
        word_lengths = tf.reshape(self.word_lengths, [-1])

        # Forward and backward GRU cells.
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        # Forward initial state: a zero-initialised [1, cell_dim] variable
        # tiled once per sentence row.
        fw_base = tf.get_variable(
            'init_state_fw',
            shape=[1, self.cell_dim],
            initializer=tf.constant_initializer(0))
        init_state_fw = tf.tile(
            fw_base, multiples=[get_shape(word_inputs)[0], 1])

        # Same construction for the backward cell.
        bw_base = tf.get_variable(
            'init_state_bw',
            shape=[1, self.cell_dim],
            initializer=tf.constant_initializer(0))
        init_state_bw = tf.tile(
            bw_base, multiples=[get_shape(word_inputs)[0], 1])

        # bidirectional_rnn returns (outputs, state). The outputs are kept
        # rather than the state because the attention layer below is fed the
        # whole output sequence.
        rnn_outputs, _ = bidirectional_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=word_inputs,
            input_lengths=word_lengths,
            initial_state_fw=init_state_fw,
            initial_state_bw=init_state_bw,
            scope=scope)
        # Per the original author's note, rnn_outputs has shape
        # [number sentences, number words, 2 * cell_dim] - TODO confirm.

        # attention returns the sentence vectors and the attention weights
        # (alpha); the weights are not used here.
        sentence_vectors, _word_att_weights = attention(
            inputs=rnn_outputs,
            att_dim=self.att_dim,
            sequence_lengths=word_lengths)

        # Dropout is gated by self.is_training.
        self.word_outputs = tf.layers.dropout(
            sentence_vectors, self.dropout_rate, training=self.is_training)
def _init_sent_encoder(self):
    '''
    Build the sentence encoder: bidirectional GRU over sentences, then attention.

    Reads ``self.word_outputs`` and ``self.sent_lengths``; stores the
    dropout-wrapped attention output in ``self.sent_outputs``.
    :return: None
    '''
    with tf.variable_scope('sent-encoder') as scope:
        # Input is reshaped to [-1, max_sent_length, 2 * cell_dim]; per the
        # original note the leading axis is the number of documents.
        sent_inputs = tf.reshape(
            self.word_outputs,
            [-1, self.max_sent_length, 2 * self.cell_dim])

        # sentence encoder
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        def initial_state(var_name):
            # A zero-initialised [1, cell_dim] variable, tiled along the first
            # axis to match the leading dimension of sent_inputs.
            seed_state = tf.get_variable(
                var_name,
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0))
            repeats = [get_shape(sent_inputs)[0], 1]
            return tf.tile(seed_state, multiples=repeats)

        init_state_fw = initial_state('init_state_fw')
        init_state_bw = initial_state('init_state_bw')

        encoder_args = dict(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=sent_inputs,
            input_lengths=self.sent_lengths,
            initial_state_fw=init_state_fw,
            initial_state_bw=init_state_bw,
            scope=scope,
        )
        # The second return value (the state) is not needed.
        rnn_outputs, _ = bidirectional_rnn(**encoder_args)
        # Per the original author's note, rnn_outputs has shape
        # [num docs, number sentences, 2 * cell_dim] - TODO confirm.

        # attention returns the document vectors plus the attention weights;
        # the weights are not used here.
        doc_vectors, _sent_att_weights = attention(
            inputs=rnn_outputs,
            att_dim=self.att_dim,
            sequence_lengths=self.sent_lengths)

        # dropout
        self.sent_outputs = tf.layers.dropout(
            doc_vectors, self.dropout_rate, training=self.is_training)
def __init__(self, embedding_count_dict, embedding_dim_dict,
             embedding_features_list, user_behavior_features,
             activation="PReLU"):
    """Create the embedding layers, attention layer and fully connected stack.

    Args:
        embedding_count_dict: mapping indexed by feature name; the value is
            passed as the first argument of ``layers.Embedding``.
        embedding_dim_dict: mapping indexed by feature name; the value is
            passed as the second argument of ``layers.Embedding``.
        embedding_features_list: feature names that get an embedding layer.
        user_behavior_features: forwarded, together with
            ``embedding_dim_dict``, to ``utils.get_input_dim`` to size the
            attention layer.
        activation: ``"Dice"``, ``"dice"`` or ``"PReLU"``; selects the layer
            added after each hidden Dense layer. Any other value adds none.
    """
    super(DIN, self).__init__(embedding_count_dict, embedding_dim_dict,
                              embedding_features_list,
                              user_behavior_features, activation)

    # Init Embedding Layer
    self.embedding_dim_dict = embedding_dim_dict
    self.embedding_count_dict = embedding_count_dict
    self.embedding_layers = {
        feature: layers.Embedding(embedding_count_dict[feature],
                                  embedding_dim_dict[feature])
        for feature in embedding_features_list
    }

    # DIN Attention+Sum pooling
    behavior_dim = utils.get_input_dim(embedding_dim_dict,
                                       user_behavior_features)
    self.hist_at = attention(behavior_dim)

    # Init Fully Connection Layer
    self.fc = tf.keras.Sequential()
    self.fc.add(layers.BatchNormalization())
    # Two hidden layers (200 then 80 units), each followed by the optional
    # activation layer chosen through `activation`.
    for hidden_units in (200, 80):
        self.fc.add(layers.Dense(hidden_units, activation="relu"))
        if activation == "Dice":
            self.fc.add(Dice())
        elif activation == "dice":
            self.fc.add(dice(hidden_units))
        elif activation == "PReLU":
            self.fc.add(layers.PReLU(alpha_initializer='zeros', weights=None))
    # Final layer: 2 outputs, no activation.
    self.fc.add(layers.Dense(2, activation=None))
def _init_word_encoder(self):
    """Build the word-level encoder: bidirectional GRU, attention, dropout.

    Reads ``self.embedded_inputs`` and ``self.word_lengths`` and stores the
    dropout-wrapped attention output in ``self.word_outputs``. Variables are
    created under the ``'word-encoder'`` variable scope.
    """
    with tf.variable_scope('word-encoder') as scope:
        # Flatten to [-1, max_word_length, emb_size] with a matching
        # one-dimensional vector of lengths.
        word_inputs = tf.reshape(
            self.embedded_inputs,
            [-1, self.max_word_length, self.emb_size])
        word_lengths = tf.reshape(self.word_lengths, [-1])

        # word encoder
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        # Each direction starts from a zero-initialised [1, cell_dim]
        # variable tiled along the first axis of word_inputs.
        state_shape = [1, self.cell_dim]
        fw_variable = tf.get_variable(
            'init_state_fw',
            shape=state_shape,
            initializer=tf.constant_initializer(0))
        init_state_fw = tf.tile(
            fw_variable, multiples=[get_shape(word_inputs)[0], 1])
        bw_variable = tf.get_variable(
            'init_state_bw',
            shape=state_shape,
            initializer=tf.constant_initializer(0))
        init_state_bw = tf.tile(
            bw_variable, multiples=[get_shape(word_inputs)[0], 1])

        # Only the per-step outputs are used; the returned state is discarded.
        rnn_outputs, _ = bidirectional_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=word_inputs,
            input_lengths=word_lengths,
            initial_state_fw=init_state_fw,
            initial_state_bw=init_state_bw,
            scope=scope)

        # The attention weights are returned but not used here.
        pooled, _att_weights = attention(
            inputs=rnn_outputs,
            att_dim=self.att_dim,
            sequence_lengths=word_lengths)

        self.word_outputs = tf.layers.dropout(
            pooled, self.dropout_rate, training=self.is_training)
def __init__(self, sess, dense_units, rnn_units, max_length, nb_items,
             use_attention, model_type, batch_size, top_k, rnn_cell_type,
             rnn_dropout_rate, seed, learning_rate):
    """Build the graph that combines a support and a target basket sequence.

    All arguments are forwarded unchanged to the parent constructor. This
    constructor then creates the input placeholders, encodes both sequences,
    aggregates them into per-item logits and hands those to
    ``self.create_optimization_block``.

    Args used directly here:
        batch_size: first dimension of every placeholder.
        nb_items: second dimension of the ``target_item`` placeholder.
        use_attention: if truthy, the support encoder output is pooled with
            ``layers.attention``; otherwise ``layers.get_last_right_output``
            is used.
        rnn_cell_type, rnn_dropout_rate: passed to
            ``layers.create_rnn_encoder``.
        top_k: passed to ``self.create_optimization_block``.
    """
    super().__init__(sess, dense_units, rnn_units, max_length, nb_items,
                     use_attention, model_type, batch_size, top_k,
                     rnn_cell_type, rnn_dropout_rate, seed, learning_rate)

    with tf.variable_scope(self.scope):
        # Input placeholders.
        self.bseq_support = tf.placeholder(
            dtype=tf.float32,
            shape=(batch_size, self.max_length, self.nb_items),
            name='bseq_support')
        self.bseq_support_length = tf.placeholder(
            dtype=tf.int32, shape=(batch_size, ),
            name='bseq_support_length')
        self.bseq_target = tf.placeholder(
            dtype=tf.float32,
            shape=(batch_size, self.max_length, self.nb_items),
            name='bseq_target')
        self.bseq_target_length = tf.placeholder(
            dtype=tf.int32, shape=(batch_size, ),
            name='bseq_target_length')
        self.y = tf.placeholder(
            dtype=tf.float32, shape=(batch_size, nb_items),
            name='target_item')

        # Encode the support basket sequence: basket encoder, then RNN.
        support_baskets = layers.create_basket_encoder(
            self.bseq_support,
            self.dense_units,
            param_initializer=tf.initializers.he_uniform(),
            activation_func=tf.nn.relu)
        bseq_support_encoder = layers.create_rnn_encoder(
            support_baskets,
            self.rnn_units,
            rnn_dropout_rate,
            self.bseq_support_length,
            rnn_cell_type,
            param_initializer=tf.initializers.glorot_uniform(),
            seed=self.seed,
            name="Bseq_Support_Encoder")

        # Encode the target basket sequence. reuse=True is passed to the same
        # basket encoder builder; no RNN is applied on this side.
        bseq_target_encoder = layers.create_basket_encoder(
            self.bseq_target,
            self.dense_units,
            param_initializer=tf.initializers.he_uniform(),
            activation_func=tf.nn.relu,
            reuse=True)

        with tf.variable_scope("Aggregate_Layer"):
            if not use_attention:
                # Hack to build the indexing and retrieve the right output.
                support_output = layers.get_last_right_output(
                    bseq_support_encoder, self.max_length,
                    self.bseq_support_length, self.rnn_units)
            else:
                support_output = layers.attention(
                    bseq_support_encoder, self.rnn_units,
                    name="bseq_support_attention")

            target_output = layers.get_last_right_output(
                bseq_target_encoder, self.max_length,
                self.bseq_target_length, self.dense_units)

            # Aggregation weights and bias, drawn from a normal distribution
            # with stddev 0.01.
            support_weight_init = tf.random_normal(
                (self.rnn_units, self.nb_items), stddev=0.01)
            W_Agg_S = tf.get_variable(
                dtype=tf.float32, initializer=support_weight_init,
                name="W_Agg_S")
            target_weight_init = tf.random_normal(
                (self.dense_units, self.nb_items), stddev=0.01)
            W_Agg_T = tf.get_variable(
                dtype=tf.float32, initializer=target_weight_init,
                name="W_Agg_T")
            bias_init = tf.random_normal((1, self.nb_items), stddev=0.01)
            B_Agg = tf.get_variable(
                dtype=tf.float32, initializer=bias_init, name="B_Agg")

            support_term = tf.matmul(support_output, W_Agg_S)
            target_term = tf.matmul(target_output, W_Agg_T)
            logits = support_term + target_term + B_Agg

        with tf.name_scope("Optimization"):
            self.create_optimization_block(logits, self.y, top_k)
def __init__(self, vocab_size, max_enc_len, max_dec_len, embedding_dim=100,
             hidden_size=128, n_layers=1, bidirectional=False,
             pretrained_embeddings=None, trainable_embeddings=True,
             shared_embeddings=True, weight_tying=False,
             rnn_cell=tf.contrib.rnn.GRUCell):
    """Build the encoder / attention / decoder graph.

    Args:
        vocab_size: size passed to the embedding layers and used for the
            output projection and bias.
        max_enc_len: second dimension of the ``enc_inputs`` placeholder.
        max_dec_len: second dimension of the ``dec_inputs`` placeholder and
            length of the ``teacher_forcing_mask`` placeholder.
        embedding_dim: embedding width, also the size of the GO variable.
        hidden_size: number of units of every RNN cell.
        n_layers: when greater than 1 the cells are stacked with
            ``MultiRNNCell``.
        bidirectional: stored on the instance; not otherwise used here.
        pretrained_embeddings: forwarded to ``layers.embeddings_layer``.
        trainable_embeddings: forwarded to ``layers.embeddings_layer``.
        shared_embeddings: when ``True`` the decoder reuses the encoder
            embedding matrix.
        weight_tying: when ``True`` the output projection is derived from
            the decoder embeddings instead of being a free variable.
        rnn_cell: cell class, instantiated with ``num_units=hidden_size``.
    """
    # Hyper-parameters.
    self.vocab_size = vocab_size
    self.max_enc_len = max_enc_len
    self.max_dec_len = max_dec_len
    self.embedding_dim = embedding_dim
    self.hidden_size = hidden_size
    self.n_layers = n_layers
    self.bidirectional = bidirectional
    self.trainable_embeddings = trainable_embeddings
    self.shared_embeddings = shared_embeddings
    self.weight_tying = weight_tying
    self.rnn_cell = rnn_cell
    self._pretrained_embeddings = pretrained_embeddings

    # Graph inputs.
    self.enc_inputs = tf.placeholder(
        tf.int32, [None, self.max_enc_len], name='enc_inputs')
    self.dec_inputs = tf.placeholder(
        tf.int32, [None, self.max_dec_len], name='dec_inputs')
    self.enc_lens = tf.placeholder(tf.int32, [None], name='enc_lens')
    self.dec_lens = tf.placeholder(tf.int32, [None], name='dec_lens')
    self.dropout_keep_prob = tf.placeholder(
        tf.float32, name='dropout_keep_prob')
    self.teacher_forcing = tf.placeholder(tf.bool, name='teacher_forcing')
    self.teacher_forcing_mask = tf.placeholder(
        tf.int32, [self.max_dec_len], name='teacher_forcing_mask')
    self.is_training = tf.placeholder(tf.bool, name='is_training')
    self.sample_decoding = tf.placeholder(tf.bool, name='sample_decoding')

    with tf.variable_scope('embeddings_layer'):
        embedding_options = dict(
            trainable=self.trainable_embeddings,
            pretrained_embeddings=self._pretrained_embeddings,
        )
        if self.shared_embeddings == True:
            # One matrix serves both encoder and decoder.
            self.enc_embeddings = layers.embeddings_layer(
                self.vocab_size, self.embedding_dim,
                name='embeddings', **embedding_options)
            self.dec_embeddings = self.enc_embeddings
        else:
            self.enc_embeddings = layers.embeddings_layer(
                self.vocab_size, self.embedding_dim,
                name='enc_embeddings', **embedding_options)
            self.dec_embeddings = layers.embeddings_layer(
                self.vocab_size, self.embedding_dim,
                name='dec_embeddings', **embedding_options)

        enc_inputs_embd = tf.nn.embedding_lookup(
            self.enc_embeddings, self.enc_inputs)
        dec_inputs_embd = tf.nn.embedding_lookup(
            self.dec_embeddings, self.dec_inputs)

    def make_cell():
        # A single recurrent cell with dropout applied to its outputs.
        base_cell = self.rnn_cell(num_units=self.hidden_size)
        return tf.contrib.rnn.DropoutWrapper(
            base_cell, output_keep_prob=self.dropout_keep_prob)

    def make_cell_stack():
        # Stack n_layers cells when more than one layer is requested.
        if self.n_layers > 1:
            return tf.contrib.rnn.MultiRNNCell(
                [make_cell() for _ in range(self.n_layers)])
        return make_cell()

    with tf.variable_scope('encoder'):
        self.enc_cell = make_cell_stack()
        # final_output is returned by the encoder but not used below.
        enc_outputs, final_output, final_state = encoder.RNN_encoder(
            enc_inputs_embd, self.enc_cell, self.hidden_size,
            input_lens=self.enc_lens)

    with tf.variable_scope('attention'):
        attn_w_init = tf.truncated_normal(
            [self.hidden_size, self.hidden_size])
        self.attn_W = tf.Variable(attn_w_init, name='attn_W')
        attn_v_init = tf.truncated_normal([self.hidden_size])
        self.attn_v = tf.Variable(attn_v_init, name='attn_v')
        attn_state = layers.attention(
            enc_outputs, self.attn_W, self.attn_v, self.enc_lens,
            self.hidden_size)

    with tf.variable_scope('decoder'):
        self.dec_cell = make_cell_stack()

        if self.weight_tying == True:
            # Tie the output projection to the decoder embeddings:
            # [2 * hidden, emb] x [emb, vocab] -> [2 * hidden, vocab].
            proj_W = tf.Variable(
                tf.truncated_normal(
                    [2 * self.hidden_size, self.embedding_dim]),
                name='proj_W')
            embeddings_t = tf.transpose(self.dec_embeddings, [1, 0])
            self.dec_W = tf.tanh(tf.matmul(proj_W, embeddings_t))
        else:
            self.dec_W = tf.Variable(
                tf.truncated_normal(
                    [2 * self.hidden_size, self.vocab_size]),
                name='dec_W')

        self.dec_b = tf.Variable(
            tf.constant(0.0, shape=[self.vocab_size]), name='dec_b')
        self.go_var = tf.Variable(
            tf.truncated_normal([self.embedding_dim]), name='go_var')

        decoder_result = decoder.RNN_basic_attn_decoder(
            dec_inputs_embd, self.dec_cell, self.go_var, self.dec_W,
            self.dec_b, self.dec_embeddings, final_state, attn_state,
            self.hidden_size,
            teacher_forcing=self.teacher_forcing,
            teacher_forcing_mask=self.teacher_forcing_mask,
            sample_decoding=self.sample_decoding)
        (self.logits, self.dec_states, self.generated_words) = decoder_result
def acrnn(inputs, num_classes=7, is_training=True, L1=128, L2=256,
          cell_units=128, num_linear=768, p=10, time_step=150, F1=64,
          dropout_keep_prob=1):
    """
    Attention-based convolutional recurrent neural network

    Adapted from
    https://github.com/xuanjihe/speech-emotion-recognition/blob/master/model.py

    Mingyi Chen, Xuanji He, Jing Yang, Han Zhang, "3-D Convolutional Recurrent
    Neural Networks With Attention Model for Speech Emotion Recognition",
    IEEE Signal Processing Letters, vol. 25, no. 10, pp. 1440-1444, 2018.

    Args:
        inputs: tensor fed to the first ``tf.nn.conv2d``.
        num_classes, L1, L2, num_linear, F1: forwarded to ``get_dict``, which
            supplies the filters, weights and biases.
        is_training: gates every dropout layer and the batch-norm wrapper.
        cell_units: number of units of each LSTM cell (also forwarded to
            ``get_dict``).
        p: together with ``L2`` gives the feature width (``p * L2``) after the
            second convolution (also forwarded to ``get_dict``).
        time_step: sequence length used when reshaping for the recurrent
            layer.
        dropout_keep_prob: keep probability shared by all dropout layers.

    Returns:
        The un-normalised logits produced by the last fully connected layer.
    """
    # Fetch filter, weights and bias
    filters, weights, bias = get_dict(num_classes, L1, L2, cell_units,
                                      num_linear, F1, p)

    # Convolutional layer 1
    conv1 = tf.nn.conv2d(inputs, filters["conv1"], strides=[1, 1, 1, 1],
                         padding='SAME')
    conv1 = tf.nn.bias_add(conv1, bias["conv1"])
    conv1 = leaky_relu(conv1, 0.01)
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1],
                           padding='VALID', name='max_pool')
    conv1 = tf.contrib.layers.dropout(conv1, keep_prob=dropout_keep_prob,
                                      is_training=is_training)
    # layer1: [batch_size, 150, 10, 128] (original author's note; presumably
    # for the default arguments - TODO confirm)

    # Convolutional layer 2
    conv2 = tf.nn.conv2d(conv1, filters["conv2"], strides=[1, 1, 1, 1],
                         padding='SAME')
    conv2 = tf.nn.bias_add(conv2, bias["conv2"])
    conv2 = leaky_relu(conv2, 0.01)
    conv2 = tf.contrib.layers.dropout(conv2, keep_prob=dropout_keep_prob,
                                      is_training=is_training)
    # Both reshapes are kept from the original: first to one row per sequence,
    # then to one row per time step for the linear layer.
    conv2 = tf.reshape(conv2, [-1, time_step, L2 * p])
    conv2 = tf.reshape(conv2, [-1, p * L2])
    # layer2: [None, 2560] (original author's note, i.e. p * L2 for defaults)

    # Linear layer, applied per time step, then regrouped into sequences
    linear1 = tf.matmul(conv2, weights["linear1"]) + bias["linear1"]
    linear1 = batch_norm_wrapper(linear1, is_training)
    linear1 = leaky_relu(linear1, 0.01)
    linear1 = tf.reshape(linear1, [-1, time_step, num_linear])

    # Bidirectional LSTM layer
    lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(cell_units, forget_bias=1.0)
    lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(cell_units, forget_bias=1.0)
    # Only the per-step outputs are used; the final states are discarded.
    rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=lstm_fw_cell,
        cell_bw=lstm_bw_cell,
        inputs=linear1,
        dtype=tf.float32,
        time_major=False,
        scope='LSTM1')

    # Attention layer; the attention weights are returned but not used here.
    attended, _alphas = attention(rnn_outputs, 1, return_alphas=True)

    # Fully connected layer
    fully1 = tf.matmul(attended, weights["fully1"]) + bias["fully1"]
    fully1 = leaky_relu(fully1, 0.01)
    # Fix: this dropout used tf.nn.dropout unconditionally, so it stayed
    # active at inference whenever dropout_keep_prob < 1. It now honours
    # is_training like the other dropout layers above.
    fully1 = tf.contrib.layers.dropout(fully1, keep_prob=dropout_keep_prob,
                                       is_training=is_training)

    Ylogits = tf.matmul(fully1, weights["fully2"]) + bias["fully2"]
    return Ylogits