                                trainable=True)  # load pretrained_emb
    ## see the backup: how to deal with too large emb
else:
    #embedding = tf.get_variable('embedding', [vocab_size, embedding_size], trainable=False)
    embedding = tf.get_variable('embedding', initializer=emb, trainable=finetune_emb)

X_embed = tf.nn.embedding_lookup(embedding, X)  # None, doc_s, sen_s, embed_s

with tf.name_scope('rnn_layer'):
    rnn_outputs, rnn_states = rnn_layer.bi_rnn(X_embed, n_hidden=n_hidden, seq_len=seq_length,
                                               n_layer=n_layer, is_train=is_training,
                                               keep_prob=keep_prob)  #### need seq_length??

with tf.name_scope('attention_layer'):
    atten_out, soft_atten_weights = attn_layer.atten_layer_project(rnn_outputs, atten_size,
                                                                   n_layer=n_layer, l2reg=l2reg,
                                                                   seq_len=seq_length,
                                                                   use_mask=use_mask)
    # Dropout
    atten_out_drop = tf.nn.dropout(atten_out, keep_prob)

with tf.name_scope('logits'):
    optimizer, logits, cost, accuracy, Y_proba = model.clf_train_op(atten_out_drop, y,
                                                                    ac_fn=tf.nn.relu, lr=lr,
                                                                    l2reg=l2reg, n_class=n_class)

init, saver = model.initializer()


# In[15]:


print(X_embed)
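
# Illustrative aside, not part of the original graph code: tf.nn.embedding_lookup
# gathers rows of the embedding table by token id, appending an embedding_size
# axis to the id tensor's shape. A tiny numpy analogue, with made-up demo_* names:
import numpy as np

demo_emb = np.arange(12, dtype=np.float32).reshape(4, 3)  # toy table: 4 tokens, 3-dim vectors
demo_ids = np.array([[1, 3, 0],
                     [2, 2, 1]])                          # toy batch: 2 sequences of 3 token ids
demo_vecs = demo_emb[demo_ids]                            # same row-gather as embedding_lookup
print(demo_vecs.shape)                                    # (2, 3, 3): batch, seq, embed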
    sen_rnn_outputs, sen_rnn_states = rnn_layer.bi_rnn(X_embed_reshape, n_hidden=n_hidden,
                                                       seq_len=tf.reshape(sen_seq_length, [-1]),
                                                       n_layer=n_layer, is_train=is_training,
                                                       keep_prob=keep_prob, scope='sen_rnn_block')

with tf.name_scope('sen_attn'):
    sen_atten_out, sen_atten_w = attn_layer.atten_layer_project(sen_rnn_outputs, atten_size,
                                                                n_layer=n_layer, l2reg=l2reg,
                                                                seq_len=tf.reshape(sen_seq_length, [-1]),
                                                                use_mask=use_mask, sen_CLS=sen_CLS,
                                                                scope='sen_attn_block')
    # Dropout
    #sen_atten_out_drop = tf.layers.dropout(sen_atten_out, rate=1-0.5, training=is_training)  # tf.nn.dropout

#with tf.name_scope('sen_stack'):
    #sen_outs = stack_layer(sen_atten_out, sen_atten_w, sen_rnn_states, X_embed_reshape, scope='sen_stack_block')
    #sen_outs = stack_layer(sen_atten_out, sen_rnn_states, scope='sen_stack_block')
    #sen_outs_drop = tf.layers.dropout(sen_outs, rate=1-0.5, training=is_training)

#with tf.device('/gpu:2'):
with tf.name_scope('doc_rnn'):
    doc_inputs = tf.reshape(sen_atten_out,
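                            # Aside (added note, not original code): sen_seq_length is per-sentence,
                            # shaped [batch, doc_len], so tf.reshape(sen_seq_length, [-1]) above flattens
                            # it to [batch*doc_len] to line up with the rows of X_embed_reshape
                            # ([batch*doc_len, sen_len, embedding_size]) for masking in bi_rnn and in the
                            # attention layer; doc_inputs then unfolds the per-sentence vectors back to
                            # [batch, doc_len, sentence_vector_size] for the document-level RNN.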
    ## see the backup: how to deal with too large emb
else:
    #embedding = tf.get_variable('embedding', [vocab_size, embedding_size], trainable=False)
    embedding = tf.get_variable('embedding', initializer=emb, trainable=finetune_emb)

X_embed = tf.nn.embedding_lookup(embedding, X)  # None, doc_s, sen_s, embed_s

#with tf.device('/gpu:1'):
with tf.name_scope('sen_rnn'):
    X_embed_reshape = tf.reshape(X_embed, [-1, sen_len, embedding_size])
    sen_rnn_outputs, sen_rnn_states = rnn_layer.bi_rnn(X_embed_reshape, n_hidden=n_hidden,
                                                       seq_len=sen_seq_length, n_layer=n_layer,
                                                       is_train=is_training, keep_prob=keep_prob,
                                                       scope='sen_rnn_block')

with tf.name_scope('sen_attn'):
    sen_atten_out, sen_atten_w = attn_layer.atten_layer_project(sen_rnn_outputs, atten_size,
                                                                n_layer=n_layer, l2reg=l2reg,
                                                                seq_len=sen_seq_length,
                                                                use_mask=use_mask,
                                                                scope='sen_attn_block')
    # Dropout
    #sen_atten_out_drop = tf.layers.dropout(sen_atten_out, rate=1-0.5, training=is_training)  # tf.nn.dropout

#with tf.name_scope('sen_stack'):
    #sen_outs = stack_layer(sen_atten_out, sen_atten_w, sen_rnn_states, X_embed_reshape, scope='sen_stack_block')
    #sen_outs = stack_layer(sen_atten_out, sen_rnn_states, scope='sen_stack_block')
    #sen_outs_drop = tf.layers.dropout(sen_outs, rate=1-0.5, training=is_training)

#with tf.device('/gpu:2'):
with tf.name_scope('doc_rnn'):
    doc_inputs = tf.reshape(sen_atten_out, [-1, doc_len, sen_atten_out.shape[1]])
    #doc_inputs = tf.reshape(sen_outs, [-1, doc_size, sen_outs.shape[1]])
    doc_rnn_outputs, doc_rnn_states = rnn_layer.bi_rnn(doc_inputs, n_hidden=n_hidden,
                                                       seq_len=doc_seq_length, n_layer=n_layer,
                                                       is_train=is_training,
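                                                       # Aside (added note, not original code): doc_inputs unfolds the
                                                       # per-sentence vectors back to [batch, doc_len, sen_atten_out.shape[1]],
                                                       # where sen_atten_out.shape[1] is whatever width atten_layer_project
                                                       # emits per sentence (2*n_hidden for a plain attention-weighted sum
                                                       # over the bi-RNN outputs), so this document-level bi-RNN runs over
                                                       # doc_len sentence vectors per document.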