def conv_block(input_tensor, kernel, filters, name, strides=(2, 2)):
    """
    Create a ResNet block with three convolution layers and a projected skip connection.

    Args:
        input_tensor: input TensorFlow layer
        kernel: kernel size of the middle convolution layer
        filters: list of filter counts for the three convolution layers
        name: name of the block
        strides: tuple of strides for the first convolution and the shortcut
    Output:
        x: block output layer
    """
    filters1, filters2, filters3 = filters
    x = tf.layers.conv2d(input_tensor, filters1, (1, 1), strides, name='convfir' + name,
                         activation=tf.nn.relu, kernel_initializer=xavier())
    x = tf.layers.conv2d(x, filters2, kernel, name='convsec' + name,
                         activation=tf.nn.relu, padding='SAME', kernel_initializer=xavier())
    x = tf.layers.conv2d(x, filters3, (1, 1), name='convthr' + name,
                         kernel_initializer=xavier())
    shortcut = tf.layers.conv2d(input_tensor, filters3, (1, 1), strides, name='short' + name,
                                kernel_initializer=xavier())
    # Residual connection: add the projected shortcut to the main branch.
    x = tf.add(x, shortcut)
    x = tf.nn.relu(x)
    return x
def __init__(self, args, wrd_emb):
    self.max_doc_len = args['max_doc_len']
    self.max_sen_len = args['max_sen_len']
    self.cls_cnt = args['cls_cnt']
    self.embedding = args['embedding']
    self.emb_dim = args['emb_dim']
    self.hidden_size = self.emb_dim
    self.prd_cnt = args['prd_cnt']
    self.l2_rate = args['l2_rate']
    self.debug = args['debug']

    self.best_dev_acc = .0
    self.best_test_acc = .0
    self.best_test_rmse = .0

    # initializers for parameters
    self.w_init = xavier()
    self.b_init = tf.initializers.zeros()
    self.e_init = xavier()

    # embeddings in the model
    self.wrd_emb = wrd_emb
    self.prd_emb = var('prd_emb', [self.prd_cnt, self.emb_dim], self.e_init)
    self.embeddings = [self.wrd_emb, self.prd_emb]
def identity_block(input_tensor, kernel, filters, name):
    """
    Create a ResNet block with three convolution layers and an identity skip connection.

    Args:
        input_tensor: input TensorFlow layer.
        kernel: kernel size of the middle convolution layer.
        filters: list of filter counts for the three convolution layers.
        name: name of the block.
    Output:
        x: block output layer
    """
    filters1, filters2, filters3 = filters
    x = tf.layers.conv2d(input_tensor, filters1, (1, 1), name='convfir' + name,
                         activation=tf.nn.relu, kernel_initializer=xavier())
    x = tf.layers.conv2d(x, filters2, kernel, name='convsec' + name,
                         activation=tf.nn.relu, padding='SAME', kernel_initializer=xavier())
    x = tf.layers.conv2d(x, filters3, (1, 1), name='convthr' + name,
                         kernel_initializer=xavier())
    # Identity shortcut: filters3 must match the channel count of input_tensor.
    x = tf.add(x, input_tensor)
    x = tf.nn.relu(x)
    return x
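# Usage sketch for the two ResNet blocks above (illustrative only): the placeholder
# input, the stem convolution, and the chosen filter counts are assumptions, not part
# of the original code. The identity block requires its input channel count to match
# the last entry of `filters`, since the skip connection is an addition.
import tensorflow as tf
from tensorflow.contrib.layers import xavier_initializer as xavier

images = tf.placeholder(tf.float32, [None, 28, 28, 1])        # hypothetical grayscale batch
stem = tf.layers.conv2d(images, 128, (3, 3), padding='SAME',
                        activation=tf.nn.relu, kernel_initializer=xavier())
net = conv_block(stem, (3, 3), [32, 32, 128], name='blk1', strides=(1, 1))
net = identity_block(net, (3, 3), [32, 32, 128], name='blk2')  # 128 channels in == 128 out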
def __init__(self, args):
    self.max_doc_len = args['max_doc_len']
    self.max_sen_len = args['max_sen_len']
    self.cls_cnt = args['cls_cnt']
    self.emb_dim = args['emb_dim']
    self.hidden_size = args['hidden_size']
    self.usr_cnt = args['usr_cnt']
    self.prd_cnt = args['prd_cnt']
    self.l2_rate = args['l2_rate']
    self.debug = args['debug']
    self.lambda1 = args['lambda1']
    self.lambda2 = args['lambda2']
    self.lambda3 = args['lambda3']

    self.best_dev_acc = .0
    self.best_test_acc = .0
    self.best_test_rmse = .0

    # initializers for parameters
    self.weights_initializer = xavier()
    self.biases_initializer = tf.initializers.zeros()
    self.emb_initializer = xavier()

    hsize = self.hidden_size

    # embeddings in the model
    with tf.variable_scope('emb'):
        self.embeddings = {
            # 'wrd_emb': const(self.embedding, name='wrd_emb', dtype=tf.float32),
            # 'wrd_emb': tf.Variable(self.embedding, name='wrd_emb', dtype=tf.float32),
            'usr_emb': var('usr_emb', [self.usr_cnt, hsize], self.emb_initializer),
            'prd_emb': var('prd_emb', [self.prd_cnt, hsize], self.emb_initializer)
        }
def build_model(self):
    self.feats = tf.placeholder(tf.float32, [1, self.obs_size], name='input_feats')
    self.init_c = tf.placeholder(tf.float32, [1, self.hidden_dim])
    self.init_h = tf.placeholder(tf.float32, [1, self.hidden_dim])
    self.action = tf.placeholder(tf.int32, name='real_action')
    self.action_mask = tf.placeholder(tf.float32, [self.act_size], name='action_mask')

    Wi = tf.get_variable('Wi', [self.obs_size, self.hidden_dim], initializer=xavier())
    bi = tf.get_variable('bi', [self.hidden_dim], initializer=tf.constant_initializer(0.))

    projected = tf.matmul(self.feats, Wi) + bi
    lstm = tf.contrib.rnn.LSTMCell(self.hidden_dim, state_is_tuple=True)
    lstm_op, self.state = lstm(inputs=projected, state=(self.init_c, self.init_h))

    reshaped = tf.concat(axis=1, values=(self.state.c, self.state.h))
    Wo = tf.get_variable('Wo', [2 * self.hidden_dim, self.act_size], initializer=xavier())
    bo = tf.get_variable('bo', [self.act_size], initializer=tf.constant_initializer(0.))

    self.logits = tf.matmul(reshaped, Wo) + bo
    self.probs = tf.multiply(tf.squeeze(tf.nn.softmax(self.logits)), self.action_mask)
    self.pred = tf.argmax(self.probs, axis=0)

    self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.logits, labels=self.action)
    self.train_op = tf.train.AdadeltaOptimizer(self.lr).minimize(self.loss)
def lstm(inputs, sequence_length, hidden_size, scope):
    cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0., initializer=xavier())
    cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0., initializer=xavier())
    outputs, state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw, cell_bw=cell_bw, inputs=inputs,
        sequence_length=sequence_length, dtype=tf.float32, scope=scope)
    outputs = tf.concat(outputs, axis=2)
    return outputs, state
def EncoderCNN(self, is_training, init_vec=None):
    with tf.variable_scope("sentence-encoder", dtype=tf.float32,
                           initializer=xavier(), reuse=tf.AUTO_REUSE):
        input_dim = self.input_embedding.shape[2]
        input_sentence = tf.expand_dims(self.input_embedding, axis=1)
        with tf.variable_scope("conv2d"):
            conv_kernel = self._GetVar(init_vec=init_vec, key='convkernel', name='kernel',
                                       shape=[1, 3, input_dim, FLAGS.hidden_size], trainable=True)
            conv_bias = self._GetVar(init_vec=init_vec, key='convbias', name='bias',
                                     shape=[FLAGS.hidden_size], trainable=True)
        x = tf.layers.conv2d(inputs=input_sentence, filters=FLAGS.hidden_size,
                             kernel_size=[1, 3], strides=[1, 1], padding='same',
                             reuse=tf.AUTO_REUSE)
        x = tf.reduce_max(x, axis=2)
        x = tf.nn.relu(tf.squeeze(x, 1))
    return x
def EncoderLSTM(self, is_training, init_vec=None):
    with tf.variable_scope("sentence-encoder", dtype=tf.float32,
                           initializer=xavier(), reuse=tf.AUTO_REUSE):
        # keep_prob is a keep probability, so the dropout rate is its complement.
        input_sentence = tf.layers.dropout(self.input_embedding,
                                           rate=1 - self.keep_prob, training=is_training)
        fw_cell = tf.contrib.rnn.BasicLSTMCell(FLAGS.hidden_size, state_is_tuple=True)
        bw_cell = tf.contrib.rnn.BasicLSTMCell(FLAGS.hidden_size, state_is_tuple=True)
        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, input_sentence,
            sequence_length=self.len, dtype=tf.float32, scope='bi-dynamic-rnn')
        fw_states, bw_states = states
        # For LSTM cells the state is a (c, h) tuple; keep the first element of each.
        if isinstance(fw_states, tuple):
            fw_states = fw_states[0]
            bw_states = bw_states[0]
        # Concatenate the forward and backward states into one sentence representation.
        x = tf.concat([fw_states, bw_states], axis=1)
        return x
def func_module(input_layer, num_inputs, num_outputs):
    """ final module which estimates some function (value, q, policy, etc) """
    out = input_layer
    out_weights = tf.Variable(xavier()([num_inputs, num_outputs]))
    out = tf.matmul(out, out_weights)
    return out
def fc_module(input_layer, hiddens, activation_fn=tf.nn.relu):
    """ fully connected module """
    out = input_layer
    for num_outputs in hiddens:
        out = fc(out, num_outputs=num_outputs, activation_fn=activation_fn,
                 weights_initializer=xavier())
    return out
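# Usage sketch for the two modules above (illustrative only): it assumes `fc` is an
# alias for tf.contrib.layers.fully_connected, and the observation size and action
# count are made up for the example.
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected as fc
from tensorflow.contrib.layers import xavier_initializer as xavier

obs = tf.placeholder(tf.float32, [None, 64])                    # hypothetical observation batch
hidden = fc_module(obs, hiddens=[128, 128])                     # two ReLU layers
q_values = func_module(hidden, num_inputs=128, num_outputs=4)   # e.g. 4 discrete actions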
def __init__(self, args):
    self.embedding = args['embedding']
    self.wrd_emb = const(self.embedding, name='wrd_emb', dtype=tf.float32)
    self.model = NSCLA(args, self.wrd_emb)

    self.l2_rate = args['l2_rate']
    self.cls_cnt = args['cls_cnt']
    self.embedding_lr = args['embedding_lr']
    self.temperature = args['temperature']
    self.align_rate = args['align_rate']
    self.task_cnt = args['task_cnt']
    self.best_test_acc = 0.
    self.best_dev_acc = 0.
    self.best_test_rmse = 0.
    self.hidden_size = args['emb_dim']

    # initializers for parameters
    self.w_init = xavier()
    self.b_init = tf.initializers.zeros()
    self.e_init = xavier()
def resnet(self):
    """
    Simple implementation of ResNet.

    Args:
        self
    Outputs:
        Method returns a list of length 2 with the following entries:
        [0][0]: indices - placeholder which takes batch indices.
        [0][1]: all_data - placeholder which takes all images.
        [0][2]: all_labels - placeholder for labels.
        [0][3]: loss - value of the loss function.
        [0][4]: train - train optimizer.
        [0][5]: prob - softmax output, used for prediction.
        [1][0]: accuracy - current accuracy.
        [1][1]: session - tf session.
    """
    with tf.Graph().as_default():
        indices = tf.placeholder(tf.int32, shape=[None, 1])
        all_data = tf.placeholder(tf.float32, shape=[50000, 28, 28])
        input_batch = tf.gather_nd(all_data, indices)
        x1_to_tens = tf.reshape(input_batch, shape=[-1, 28, 28, 1])

        net1 = tf.layers.conv2d(x1_to_tens, 32, (7, 7), strides=(2, 2), padding='SAME',
                                activation=tf.nn.relu, kernel_initializer=xavier(), name='11')
        net1 = tf.layers.max_pooling2d(net1, (2, 2), (2, 2))
        net1 = conv_block(net1, 3, [32, 32, 128], name='22', strides=(1, 1))
        net1 = identity_block(net1, 3, [32, 32, 128], name='33')
        net1 = conv_block(net1, 3, [64, 64, 256], name='53', strides=(1, 1))
        net1 = identity_block(net1, 3, [64, 64, 256], name='63')
        net1 = tf.layers.average_pooling2d(net1, (7, 7), strides=(1, 1))
        net1 = tf.contrib.layers.flatten(net1)
        with tf.variable_scope('dense3'):
            net1 = tf.layers.dense(net1, 10, kernel_initializer=tf.contrib.layers.xavier_initializer())
        prob1 = tf.nn.softmax(net1)

        all_labels = tf.placeholder(tf.float32, [None, 10])
        y = tf.gather_nd(all_labels, indices)
        loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=net1, labels=y),
                               name='loss3')
        train1 = tf.train.MomentumOptimizer(0.03, 0.8, use_nesterov=True).minimize(loss1)

        lables_hat1 = tf.cast(tf.argmax(net1, axis=1), tf.float32, name='lables_3at')
        lables1 = tf.cast(tf.argmax(y, axis=1), tf.float32, name='labl3es')
        accuracy1 = tf.reduce_mean(tf.cast(tf.equal(lables_hat1, lables1), tf.float32,
                                           name='a3ccuracy'))

        session = tf.Session()
        session.run(tf.global_variables_initializer())
    return [[indices, all_data, all_labels, loss1, train1, prob1], [accuracy1, session]]
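# Usage sketch for the handles returned by resnet() (illustrative only): `model` is a
# hypothetical instance of the class above, and the random arrays stand in for the
# 50000 MNIST-style images and one-hot labels the placeholders expect.
import numpy as np

images = np.random.rand(50000, 28, 28).astype(np.float32)
labels = np.eye(10)[np.random.randint(0, 10, size=50000)].astype(np.float32)

handles, metrics = model.resnet()
indices_ph, data_ph, labels_ph, loss, train, prob = handles
accuracy, session = metrics

batch_idx = np.random.randint(0, 50000, size=(64, 1))          # batch of row indices
_, batch_loss = session.run([train, loss],
                            feed_dict={indices_ph: batch_idx, data_ph: images, labels_ph: labels})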
def lstm(inputs, sequence_length, hidden_size, scope, bidirectional=True, lstm_cells=None):
    if bidirectional:
        if lstm_cells is None:
            cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0., initializer=xavier())
            cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0., initializer=xavier())
        else:
            cell_fw, cell_bw = lstm_cells
        outputs, state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw, cell_bw=cell_bw, inputs=inputs,
            sequence_length=sequence_length, dtype=tf.float32, scope=scope)
        outputs = tf.concat(outputs, axis=2)
    else:
        if lstm_cells is None:
            cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=0., initializer=xavier())
        else:
            cell = lstm_cells
        outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=inputs,
                                           sequence_length=sequence_length,
                                           dtype=tf.float32, scope=scope)
        outputs = tf.concat(outputs, axis=2)
    return outputs, state
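# Usage sketch for the lstm helper above (illustrative only): the random inputs and the
# sequence lengths are invented for the example.
import tensorflow as tf

inputs = tf.random_normal([4, 10, 32])          # 4 sequences, 10 steps, 32-dim features
lengths = tf.constant([10, 7, 10, 3])

# Bidirectional by default: two cells of size hidden_size // 2, outputs concatenated to 64 dims.
outputs, state = lstm(inputs, lengths, hidden_size=64, scope='bilstm_demo')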
def __init__(self, args):
    self.embedding = args['embedding']
    self.wrd_emb = const(self.embedding, name='wrd_emb', dtype=tf.float32)
    self.model_cnt = 3

    self.l2_rate = args['l2_rate']
    self.cls_cnt = args['cls_cnt']
    self.embedding_lr = args['embedding_lr']
    self.temperature = args['temperature']
    self.align_rate = args['align_rate']
    self.task_cnt = args['task_cnt']
    self.best_test_acc = 0.
    self.best_dev_acc = 0.
    self.best_test_rmse = 0.
    self.hidden_size = args['emb_dim']

    self.model = []
    for i in range(self.model_cnt):
        with tf.variable_scope(f'model{i}'):
            self.model.append(NSCLA(args, self.wrd_emb))

    # initializers for parameters
    self.w_init = xavier()
    self.b_init = tf.initializers.zeros()
    self.e_init = xavier()
def _GetVar(self, init_vec, key, name, shape=None, initializer=xavier(), trainable=True):
    if init_vec is not None and key in init_vec:
        print('using pretrained {} and is {}'.format(
            key, 'trainable' if trainable else 'not trainable'))
        return tf.get_variable(name=name, initializer=init_vec[key], trainable=trainable)
    else:
        return tf.get_variable(name=name, shape=shape, initializer=initializer,
                               trainable=trainable)
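# Usage sketch for _GetVar (illustrative only): `model` is a hypothetical instance of a
# class that defines the method, and the key name, shape, and pretrained array are made up.
import numpy as np
import tensorflow as tf

init_vec = {'wordvec': np.random.randn(1000, 50).astype(np.float32)}

with tf.variable_scope('demo'):
    # Key present in init_vec: the variable is created from the pretrained array.
    emb = model._GetVar(init_vec=init_vec, key='wordvec', name='word_embedding',
                        shape=[1000, 50], trainable=True)
    # Key missing (or init_vec is None): falls back to a freshly xavier-initialized variable.
    proj = model._GetVar(init_vec=None, key='projmat', name='projection', shape=[50, 50])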
def get_initializer(params):
    if params.initializer == "uniform":
        max_val = params.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif params.initializer == 'pixellink':
        from tensorflow.contrib.layers import xavier_initializer_conv2d as xavier
        return xavier()
    elif params.initializer == "normal":
        return tf.random_normal_initializer(0.0, params.initializer_gain)
    elif params.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg", distribution="normal")
    elif params.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg", distribution="uniform")
    else:
        raise ValueError("Unrecognized initializer: %s" % params.initializer)
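# Usage sketch for get_initializer (illustrative only): any object with `initializer` and
# `initializer_gain` attributes works; argparse.Namespace is used here purely for the example.
import argparse
import tensorflow as tf

params = argparse.Namespace(initializer='uniform', initializer_gain=0.1)
init = get_initializer(params)     # tf.random_uniform_initializer(-0.1, 0.1)

weights = tf.get_variable('w_demo', shape=[16, 16], initializer=init)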
def conv2d(self, input, shape, name):
    with tf.variable_scope(name):
        conv = tf.nn.conv2d(input,
                            tf.get_variable('kernel', dtype=tf.float32, shape=shape,
                                            initializer=xavier(), trainable=True),
                            strides=(1, 1, 1, 1), padding="SAME", name='conv')
        conv = tf.nn.bias_add(conv,
                              tf.get_variable('bias', shape=(shape[3], ), trainable=True,
                                              initializer=tf.zeros_initializer()),
                              name='biasadd')
        return conv
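# Usage sketch for the conv2d helper above (illustrative only): `model` is a hypothetical
# instance of the class defining the method; shape is [kernel_h, kernel_w, in_channels, out_channels].
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 28, 28, 1])
feat = model.conv2d(images, shape=[3, 3, 1, 16], name='conv1_demo')   # -> [None, 28, 28, 16]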
def EncoderPCNN(self, is_training, init_vec=None):
    with tf.variable_scope("sentence-encoder", dtype=tf.float32,
                           initializer=xavier(), reuse=tf.AUTO_REUSE):
        input_dim = self.input_embedding.shape[2]
        mask_embedding = tf.constant([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]],
                                     dtype=np.float32)
        pcnn_mask = tf.nn.embedding_lookup(mask_embedding, self.mask)
        input_sentence = tf.expand_dims(self.input_embedding, axis=1)
        with tf.variable_scope("conv2d"):
            conv_kernel = self._GetVar(init_vec=init_vec, key='convkernel', name='kernel',
                                       shape=[1, 3, input_dim, FLAGS.hidden_size], trainable=True)
            conv_bias = self._GetVar(init_vec=init_vec, key='convbias', name='bias',
                                     shape=[FLAGS.hidden_size], trainable=True)
        x = tf.layers.conv2d(inputs=input_sentence, filters=FLAGS.hidden_size,
                             kernel_size=[1, 3], strides=[1, 1], padding='same',
                             reuse=tf.AUTO_REUSE)
        x = tf.reshape(x, [-1, FLAGS.max_length, FLAGS.hidden_size, 1])
        x = tf.reduce_max(
            tf.reshape(pcnn_mask, [-1, 1, FLAGS.max_length, 3]) * tf.transpose(x, [0, 2, 1, 3]),
            axis=2)
        x = tf.nn.relu(tf.reshape(x, [-1, FLAGS.hidden_size * 3]))
    return x
def __init__(self):
    # Some shortcuts for the dimensions we need
    HID_HID = [cfg['lstm_size'], cfg['lstm_size']]
    IN_HID = [cfg['embeddings_size'], cfg['lstm_size']]
    HID = [1, cfg['lstm_size']]

    # The hidden vector is the output
    self.output_size = HID
    # The state consists of the cell and the hidden vectors, and both have
    # the same dimensions
    self.state_size = tf.TensorShape(HID_HID)

    # W are the matrices which multiply the input, and U are the matrices
    # which multiply the previous hidden state

    # Input variables
    self.Wi = tf.get_variable('Wi', IN_HID, initializer=xavier())
    self.Ui = tf.get_variable('Ui', HID_HID, initializer=xavier())
    self.bi = tf.get_variable('bi', HID, initializer=xavier())

    # Modulation variables
    self.Wm = tf.get_variable('Wm', IN_HID, initializer=xavier())
    self.Um = tf.get_variable('Um', HID_HID, initializer=xavier())
    self.bm = tf.get_variable('bm', HID, initializer=xavier())

    # Forget variables
    self.Wf = tf.get_variable('Wf', IN_HID, initializer=xavier())
    self.Uf = tf.get_variable('Uf', HID_HID, initializer=xavier())
    self.bf = tf.get_variable('bf', HID, initializer=xavier())

    # Reveal variables
    self.Wr = tf.get_variable('Wr', IN_HID, initializer=xavier())
    self.Ur = tf.get_variable('Ur', HID_HID, initializer=xavier())
    self.br = tf.get_variable('br', HID, initializer=xavier())
def __init__(self, parameters, neurons_hidden, categories, learning_rate, reg_lambda):
    """
    :param parameters: number of features in the input layer
    :param neurons_hidden: number of hidden units and layers. list of form [num_layer1, num_layer2, ...]
    :param categories: number of categories in the output layer
    :param learning_rate: learning rate
    :param reg_lambda: L2 regularization value
    """
    self.in_vector = tf.placeholder(tf.float32, [None, parameters], name='input')
    self.target_vect = tf.placeholder(tf.int64, [None], name='target')
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    self.class_weights = tf.placeholder(tf.float32, [categories], name='class_weights')
    self.weights = []

    # Generate Fully Connected Layers
    self.hidden_layers = []
    for i, num_neurons in enumerate(neurons_hidden):
        with tf.variable_scope('fully_connected-%d' % i):
            # We use Xavier initializer instead of sampling from a gaussian
            w = tf.get_variable(
                'W',
                shape=(parameters if i == 0 else neurons_hidden[i - 1], neurons_hidden[i]),
                initializer=xavier(),
                regularizer=l2_regularizer(reg_lambda))
            b = tf.Variable(tf.constant(0.1, shape=[neurons_hidden[i]]), name="b")
            self.hidden_layers.append(
                tf.nn.relu(
                    tf.nn.xw_plus_b(self.in_vector if i == 0 else self.hidden_layers[-1],
                                    w, b, name="ffn")))

    # Apply dropout
    with tf.name_scope('dropout'):
        self.drop = tf.nn.dropout(self.hidden_layers[-1], self.dropout_keep_prob)

    # Get output
    with tf.variable_scope("output"):
        w = tf.get_variable('W', shape=[neurons_hidden[-1], categories],
                            initializer=xavier(), regularizer=l2_regularizer(reg_lambda))
        b = tf.Variable(tf.constant(0.1, shape=[categories]), name="b")
        self.scores = tf.nn.xw_plus_b(self.drop, w, b, name="scores")
        self.predictions = tf.nn.softmax(self.scores, name='predictions')
        self.category = tf.argmax(self.scores, 1)

    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.scores, labels=self.target_vect)
        # Weighted loss depending on class frequency
        scale = tf.gather(self.class_weights, self.target_vect)
        self.loss = tf.reduce_mean(losses * scale) + \
            sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))  # Weight Decay
        tf.summary.scalar('loss', self.loss)

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(
            tf.argmax(tf.nn.softmax(self.scores), 1), self.target_vect)
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32),
                                       name="accuracy")
        tf.summary.scalar('accuracy', self.accuracy)

    # Adam Optimizer with exponential decay and gradient clipping
    with tf.name_scope("Optimizer"):
        step = tf.Variable(0, trainable=False)
        rate = tf.train.exponential_decay(learning_rate, step, 1, 0.9999)
        optimizer = tf.train.AdamOptimizer(rate)
        tvars = tf.trainable_variables()
        gradients = tf.gradients(self.loss, tvars)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5)
        self.train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars),
                                                  global_step=step)

    # Keep track of gradient values and sparsity
    for gradient, variable in zip(clipped_gradients, tvars):
        if isinstance(gradient, ops.IndexedSlices):
            grad_values = gradient.values
        else:
            grad_values = gradient
        tf.summary.histogram(variable.name, variable)
        tf.summary.histogram(variable.name + "/gradients", grad_values)
        tf.summary.histogram(variable.name + "/gradient_norm",
                             clip_ops.global_norm([grad_values]))
        tf.summary.scalar(variable.name + "/grad/sparsity",
                          tf.nn.zero_fraction(gradient))

    self.merged = tf.summary.merge_all()
def bulid(self, init_vec):
    with tf.variable_scope("embedding-lookup", initializer=xavier(), dtype=tf.float32):
        temp_word_embedding = self._GetVar(init_vec=init_vec, key='wordvec',
                                           name='temp_word_embedding',
                                           shape=[FLAGS.vocabulary_size, FLAGS.word_size],
                                           trainable=True)
        unk_word_embedding = self._GetVar(init_vec=init_vec, key='unkvec',
                                          name='unk_embedding', shape=[FLAGS.word_size],
                                          trainable=True)
        word_embedding = tf.concat([
            temp_word_embedding,
            tf.reshape(unk_word_embedding, [1, FLAGS.word_size]),
            tf.reshape(tf.constant(np.zeros(FLAGS.word_size), dtype=tf.float32),
                       [1, FLAGS.word_size])], 0)
        temp_pos1_embedding = self._GetVar(init_vec=init_vec, key='pos1vec',
                                           name='temp_pos1_embedding',
                                           shape=[FLAGS.pos_num, FLAGS.pos_size], trainable=True)
        temp_pos2_embedding = self._GetVar(init_vec=init_vec, key='pos2vec',
                                           name='temp_pos2_embedding',
                                           shape=[FLAGS.pos_num, FLAGS.pos_size], trainable=True)
        pos1_embedding = tf.concat([
            temp_pos1_embedding,
            tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                       [1, FLAGS.pos_size])], 0)
        pos2_embedding = tf.concat([
            temp_pos2_embedding,
            tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                       [1, FLAGS.pos_size])], 0)

        input_word = tf.nn.embedding_lookup(word_embedding, self.word)  # N,max_len,d
        input_pos1 = tf.nn.embedding_lookup(pos1_embedding, self.pos1)
        input_pos2 = tf.nn.embedding_lookup(pos2_embedding, self.pos2)
        input_embedding = tf.concat(values=[input_word, input_pos1, input_pos2], axis=-1)
        # input_word_type = tf.nn.embedding_lookup(word_embedding, self.word_type)  # N,max_len,d
        # input_pos1_type = tf.nn.embedding_lookup(pos1_embedding, self.pos1_type)
        # input_pos2_type = tf.nn.embedding_lookup(pos2_embedding, self.pos2_type)
        # input_embedding_type = tf.concat(values=[input_word_type, input_pos1_type, input_pos2_type], axis=-1)

        temp_type_embedding = tf.get_variable('type_embedding',
                                              shape=[FLAGS.type_num, FLAGS.type_dim],
                                              initializer=xavier(), dtype=tf.float32)
        type_embedding = tf.concat([
            tf.reshape(tf.constant(np.zeros(FLAGS.type_dim), dtype=tf.float32),
                       [1, FLAGS.type_dim]),
            temp_type_embedding], 0)
        # en1_type = tf.nn.embedding_lookup(type_embedding, self.en1_type)  # batchsize,max_type_num,type_dim
        # en2_type = tf.nn.embedding_lookup(type_embedding, self.en2_type)
        # en1_type = tf.divide(tf.reduce_sum(en1_type, axis=1), tf.expand_dims(self.en1_type_len, axis=1))
        # en2_type = tf.divide(tf.reduce_sum(en2_type, axis=1), tf.expand_dims(self.en2_type_len, axis=1))
        # x_type = tf.concat([en1_type, en2_type], -1)
        # att_type = tf.get_variable('att_type', [FLAGS.type_dim, 1], initializer=xavier())
        # att_1_type = tf.get_variable('att_1_type', [FLAGS.type_dim, 50], initializer=xavier())
        # att_2_type = tf.get_variable('att_2_type', [50, 1], initializer=xavier())
        # padding = tf.constant(np.zeros(FLAGS.max_type_num) * (-1e8), dtype=tf.float32)
        # en1_type_stack, en2_type_stack = [], []
        # for i in range(FLAGS.batch_size):
        #     # temp_alpha_1 = tf.squeeze(en1_type[i] @ att_type, -1)  # max_type_num,type_dim * type_dim,1 = max_type_num,1
        #     # temp_alpha_2 = tf.squeeze(en2_type[i] @ att_type, -1)
        #     temp_alpha_1 = tf.squeeze(tf.nn.tanh(en1_type[i] @ att_1_type) @ att_2_type, -1)
        #     temp_alpha_2 = tf.squeeze(tf.nn.tanh(en2_type[i] @ att_1_type) @ att_2_type, -1)
        #     temp_alpha_1 = tf.where(tf.equal(self.en1_type_mask[i], 1), temp_alpha_1, padding)
        #     temp_alpha_2 = tf.where(tf.equal(self.en2_type_mask[i], 1), temp_alpha_2, padding)  # max_type_num
        #     temp_alpha_1 = tf.nn.softmax(temp_alpha_1)
        #     temp_alpha_2 = tf.nn.softmax(temp_alpha_2)
        #     en1_type_stack.append(tf.squeeze(tf.expand_dims(temp_alpha_1, 0) @ en1_type[i], 0))  # 1,max_type_num * max_type_num,type_dim = 1,type_dim = type_dim
        #     en2_type_stack.append(tf.squeeze(tf.expand_dims(temp_alpha_2, 0) @ en2_type[i], 0))
        # en1_type_stack = tf.stack(en1_type_stack)
        # en2_type_stack = tf.stack(en2_type_stack)
        # x_type = tf.concat([en1_type_stack, en2_type_stack], -1)

    with tf.variable_scope("entity_typing"):
        input_type_1 = tf.concat(values=[input_word, input_pos1], axis=-1)
        input_type_2 = tf.concat(values=[input_word, input_pos2], axis=-1)
        input_type_1 = tf.concat(values=[input_type_1, input_type_2], axis=0)
        lstm_cell_forward = tf.contrib.rnn.BasicLSTMCell(FLAGS.rnn_size)
        lstm_cell_backward = tf.contrib.rnn.BasicLSTMCell(FLAGS.rnn_size)
        # lstm_cell_forward = tf.contrib.rnn.DropoutWrapper(lstm_cell_forward, output_keep_prob=0.5)
        # lstm_cell_backward = tf.contrib.rnn.DropoutWrapper(lstm_cell_backward, output_keep_prob=0.5)
        # print(self.len.get_shape().as_list())
        # print(input_embedding.get_shape().as_list())
        # (all_states, last_states) = tf.nn.bidirectional_dynamic_rnn(lstm_cell_forward, lstm_cell_backward, input_embedding_type, dtype=tf.float32, sequence_length=self.len_type)
        len = tf.concat([self.len, self.len], 0)
        (all_states, last_states) = tf.nn.bidirectional_dynamic_rnn(
            lstm_cell_forward, lstm_cell_backward, input_type_1,
            dtype=tf.float32, sequence_length=len)
        (fw_outputs, bw_outputs) = all_states  # N,max_len,grusize
        outputs_1 = tf.concat([fw_outputs, bw_outputs], -1)  # N,max_len,grusize*2
        # (all_states, last_states) = tf.nn.bidirectional_dynamic_rnn(lstm_cell_forward, lstm_cell_backward, input_type_2, dtype=tf.float32, sequence_length=self.len)
        # (fw_outputs, bw_outputs) = all_states  # N,max_len,grusize
        # outputs_2 = tf.concat([fw_outputs, bw_outputs], -1)  # N,max_len,grusize*2
        # (fw_state, bw_state) = last_states
        # (_, h_f) = fw_state
        # (_, h_b) = bw_state
        # states = tf.concat([h_f, h_b], -1)

        ET_att_1 = tf.get_variable('ET_att_1', [FLAGS.rnn_size * 2, 128], initializer=xavier())
        ET_att_2 = tf.get_variable('ET_att_2', [128, 1], initializer=xavier())
        # padding = tf.constant(np.zeros((FLAGS.batch_size, FLAGS.max_len)) * (-1e8), dtype=tf.float32)
        padding_1 = tf.ones_like(self.mask, dtype=tf.float32) * tf.constant([-1e8])
        padding_2 = tf.ones_like(self.mask, dtype=tf.float32) * tf.constant([-1e8])
        padding = tf.concat([padding_1, padding_1], 0)
        mask = tf.concat([self.mask, self.mask], 0)
        outputs_1_ = tf.reshape(outputs_1, [-1, FLAGS.rnn_size * 2])
        temp_alpha_1 = tf.reshape(tf.nn.relu(outputs_1_ @ ET_att_1) @ ET_att_2, [-1, FLAGS.max_len])
        temp_alpha_1 = tf.where(tf.equal(mask, 0), padding, temp_alpha_1)
        alpha_1 = tf.nn.softmax(temp_alpha_1, -1)  # N,max_len
        outputs_1 = tf.reshape(tf.expand_dims(alpha_1, 1) @ outputs_1, [-1, FLAGS.rnn_size * 2])
        # outputs_2_ = tf.reshape(outputs_2, [-1, FLAGS.rnn_size * 2])
        # temp_alpha_2 = tf.reshape(tf.nn.relu(outputs_2_ @ ET_att_1) @ ET_att_2, [-1, FLAGS.max_len])
        # temp_alpha_2 = tf.where(tf.equal(self.mask, 0), padding, temp_alpha_2)
        # alpha_2 = tf.nn.softmax(temp_alpha_2, -1)  # N,max_len
        # outputs_2 = tf.reshape(tf.expand_dims(alpha_2, 1) @ outputs_2, [-1, FLAGS.rnn_size * 2])

        ET_sent_att_1 = tf.get_variable('ET_sent_att_1', [FLAGS.rnn_size * 2, 128], initializer=xavier())
        ET_sent_att_2 = tf.get_variable('ET_sent_att_2', [128, 1], initializer=xavier())
        alpha_type_sent_1 = tf.squeeze(tf.nn.tanh(outputs_1 @ ET_sent_att_1) @ ET_sent_att_2, -1)
        # alpha_type_sent_2 = tf.squeeze(tf.nn.tanh(outputs_2 @ ET_sent_att_1) @ ET_sent_att_2, -1)

        type_repre_1 = []
        type_repre_2 = []
        for i in range(FLAGS.batch_size):
            m = outputs_1[self.scope[i]:self.scope[i + 1]]  # (n , hidden_size)
            sent_score = tf.nn.softmax(alpha_type_sent_1[self.scope[i]:self.scope[i + 1]])
            type_repre_1.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score, 0), m)))
            # m = outputs_2[self.scope[i]:self.scope[i + 1]]  # (n , hidden_size)
            # sent_score = tf.nn.softmax(alpha_type_sent_2[self.scope[i]:self.scope[i + 1]])
            # type_repre_2.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score, 0), m)))
        for i in range(FLAGS.batch_size):
            m = outputs_1[self.scope[i] + FLAGS.batch_size:self.scope[i + 1] + FLAGS.batch_size]  # (n , hidden_size)
            sent_score = tf.nn.softmax(
                alpha_type_sent_1[self.scope[i] + FLAGS.batch_size:self.scope[i + 1] + FLAGS.batch_size])
            type_repre_2.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score, 0), m)))
        type_repre_1 = tf.layers.dropout(tf.stack(type_repre_1), rate=1 - self.keep_prob,
                                         training=self.istrain)
        type_repre_2 = tf.layers.dropout(tf.stack(type_repre_2), rate=1 - self.keep_prob,
                                         training=self.istrain)

        ent1_word = tf.nn.embedding_lookup(word_embedding, self.en1_word)
        ent2_word = tf.nn.embedding_lookup(word_embedding, self.en2_word)
        # en1_outputs = tf.concat([outputs, ent1_word], -1)
        # en2_outputs = tf.concat([outputs, ent2_word], -1)
        en1_outputs = tf.concat([type_repre_1, ent1_word], -1)
        en2_outputs = tf.concat([type_repre_2, ent2_word], -1)
        ET_matrix = self._GetVar(init_vec=init_vec, key='disckernel', name='ET_matrix',
                                 shape=[39, FLAGS.rnn_size * 2 + FLAGS.word_size])
        ET_bias = self._GetVar(init_vec=init_vec, key='discbias', name='ET_bias',
                               shape=[39], initializer=tf.zeros_initializer())
        logits_1 = tf.matmul(en1_outputs, ET_matrix, transpose_b=True) + ET_bias
        logits_2 = tf.matmul(en2_outputs, ET_matrix, transpose_b=True) + ET_bias
        # print(logits_1.get_shape().as_list())
        # label_onehot_1 = tf.one_hot(indices=self.en1_type, depth=39, dtype=tf.int32)
        # label_onehot_2 = tf.one_hot(indices=self.en2_type, depth=39, dtype=tf.int32)
        # loss_1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=label_onehot_1, logits=logits_1))
        # loss_2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=label_onehot_2, logits=logits_2))
        loss_1 = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.en1_type, logits=logits_1))
        loss_2 = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.en2_type, logits=logits_2))
        output_1 = tf.nn.sigmoid(logits_1)  # batchsize, 39
        output_2 = tf.nn.sigmoid(logits_2)
        ones = tf.ones_like(logits_1)
        zeros = tf.zeros_like(logits_1)
        self.output_1 = tf.where(tf.greater(output_1, 0.5), ones, zeros)  # batch_size, 39
        self.output_2 = tf.where(tf.greater(output_2, 0.5), ones, zeros)
        en1_type_len = tf.reduce_sum(self.output_1[:, 1:], keepdims=True, axis=-1)
        en2_type_len = tf.reduce_sum(self.output_2[:, 1:], keepdims=True, axis=-1)
        # en1_type_len = tf.reduce_sum(self.output_1, keepdims=True, axis=-1)
        # en2_type_len = tf.reduce_sum(self.output_2, keepdims=True, axis=-1)
        ones = tf.ones_like(en1_type_len)
        en1_type_len_ = tf.where(tf.equal(en1_type_len, 0), ones, en1_type_len)
        en2_type_len_ = tf.where(tf.equal(en2_type_len, 0), ones, en2_type_len)
        en1_type = (self.output_1 @ type_embedding) / en1_type_len_
        en2_type = (self.output_2 @ type_embedding) / en2_type_len_
        # self.output_1 = tf.nn.softmax(logits_1, -1)
        # self.output_2 = tf.nn.softmax(logits_2, -1)
        # output_1 = tf.argmax(self.output_1, -1)
        # output_2 = tf.argmax(self.output_2, -1)
        # output_1 = tf.to_int32(output_1)
        # output_2 = tf.to_int32(output_2)
        # print(self.output_2.get_shape().as_list())
        # en1_type = tf.nn.embedding_lookup(type_embedding, output_1)
        # en2_type = tf.nn.embedding_lookup(type_embedding, output_2)
        # print(en1_type.get_shape().as_list())
        x_type = tf.concat([en1_type, en2_type], -1)

    with tf.variable_scope("encoder"):
        input_dim = input_embedding.shape[-1]
        mask_embedding = tf.constant([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
        pcnn_mask = tf.nn.embedding_lookup(mask_embedding, self.mask)
        input_sentence = tf.expand_dims(input_embedding, axis=1)
        with tf.variable_scope("conv2d"):
            conv_kernel = self._GetVar(init_vec=init_vec, key='convkernel', name='kernel',
                                       shape=[1, 3, input_dim, FLAGS.hidden_size], trainable=True)
            conv_bias = self._GetVar(init_vec=init_vec, key='convbias', name='bias',
                                     shape=[FLAGS.hidden_size], trainable=True)
        x = tf.layers.conv2d(inputs=input_sentence, filters=FLAGS.hidden_size,
                             kernel_size=[1, 3], strides=[1, 1], padding='same',
                             reuse=tf.AUTO_REUSE)
        sequence = tf.reshape(x, [-1, FLAGS.max_len, FLAGS.hidden_size])
        x = tf.reshape(x, [-1, FLAGS.max_len, FLAGS.hidden_size, 1])
        x = tf.reduce_max(
            tf.reshape(pcnn_mask, [-1, 1, FLAGS.max_len, 3]) * tf.transpose(x, [0, 2, 1, 3]),
            axis=2)
        x = tf.nn.relu(tf.reshape(x, [-1, FLAGS.hidden_size * 3]))

    with tf.variable_scope("selector"):
        attention_1 = tf.get_variable('attention_1', [self.hidden_size, 300], initializer=xavier())
        attention_2 = tf.get_variable('attention_2', [300, 1], initializer=xavier())
        alpha = tf.squeeze(tf.nn.tanh(x @ attention_1) @ attention_2, -1)
        bag_repre = []
        for i in range(FLAGS.batch_size):
            m = x[self.scope[i]:self.scope[i + 1]]  # (n , hidden_size)
            sent_score = tf.nn.softmax(alpha[self.scope[i]:self.scope[i + 1]])
            bag_repre.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score, 0), m)))
        bag_repre = tf.layers.dropout(tf.stack(bag_repre), rate=1 - self.keep_prob,
                                      training=self.istrain)

    with tf.variable_scope("loss"):
        discrimitive_matrix = self._GetVar(init_vec=init_vec, key='disckernel',
                                           name='discrimitive_matrix',
                                           shape=[53, self.hidden_size + FLAGS.type_dim * 2])
        bias = self._GetVar(init_vec=init_vec, key='discbias', name='bias', shape=[53],
                            initializer=tf.zeros_initializer())
        bag_repre_type = tf.concat([bag_repre, x_type], -1)
        self.logit = tf.matmul(bag_repre_type, discrimitive_matrix, transpose_b=True) + bias
        self.output = tf.nn.softmax(self.logit, -1)
        label_onehot = tf.one_hot(indices=self.label, depth=FLAGS.num_classes, dtype=tf.int32)
        regularizer = tf.contrib.layers.l2_regularizer(0.00001)
        l2_loss = tf.contrib.layers.apply_regularization(regularizer=regularizer,
                                                         weights_list=tf.trainable_variables())
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=label_onehot,
                                                       logits=self.logit)) + l2_loss + loss_1 + loss_2
def bulid(self, init_vec):
    with tf.variable_scope("embedding-lookup", initializer=xavier(), dtype=tf.float32):
        temp_word_embedding = self._GetVar(init_vec=init_vec, key='wordvec',
                                           name='temp_word_embedding',
                                           shape=[FLAGS.vocabulary_size, FLAGS.word_size],
                                           trainable=True)
        unk_word_embedding = self._GetVar(init_vec=init_vec, key='unkvec',
                                          name='unk_embedding', shape=[FLAGS.word_size])
        word_embedding = tf.concat([
            temp_word_embedding,
            tf.reshape(unk_word_embedding, [1, FLAGS.word_size]),
            tf.reshape(tf.constant(np.zeros(FLAGS.word_size), dtype=tf.float32),
                       [1, FLAGS.word_size])], 0)
        temp_pos1_embedding = self._GetVar(init_vec=init_vec, key='pos1vec',
                                           name='temp_pos1_embedding',
                                           shape=[FLAGS.pos_num, FLAGS.pos_size])
        temp_pos2_embedding = self._GetVar(init_vec=init_vec, key='pos2vec',
                                           name='temp_pos2_embedding',
                                           shape=[FLAGS.pos_num, FLAGS.pos_size])
        pos1_embedding = tf.concat([
            temp_pos1_embedding,
            tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                       [1, FLAGS.pos_size])], 0)
        pos2_embedding = tf.concat([
            temp_pos2_embedding,
            tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                       [1, FLAGS.pos_size])], 0)

        input_word = tf.nn.embedding_lookup(word_embedding, self.word)  # N,max_len,d
        input_pos1 = tf.nn.embedding_lookup(pos1_embedding, self.pos1)
        input_pos2 = tf.nn.embedding_lookup(pos2_embedding, self.pos2)
        input_embedding = tf.concat(values=[input_word, input_pos1, input_pos2], axis=-1)

        temp_type_embedding = tf.get_variable('type_embedding',
                                              shape=[FLAGS.type_num, FLAGS.type_dim],
                                              initializer=xavier(), dtype=tf.float32)
        type_embedding = tf.concat([
            tf.reshape(tf.constant(np.zeros(FLAGS.type_dim), dtype=tf.float32),
                       [1, FLAGS.type_dim]),
            temp_type_embedding], 0)
        en1_type = tf.nn.embedding_lookup(type_embedding, self.en1_type)  # batchsize,max_type_num,type_dim
        en2_type = tf.nn.embedding_lookup(type_embedding, self.en2_type)
        # en1_type = tf.divide(tf.reduce_sum(en1_type, axis=1), tf.expand_dims(self.en1_type_len, axis=1))
        # en2_type = tf.divide(tf.reduce_sum(en2_type, axis=1), tf.expand_dims(self.en2_type_len, axis=1))
        x_type = tf.concat([en1_type, en2_type], -1)
        '''#att_type = tf.get_variable('att_type', [FLAGS.type_dim,1],initializer=xavier())
        att_1_type = tf.get_variable('att_1_type', [FLAGS.type_dim,50],initializer=xavier())
        att_2_type = tf.get_variable('att_2_type', [50,1],initializer=xavier())
        padding = tf.constant(np.zeros(FLAGS.max_type_num)*(-1e8),dtype=tf.float32)
        en1_type_stack, en2_type_stack = [],[]
        for i in range(FLAGS.batch_size):
            #temp_alpha_1 = tf.squeeze(en1_type[i] @ att_type , -1) # max_type_num,type_dim * type_dim,1 = max_type_num,1
            #temp_alpha_2 = tf.squeeze(en2_type[i] @ att_type , -1)
            temp_alpha_1 = tf.squeeze(tf.nn.tanh(en1_type[i] @ att_1_type ) @ att_2_type, -1)
            temp_alpha_2 = tf.squeeze(tf.nn.tanh(en2_type[i] @ att_1_type ) @ att_2_type, -1)
            temp_alpha_1 = tf.where(tf.equal(self.en1_type_mask[i], 1), temp_alpha_1, padding)
            temp_alpha_2 = tf.where(tf.equal(self.en2_type_mask[i], 1), temp_alpha_2, padding) # max_type_num
            temp_alpha_1 = tf.nn.softmax(temp_alpha_1)
            temp_alpha_2 = tf.nn.softmax(temp_alpha_2)
            en1_type_stack.append(tf.squeeze(tf.expand_dims(temp_alpha_1,0) @ en1_type[i],0)) # 1,max_type_num * max_type_num,type_dim = 1,type_dim = type_dim
            en2_type_stack.append(tf.squeeze(tf.expand_dims(temp_alpha_2,0) @ en2_type[i],0))
        en1_type_stack = tf.stack(en1_type_stack)
        en2_type_stack = tf.stack(en2_type_stack)
        x_type = tf.concat([en1_type_stack, en2_type_stack], -1)'''

    with tf.variable_scope("encoder"):
        input_dim = input_embedding.shape[-1]
        mask_embedding = tf.constant([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
        pcnn_mask = tf.nn.embedding_lookup(mask_embedding, self.mask)
        input_sentence = tf.expand_dims(input_embedding, axis=1)
        with tf.variable_scope("conv2d"):
            conv_kernel = self._GetVar(init_vec=init_vec, key='convkernel', name='kernel',
                                       shape=[1, 3, input_dim, FLAGS.hidden_size], trainable=True)
            conv_bias = self._GetVar(init_vec=init_vec, key='convbias', name='bias',
                                     shape=[FLAGS.hidden_size], trainable=True)
        x = tf.layers.conv2d(inputs=input_sentence, filters=FLAGS.hidden_size,
                             kernel_size=[1, 3], strides=[1, 1], padding='same',
                             reuse=tf.AUTO_REUSE)
        sequence = tf.reshape(x, [-1, FLAGS.max_len, FLAGS.hidden_size])
        x = tf.reshape(x, [-1, FLAGS.max_len, FLAGS.hidden_size, 1])
        x = tf.reduce_max(
            tf.reshape(pcnn_mask, [-1, 1, FLAGS.max_len, 3]) * tf.transpose(x, [0, 2, 1, 3]),
            axis=2)
        x = tf.nn.relu(tf.reshape(x, [-1, FLAGS.hidden_size * 3]))

    with tf.variable_scope("selector"):
        attention_1 = tf.get_variable('attention_1', [self.hidden_size, 300], initializer=xavier())
        attention_2 = tf.get_variable('attention_2', [300, 1], initializer=xavier())
        alpha = tf.squeeze(tf.nn.tanh(x @ attention_1) @ attention_2, -1)
        bag_repre = []
        for i in range(FLAGS.batch_size):
            m = x[self.scope[i]:self.scope[i + 1]]  # (n , hidden_size)
            sent_score = tf.nn.softmax(alpha[self.scope[i]:self.scope[i + 1]])
            # m = x[self.scope[i][0]:self.scope[i][1]]  # (n , hidden_size)
            # sent_score = tf.nn.softmax(alpha[self.scope[i][0]:self.scope[i][1]])
            bag_repre.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score, 0), m)))
        bag_repre = tf.layers.dropout(tf.stack(bag_repre), rate=1 - self.keep_prob,
                                      training=self.istrain)

    with tf.variable_scope("loss"):
        discrimitive_matrix = self._GetVar(init_vec=init_vec, key='disckernel',
                                           name='discrimitive_matrix',
                                           shape=[53, self.hidden_size + FLAGS.type_dim * 2])
        bias = self._GetVar(init_vec=init_vec, key='discbias', name='bias', shape=[53],
                            initializer=tf.zeros_initializer())
        bag_repre_type = tf.concat([bag_repre, x_type], -1)
        self.logit = tf.matmul(bag_repre_type, discrimitive_matrix, transpose_b=True) + bias
        self.output = tf.nn.softmax(self.logit, -1)
        label_onehot = tf.one_hot(indices=self.label, depth=FLAGS.num_classes, dtype=tf.int32)
        regularizer = tf.contrib.layers.l2_regularizer(0.00001)
        l2_loss = tf.contrib.layers.apply_regularization(regularizer=regularizer,
                                                         weights_list=tf.trainable_variables())
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=label_onehot,
                                                       logits=self.logit)) + l2_loss
def __init__(self, is_training, init_vec):
    self.word = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_word')
    self.pos1 = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_pos1')
    self.pos2 = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_pos2')
    self.mask = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_mask')
    self.len = tf.placeholder(dtype=tf.int32, shape=[None], name='input_len')
    self.label_index = tf.placeholder(dtype=tf.int32, shape=[None], name='label_index')
    self.label = tf.placeholder(dtype=tf.float32, shape=[FLAGS.batch_size, FLAGS.num_classes],
                                name='input_label')
    self.scope = tf.placeholder(dtype=tf.int32, shape=[FLAGS.batch_size + 1], name='scope')
    self.keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')

    self.hier = init_vec['relation_levels'].shape[1]
    self.relation_levels = tf.constant(init_vec['relation_levels'],
                                       shape=[FLAGS.num_classes, self.hier],
                                       dtype=tf.int32, name='relation_levels')
    self.layer = (1 + np.max(init_vec['relation_levels'], 0)).astype(np.int32)

    word_size = FLAGS.word_size
    vocab_size = FLAGS.vocabulary_size - 2

    with tf.variable_scope("embedding-lookup", initializer=xavier(), dtype=tf.float32):
        temp_word_embedding = self._GetVar(init_vec=init_vec, key='wordvec',
                                           name='temp_word_embedding',
                                           shape=[vocab_size, word_size], trainable=True)
        unk_word_embedding = self._GetVar(init_vec=init_vec, key='unkvec',
                                          name='unk_embedding', shape=[word_size], trainable=True)
        word_embedding = tf.concat([
            temp_word_embedding,
            tf.reshape(unk_word_embedding, [1, word_size]),
            tf.reshape(tf.constant(np.zeros(word_size), dtype=tf.float32), [1, word_size])], 0)
        temp_pos1_embedding = self._GetVar(init_vec=init_vec, key='pos1vec',
                                           name='temp_pos1_embedding',
                                           shape=[FLAGS.pos_num, FLAGS.pos_size], trainable=True)
        temp_pos2_embedding = self._GetVar(init_vec=init_vec, key='pos2vec',
                                           name='temp_pos2_embedding',
                                           shape=[FLAGS.pos_num, FLAGS.pos_size], trainable=True)
        pos1_embedding = tf.concat([
            temp_pos1_embedding,
            tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                       [1, FLAGS.pos_size])], 0)
        pos2_embedding = tf.concat([
            temp_pos2_embedding,
            tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                       [1, FLAGS.pos_size])], 0)

        input_word = tf.nn.embedding_lookup(word_embedding, self.word)
        input_pos1 = tf.nn.embedding_lookup(pos1_embedding, self.pos1)
        input_pos2 = tf.nn.embedding_lookup(pos2_embedding, self.pos2)
        self.input_embedding = tf.concat(values=[input_word, input_pos1, input_pos2], axis=2)

    self.hidden_size, self.sentence_encoder = self._GetEncoder(FLAGS.model, is_training)
def __init__(self, is_training, init_vec=None):
    NN.__init__(self, is_training, init_vec)
    x = self.sentence_encoder(is_training, init_vec)

    with tf.variable_scope("sentence-level-attention", initializer=xavier(), dtype=tf.float32):
        relation_matrix = self._GetVar(init_vec=init_vec, key='relmat',
                                       name='relation_matrix',
                                       shape=[FLAGS.num_classes, self.hidden_size])
        current_relation = tf.nn.embedding_lookup(relation_matrix, self.label_index)
        attention_logit = tf.reduce_sum(x * current_relation, 1)
        tower_repre = []
        for i in range(FLAGS.batch_size):
            sen_matrix = x[self.scope[i]:self.scope[i + 1]]
            attention_score = tf.nn.softmax(
                tf.reshape(attention_logit[self.scope[i]:self.scope[i + 1]], [1, -1]))
            final_repre = tf.reshape(tf.matmul(attention_score, sen_matrix), [self.hidden_size])
            tower_repre.append(final_repre)
        stack_repre = tf.layers.dropout(tf.stack(tower_repre), rate=1 - self.keep_prob,
                                        training=is_training)

    with tf.variable_scope("loss", dtype=tf.float32, initializer=xavier()):
        discrimitive_matrix = self._GetVar(init_vec=init_vec, key='discmat',
                                           name='discrimitive_matrix',
                                           shape=[FLAGS.num_classes, self.hidden_size])
        bias = self._GetVar(init_vec=init_vec, key='disc_bias', name='bias',
                            shape=[FLAGS.num_classes], initializer=tf.zeros_initializer())
        logits = tf.matmul(stack_repre, discrimitive_matrix, transpose_b=True) + bias
        self.output = tf.nn.softmax(logits)
        regularizer = tf.contrib.layers.l2_regularizer(FLAGS.weight_decay)
        l2_loss = tf.contrib.layers.apply_regularization(regularizer=regularizer,
                                                         weights_list=tf.trainable_variables())
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=self.label, logits=logits)) + l2_loss
        tf.summary.scalar('loss', self.loss)
        self.predictions = tf.argmax(logits, 1, name="predictions")
        self.correct_predictions = tf.equal(self.predictions, tf.argmax(self.label, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, "float"),
                                       name="accuracy")

    if not is_training:
        with tf.variable_scope("test"):
            test_attention_logit = tf.matmul(x, relation_matrix, transpose_b=True)
            test_tower_output = []
            for i in range(FLAGS.batch_size):
                test_attention_score = tf.nn.softmax(
                    tf.transpose(test_attention_logit[self.scope[i]:self.scope[i + 1], :]))
                test_final_repre = tf.matmul(test_attention_score,
                                             x[self.scope[i]:self.scope[i + 1]])
                test_logits = tf.matmul(test_final_repre, discrimitive_matrix,
                                        transpose_b=True) + bias * 3
                test_output = tf.diag_part(tf.nn.softmax(test_logits))
                test_tower_output.append(test_output)
            test_stack_output = tf.reshape(tf.stack(test_tower_output),
                                           [FLAGS.batch_size, FLAGS.num_classes])
            self.test_output = test_stack_output
def __init__(self, is_training, init_vec):
    NN.__init__(self, is_training, init_vec)
    x = self.sentence_encoder(is_training, init_vec)

    with tf.variable_scope("sentence-level-attention", initializer=xavier(), dtype=tf.float32):
        relation_matrixs = []
        for i in range(self.hier):
            relation_matrixs.append(
                self._GetVar(init_vec=init_vec, key='relmat' + str(i),
                             name='relation_matrix_l' + str(i),
                             shape=[self.layer[i], self.hidden_size]))
        label_layer = tf.nn.embedding_lookup(self.relation_levels, self.label_index)
        attention_logits = []
        for i in range(self.hier):
            current_relation = tf.nn.embedding_lookup(relation_matrixs[i], label_layer[:, i])
            attention_logits.append(tf.reduce_sum(current_relation * x, 1))
        attention_logits_stack = tf.stack(attention_logits)
        attention_score_hidden = tf.concat([
            tf.nn.softmax(attention_logits_stack[:, self.scope[i]:self.scope[i + 1]])
            for i in range(FLAGS.batch_size)
        ], 1)
        tower_repre = []
        for i in range(FLAGS.batch_size):
            sen_matrix = x[self.scope[i]:self.scope[i + 1]]
            layer_score = attention_score_hidden[:, self.scope[i]:self.scope[i + 1]]
            layer_repre = tf.reshape(layer_score @ sen_matrix, [-1])
            tower_repre.append(layer_repre)
        stack_repre = tf.layers.dropout(tf.stack(tower_repre), rate=1 - self.keep_prob,
                                        training=is_training)

    with tf.variable_scope("loss", dtype=tf.float32, initializer=xavier()):
        discrimitive_matrix = self._GetVar(init_vec=init_vec, key='disckernel',
                                           name='discrimitive_matrix',
                                           shape=[FLAGS.num_classes, self.hidden_size * self.hier])
        bias = self._GetVar(init_vec=init_vec, key='discbias', name='bias',
                            shape=[FLAGS.num_classes], initializer=tf.zeros_initializer())
        logits = tf.matmul(stack_repre, discrimitive_matrix, transpose_b=True) + bias
        regularizer = tf.contrib.layers.l2_regularizer(FLAGS.weight_decay)
        l2_loss = tf.contrib.layers.apply_regularization(regularizer=regularizer,
                                                         weights_list=tf.trainable_variables())
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=self.label, logits=logits)) + l2_loss
        self.output = tf.nn.softmax(logits)
        tf.summary.scalar('loss', self.loss)
        self.predictions = tf.argmax(logits, 1, name="predictions")
        self.correct_predictions = tf.equal(self.predictions, tf.argmax(self.label, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, "float"),
                                       name="accuracy")

    if not is_training:
        with tf.variable_scope("test"):
            test_attention_scores = []
            for i in range(self.hier):
                current_relation = tf.nn.embedding_lookup(relation_matrixs[i],
                                                          self.relation_levels[:, i])
                current_logit = tf.matmul(current_relation, x, transpose_b=True)
                current_score = tf.concat([
                    tf.nn.softmax(current_logit[:, self.scope[j]:self.scope[j + 1]])
                    for j in range(FLAGS.batch_size)
                ], 1)
                test_attention_scores.append(current_score)
            test_attention_scores_stack = tf.stack(test_attention_scores, 1)
            test_tower_output = []
            for i in range(FLAGS.batch_size):
                test_sen_matrix = tf.tile(
                    tf.expand_dims(x[self.scope[i]:self.scope[i + 1]], 0),
                    [FLAGS.num_classes, 1, 1])
                test_layer_score = test_attention_scores_stack[:, :, self.scope[i]:self.scope[i + 1]]
                test_layer_repre = tf.reshape(test_layer_score @ test_sen_matrix,
                                              [FLAGS.num_classes, -1])
                test_logits = tf.matmul(test_layer_repre, discrimitive_matrix,
                                        transpose_b=True) + bias
                test_output = tf.diag_part(tf.nn.softmax(test_logits))
                test_tower_output.append(test_output)
            test_stack_output = tf.reshape(tf.stack(test_tower_output),
                                           [FLAGS.batch_size, FLAGS.num_classes])
            self.test_output = test_stack_output
def freeznet(self, config=None):
    """
    Simple implementation of ResNet with the FreezeOut method.

    Args:
        config: dict with params:
            - iteration: total number of iterations to train the model.
            - degree: 1 or 3.
            - learning_rate: initial learning rate.
            - scaled: True or False.
    Outputs:
        Method returns a list of length 2 with the following entries:
        [0][0]: indices - placeholder which takes batch indices.
        [0][1]: all_data - placeholder which takes all images.
        [0][2]: all_labels - placeholder for labels.
        [0][3]: loss - value of the loss function.
        [0][4]: train - list of train optimizers.
        [0][5]: prob - softmax output, used for prediction.
        [1][0]: accuracy - current accuracy.
        [1][1]: session - tf session.
    """
    iteration = config['iteration']
    learning_rate = config['learning_rate']
    scaled = config['scaled']
    with tf.Graph().as_default():
        indices = tf.placeholder(tf.int32, shape=[None, 1], name='indices')
        all_data = tf.placeholder(tf.float32, shape=[50000, 28, 28], name='all_data')
        input_batch = tf.gather_nd(all_data, indices, name='input_batch')
        input_batch = tf.reshape(input_batch, shape=[-1, 28, 28, 1], name='x_to_tens')

        net = tf.layers.conv2d(input_batch, 32, (7, 7), strides=(2, 2), padding='SAME',
                               activation=tf.nn.relu, kernel_initializer=xavier(), name='1')
        net = tf.layers.max_pooling2d(net, (2, 2), (2, 2), name='max_pool')
        net = conv_block(net, 3, [32, 32, 128], name='2', strides=(1, 1))
        net = identity_block(net, 3, [32, 32, 128], name='3')
        net = conv_block(net, 3, [64, 64, 256], name='4', strides=(1, 1))
        net = identity_block(net, 3, [64, 64, 256], name='5')
        net = tf.layers.average_pooling2d(net, (7, 7), strides=(1, 1))
        net = tf.contrib.layers.flatten(net)
        with tf.variable_scope('dense'):
            net = tf.layers.dense(net, 10, kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                  name='dense')
        prob = tf.nn.softmax(net, name='soft')

        all_labels = tf.placeholder(tf.float32, [None, 10], name='all_labels')
        y = tf.gather_nd(all_labels, indices, name='y')
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=y),
                              name='loss')

        global_steps = []
        train = []
        for i in range(1, 6):
            global_steps.append(tf.Variable(0, trainable=False, name='var_{}'.format(i)))
            train.append(create_train(tf.train.MomentumOptimizer, str(i), global_steps[-1], loss,
                                      iteration * (i / 10 + 0.5) ** config['degree'],
                                      iteration, learning_rate, scaled))

        lables_hat = tf.cast(tf.argmax(net, axis=1), tf.float32, name='lables_hat')
        lables = tf.cast(tf.argmax(y, axis=1), tf.float32, name='lables')
        accuracy = tf.reduce_mean(tf.cast(tf.equal(lables_hat, lables), tf.float32, name='accuracy'))

        session = tf.Session()
        session.run(tf.global_variables_initializer())
    return [[indices, all_data, all_labels, loss, train, prob], [accuracy, session]]
def extractor(self, is_training, init_vec=None):
    with tf.variable_scope("sentence_encoder", dtype=tf.float32,
                           initializer=xavier(), reuse=tf.AUTO_REUSE):
        entity = tf.expand_dims(self.input_entity, axis=1) * tf.ones(
            shape=[1, FLAGS.max_length, 1], dtype=tf.float32)
        word_with_entity = tf.concat([self.sentence, entity], 2)
        dim_word_entity = word_with_entity.shape[2]
        t_cnn = 0.05

        "gate entity aware"
        pos_info = bn_dense_layer_v2(self.input_embedding, dim_word_entity, True, 0.,
                                     'pos_info', 'tanh', wd=0., keep_prob=1.,
                                     is_train=is_training)
        word_gated_cnn = bn_dense_layer_v2(word_with_entity / t_cnn, dim_word_entity, True, 0.,
                                           'word_gated', 'sigmoid', False, wd=0., keep_prob=1.,
                                           is_train=is_training)
        final_vector_cnn = word_gated_cnn * word_with_entity + (1 - word_gated_cnn) * pos_info

        "pcnn"
        mask_embedding = tf.constant([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]],
                                     dtype=np.float32)
        pcnn_mask = tf.nn.embedding_lookup(mask_embedding, self.mask)
        cnn_input = tf.expand_dims(final_vector_cnn, axis=1)
        with tf.variable_scope('conv2d_pos'):
            conv_kernel = self._GetVar(init_vec=None, key='convkernel', name='kernel_pos',
                                       shape=[1, 3, dim_word_entity, FLAGS.hidden_size],
                                       trainable=True)
            conv_bias = self._GetVar(init_vec=None, key='convbias', name='bias_pos',
                                     shape=[FLAGS.hidden_size], trainable=True)
        x = tf.layers.conv2d(inputs=cnn_input, filters=FLAGS.hidden_size,
                             kernel_size=[1, 3], strides=[1, 1], padding='same',
                             reuse=tf.AUTO_REUSE)
        x = tf.reshape(x, [-1, FLAGS.max_length, FLAGS.hidden_size, 1])
        pcnn_x = tf.reshape(pcnn_mask, [-1, 1, FLAGS.max_length, 3]) * tf.transpose(x, [0, 2, 1, 3])
        output = tf.nn.relu(tf.reshape(tf.reduce_max(pcnn_x, 2), [-1, FLAGS.hidden_size * 3]))
    return output
def bulid(self, init_vec):
    with tf.variable_scope("embedding-lookup", initializer=xavier(), dtype=tf.float32):
        temp_word_embedding = self._GetVar(init_vec=init_vec, key='wordvec',
                                           name='temp_word_embedding',
                                           shape=[FLAGS.vocabulary_size, FLAGS.word_size],
                                           trainable=True)
        unk_word_embedding = self._GetVar(init_vec=init_vec, key='unkvec',
                                          name='unk_embedding', shape=[FLAGS.word_size],
                                          trainable=True)
        word_embedding = tf.concat([
            temp_word_embedding,
            tf.reshape(unk_word_embedding, [1, FLAGS.word_size]),
            tf.reshape(tf.constant(np.zeros(FLAGS.word_size), dtype=tf.float32),
                       [1, FLAGS.word_size])], 0)
        temp_pos1_embedding = self._GetVar(init_vec=init_vec, key='pos1vec',
                                           name='temp_pos1_embedding',
                                           shape=[FLAGS.pos_num, FLAGS.pos_size], trainable=True)
        temp_pos2_embedding = self._GetVar(init_vec=init_vec, key='pos2vec',
                                           name='temp_pos2_embedding',
                                           shape=[FLAGS.pos_num, FLAGS.pos_size], trainable=True)
        pos1_embedding = tf.concat([
            temp_pos1_embedding,
            tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                       [1, FLAGS.pos_size])], 0)
        pos2_embedding = tf.concat([
            temp_pos2_embedding,
            tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                       [1, FLAGS.pos_size])], 0)

        input_word = tf.nn.embedding_lookup(word_embedding, self.word)  # N,max_len,d
        input_pos1 = tf.nn.embedding_lookup(pos1_embedding, self.pos1)
        input_pos2 = tf.nn.embedding_lookup(pos2_embedding, self.pos2)

    with tf.variable_scope("entity_typing"):
        input_type_1 = tf.concat(values=[input_word, input_pos1], axis=-1)
        input_type_2 = tf.concat(values=[input_word, input_pos2], axis=-1)
        input_type_1 = tf.concat(values=[input_type_1, input_type_2], axis=0)
        lstm_cell_forward = tf.contrib.rnn.BasicLSTMCell(FLAGS.rnn_size)
        lstm_cell_backward = tf.contrib.rnn.BasicLSTMCell(FLAGS.rnn_size)
        len = tf.concat([self.len, self.len], 0)
        (all_states, last_states) = tf.nn.bidirectional_dynamic_rnn(
            lstm_cell_forward, lstm_cell_backward, input_type_1,
            dtype=tf.float32, sequence_length=len)
        (fw_outputs, bw_outputs) = all_states  # N,max_len,grusize
        outputs_1 = tf.concat([fw_outputs, bw_outputs], -1)  # N,max_len,grusize*2

        ET_att_1 = tf.get_variable('ET_att_1', [FLAGS.rnn_size * 2, 128], initializer=xavier())
        ET_att_2 = tf.get_variable('ET_att_2', [128, 1], initializer=xavier())
        padding_1 = tf.ones_like(self.mask, dtype=tf.float32) * tf.constant([-1e8])
        padding = tf.concat([padding_1, padding_1], 0)
        mask = tf.concat([self.mask, self.mask], 0)
        outputs_1_ = tf.reshape(outputs_1, [-1, FLAGS.rnn_size * 2])
        temp_alpha_1 = tf.reshape(tf.nn.relu(outputs_1_ @ ET_att_1) @ ET_att_2, [-1, FLAGS.max_len])
        temp_alpha_1 = tf.where(tf.equal(mask, 0), padding, temp_alpha_1)
        alpha_1 = tf.nn.softmax(temp_alpha_1, -1)  # N,max_len
        outputs_1 = tf.reshape(tf.expand_dims(alpha_1, 1) @ outputs_1, [-1, FLAGS.rnn_size * 2])

        ET_sent_att_1 = tf.get_variable('ET_sent_att_1', [FLAGS.rnn_size * 2, 128], initializer=xavier())
        ET_sent_att_2 = tf.get_variable('ET_sent_att_2', [128, 1], initializer=xavier())
        alpha_type_sent_1 = tf.squeeze(tf.nn.tanh(outputs_1 @ ET_sent_att_1) @ ET_sent_att_2, -1)

        type_repre_1 = []
        type_repre_2 = []
        for i in range(FLAGS.batch_size):
            m = outputs_1[self.scope[i]:self.scope[i + 1]]  # (n , hidden_size)
            sent_score = tf.nn.softmax(alpha_type_sent_1[self.scope[i]:self.scope[i + 1]])
            type_repre_1.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score, 0), m)))
        for i in range(FLAGS.batch_size):
            m = outputs_1[self.scope[i] + FLAGS.batch_size:self.scope[i + 1] + FLAGS.batch_size]  # (n , hidden_size)
            sent_score = tf.nn.softmax(
                alpha_type_sent_1[self.scope[i] + FLAGS.batch_size:self.scope[i + 1] + FLAGS.batch_size])
            type_repre_2.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score, 0), m)))
        type_repre_1 = tf.layers.dropout(tf.stack(type_repre_1), rate=1 - self.keep_prob,
                                         training=self.istrain)
        type_repre_2 = tf.layers.dropout(tf.stack(type_repre_2), rate=1 - self.keep_prob,
                                         training=self.istrain)

        ent1_word = tf.nn.embedding_lookup(word_embedding, self.en1_word)
        ent2_word = tf.nn.embedding_lookup(word_embedding, self.en2_word)
        en1_outputs = tf.concat([type_repre_1, ent1_word], -1)
        en2_outputs = tf.concat([type_repre_2, ent2_word], -1)
        ET_matrix = self._GetVar(init_vec=init_vec, key='disckernel', name='ET_matrix',
                                 shape=[39, FLAGS.rnn_size * 2 + FLAGS.word_size])
        ET_bias = self._GetVar(init_vec=init_vec, key='discbias', name='ET_bias',
                               shape=[39], initializer=tf.zeros_initializer())
        logits_1 = tf.matmul(en1_outputs, ET_matrix, transpose_b=True) + ET_bias
        logits_2 = tf.matmul(en2_outputs, ET_matrix, transpose_b=True) + ET_bias
        loss_1 = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.en1_type, logits=logits_1))
        loss_2 = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.en2_type, logits=logits_2))
        output_1 = tf.nn.sigmoid(logits_1)  # batchsize, 39
        output_2 = tf.nn.sigmoid(logits_2)
        ones = tf.ones_like(logits_1)
        zeros = tf.zeros_like(logits_1)
        self.output_1 = tf.where(tf.greater(output_1, 0.5), ones, zeros)  # batch_size, 39
        self.output_2 = tf.where(tf.greater(output_2, 0.5), ones, zeros)

    with tf.variable_scope("loss"):
        regularizer = tf.contrib.layers.l2_regularizer(0.00001)
        l2_loss = tf.contrib.layers.apply_regularization(regularizer=regularizer,
                                                         weights_list=tf.trainable_variables())
        self.loss = l2_loss + loss_1 + loss_2
def __init__(self, is_training, init_vec):
    NN.__init__(self, is_training, init_vec)
    x = self.sentence_encoder(is_training, init_vec)

    with tf.variable_scope('bag-vote', initializer=xavier(), dtype=tf.float32):
        hier3_relation_matrix = self._GetVar(init_vec=None, key=None,
                                             name='hier3_relation_matrix',
                                             initializer=tf.orthogonal_initializer(),
                                             shape=[FLAGS.num_classes, self.hidden_size])
        hier2_relation_matrix = self._GetVar(init_vec=None, key=None,
                                             name='hier2_relation_matrix',
                                             initializer=tf.orthogonal_initializer(),
                                             shape=[FLAGS.num_hier2_classes, self.hidden_size])
        hier1_relation_matrix = self._GetVar(init_vec=None, key=None,
                                             name='hier1_relation_matrix',
                                             initializer=tf.orthogonal_initializer(),
                                             shape=[FLAGS.num_hier1_classes, self.hidden_size])

        "hierarchical_rank1"
        hier1_logits = tf.matmul(x, hier1_relation_matrix, transpose_b=True)
        hier1_index = tf.nn.softmax(hier1_logits, -1)
        hier1_relation = tf.matmul(hier1_index, hier1_relation_matrix)
        "gate"
        concat_hier1 = tf.concat([x, hier1_relation], 1)
        alpha_hier1 = bn_dense_layer_v2(concat_hier1, self.hidden_size, True,
                                        scope='gate_hier1', activation='sigmoid',
                                        is_train=is_training)
        context_hier1 = alpha_hier1 * x + (1 - alpha_hier1) * hier1_relation
        "MLP linear"
        middle_hier1 = bn_dense_layer_v2(context_hier1, 1024, False,
                                         scope='mlp_activation_hier1', activation='relu',
                                         is_train=is_training)
        output_hier1 = bn_dense_layer_v2(middle_hier1, self.hidden_size, False,
                                         scope='mlp_linear_hier1', activation='linear',
                                         is_train=is_training)
        "add&norm"
        output_hier1 += x
        output_hier1 = tf.contrib.layers.layer_norm(output_hier1)

        "hierarchical_rank2"
        hier2_logits = tf.matmul(x, hier2_relation_matrix, transpose_b=True)
        hier2_index = tf.nn.softmax(hier2_logits, -1)
        hier2_relation = tf.matmul(hier2_index, hier2_relation_matrix)
        "gate_hier2"
        concat_hier2 = tf.concat([x, hier2_relation], 1)
        alpha_hier2 = bn_dense_layer_v2(concat_hier2, self.hidden_size, True,
                                        scope='gate_hier2', activation='sigmoid',
                                        is_train=is_training)
        context_hier2 = alpha_hier2 * x + (1 - alpha_hier2) * hier2_relation
        "MLP linear"
        middle_hier2 = bn_dense_layer_v2(context_hier2, 1024, False,
                                         scope='mlp_activation_hier2', activation='relu',
                                         is_train=is_training)
        output_hier2 = bn_dense_layer_v2(middle_hier2, self.hidden_size, False,
                                         scope='mlp_linear_hier2', activation='linear',
                                         is_train=is_training)
        "add&norm"
        output_hier2 += x
        output_hier2 = tf.contrib.layers.layer_norm(output_hier2)

        "hierarchical_rank3"
        hier3_logits = tf.matmul(x, hier3_relation_matrix, transpose_b=True)
        hier3_index = tf.nn.softmax(hier3_logits, -1)
        hier3_relation = tf.matmul(hier3_index, hier3_relation_matrix)
        "gate_hier3"
        concat_hier3 = tf.concat([x, hier3_relation], 1)
        alpha_hier3 = bn_dense_layer_v2(concat_hier3, self.hidden_size, True,
                                        scope='gate_hier3', activation='sigmoid',
                                        is_train=is_training)
        context_hier3 = alpha_hier3 * x + (1 - alpha_hier3) * hier3_relation
        "MLP linear"
        middle_hier3 = bn_dense_layer_v2(context_hier3, 1024, False,
                                         scope='mlp_activation_hier3', activation='relu',
                                         is_train=is_training)
        output_hier3 = bn_dense_layer_v2(middle_hier3, self.hidden_size, False,
                                         scope='mlp_linear_hier3', activation='linear',
                                         is_train=is_training)
        "add&norm"
        output_hier3 += x
        output_hier3 = tf.contrib.layers.layer_norm(output_hier3)

        output_hier = tf.concat([output_hier1, output_hier2, output_hier3], 1)
        prob_bag_hier3 = bn_dense_layer_v2(output_hier, 1, True, scope='self-attn-hier3',
                                           activation='linear', is_train=is_training)  # ->(bs, 1)
        tower_repre = []
        for i in range(FLAGS.batch_size):
            prob_hier3 = tf.nn.softmax(
                tf.reshape(prob_bag_hier3[self.scope[i]:self.scope[i + 1]], [1, -1]))
            sen_hier3 = tf.reshape(
                tf.matmul(prob_hier3, output_hier[self.scope[i]:self.scope[i + 1]]),
                [self.hidden_size * 3])
            tower_repre.append(sen_hier3)
        stack_repre = tf.stack(tower_repre)
        fusion_repre = tf.layers.dropout(stack_repre, rate=1 - self.keep_prob,
                                         training=is_training)

    with tf.variable_scope("loss", dtype=tf.float32, initializer=xavier()):
        discrimitive_matrix = self._GetVar(init_vec=None, key='discmat',
                                           name='discrimitive_matrix',
                                           initializer=tf.orthogonal_initializer(),
                                           shape=[FLAGS.num_classes, 3 * self.hidden_size])
        bias = self._GetVar(init_vec=None, key='disc_bias', name='bias',
                            shape=[FLAGS.num_classes])
        logits = tf.matmul(fusion_repre, discrimitive_matrix, transpose_b=True) + bias
        regularizer = tf.contrib.layers.l2_regularizer(FLAGS.weight_decay)
        l2_loss = tf.contrib.layers.apply_regularization(regularizer=regularizer,
                                                         weights_list=tf.trainable_variables())

        n_hier1 = tf.cast(FLAGS.num_hier1_classes - 1, tf.float32)
        p_hier1 = 1.0 - 0.1
        q_hier1 = 0.1 / n_hier1
        soft_hier1 = tf.one_hot(tf.cast(self.sen_hier1, tf.int32),
                                depth=FLAGS.num_hier1_classes,
                                on_value=p_hier1, off_value=q_hier1)
        hier1_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=soft_hier1, logits=hier1_logits))

        n_hier2 = tf.cast(FLAGS.num_hier2_classes - 1, tf.float32)
        p_hier2 = 1.0 - 0.1
        q_hier2 = 0.1 / n_hier2
        soft_hier2 = tf.one_hot(tf.cast(self.sen_hier2, tf.int32),
                                depth=FLAGS.num_hier2_classes,
                                on_value=p_hier2, off_value=q_hier2)
        hier2_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=soft_hier2, logits=hier2_logits))

        n_hier3 = tf.cast(FLAGS.num_classes - 1, tf.float32)
        p_hier3 = 1.0 - 0.1
        q_hier3 = 0.1 / n_hier3
        soft_hier3 = tf.one_hot(tf.cast(self.label_index, tf.int32),
                                depth=FLAGS.num_classes,
                                on_value=p_hier3, off_value=q_hier3)
        hier3_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=soft_hier3, logits=hier3_logits))

        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=self.label, logits=logits)
        ) + hier3_loss + hier2_loss + hier1_loss + l2_loss
        self.output = tf.nn.softmax(logits)
        tf.summary.scalar('loss', self.loss)
        self.predictions = tf.argmax(logits, 1, name="predictions")
        self.correct_predictions = tf.equal(self.predictions, tf.argmax(self.label, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, "float"),
                                       name="accuracy")

    if not is_training:
        self.test_output = self.output