def build_graph(self, input_network_outputs={}, reuse=True, debug=False, nornn=False):
    """"""
    with tf.variable_scope('Embeddings'):
        if self.sum_pos:  # TODO this should be done with a `POSMultivocab`
            pos_vocabs = list(filter(lambda x: 'POS' in x.classname, self.input_vocabs))
            pos_tensors = [input_vocab.get_input_tensor(embed_keep_prob=1, reuse=reuse)
                           for input_vocab in pos_vocabs]
            non_pos_tensors = [input_vocab.get_input_tensor(reuse=reuse)
                               for input_vocab in self.input_vocabs
                               if 'POS' not in input_vocab.classname]
            #pos_tensors = [tf.Print(pos_tensor, [pos_tensor]) for pos_tensor in pos_tensors]
            #non_pos_tensors = [tf.Print(non_pos_tensor, [non_pos_tensor]) for non_pos_tensor in non_pos_tensors]
            if pos_tensors:
                pos_tensors = tf.add_n(pos_tensors)
                if not reuse:
                    pos_tensors = [pos_vocabs[0].drop_func(pos_tensors, pos_vocabs[0].embed_keep_prob)]
                else:
                    pos_tensors = [pos_tensors]
            input_tensors = non_pos_tensors + pos_tensors
        else:
            input_tensors = [input_vocab.get_input_tensor(reuse=reuse)
                             for input_vocab in self.input_vocabs]
        for input_network, output in input_network_outputs:
            with tf.variable_scope(input_network.classname):
                input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
        layer = tf.concat(input_tensors, 2)

    n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keepdims=True))
    batch_size, bucket_size, input_size = nn.get_sizes(layer)
    layer *= input_size / (n_nonzero + tf.constant(1e-12))

    token_weights = nn.greater(self.id_vocab.placeholder, 0)
    tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
    n_tokens = tf.reduce_sum(tokens_per_sequence)
    n_sequences = tf.count_nonzero(tokens_per_sequence)
    seq_lengths = tokens_per_sequence + 1

    root_weights = token_weights + (1 - nn.greater(tf.range(bucket_size), 0))
    # token_weights = root_weights
    # root_weights = token_weights
    token_weights3D = tf.expand_dims(token_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
    token_weights2D = tf.expand_dims(root_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
    # For the three indices a, b, c the decision being scored is the arc a->b, so none of the
    # binary potentials should allow the root as a dependent. Strictly, the root belongs in
    # every second-order part except sibling, but for simplicity we mask them all the same way.
    token_weights4D = tf.cast(
        tf.expand_dims(token_weights2D, axis=-3) *
        tf.expand_dims(tf.expand_dims(root_weights, axis=-1), axis=-1), dtype=tf.float32)
    # abc -> ab, ac
    #token_weights_sib = tf.cast(tf.expand_dims(root_, axis=-3) * tf.expand_dims(tf.expand_dims(root_weights, axis=-1), axis=-1), dtype=tf.float32)
    # abc -> ab, cb
    #pdb.set_trace()
    token_weights_cop = tf.cast(
        tf.expand_dims(token_weights2D, axis=-2) *
        tf.expand_dims(tf.expand_dims(token_weights, axis=1), axis=-1), dtype=tf.float32)
    token_weights_cop_0 = token_weights_cop[:, 0] * tf.cast(
        tf.transpose(token_weights3D, [0, 2, 1]), dtype=tf.float32)
    token_weights_cop = tf.concat([token_weights_cop_0[:, None, :], token_weights_cop[:, 1:]], 1)
    #data=np.stack((devprint['printdata']['layer_cop'][0][0]*devprint['token_weights3D'][0].T)[None,:],devprint['printdata']['layer_cop'][0][1:])
    # abc -> ab, bc
    token_weights_gp = tf.cast(
        tf.expand_dims(tf.transpose(token_weights3D, [0, 2, 1]), axis=-1) *
        tf.expand_dims(tf.expand_dims(token_weights, axis=1), axis=1), dtype=tf.float32)
    # abc -> ca, ab
    token_weights_gp2 = tf.cast(
        tf.expand_dims(token_weights3D, axis=2) *
        tf.expand_dims(tf.expand_dims(token_weights, axis=-1), axis=1), dtype=tf.float32)
    token_weights_sib = token_weights_gp
    tokens = {'n_tokens': n_tokens,
              'tokens_per_sequence': tokens_per_sequence,
              'token_weights': token_weights,
              'n_sequences': n_sequences}

    conv_keep_prob = 1. if reuse else self.conv_keep_prob
    recur_keep_prob = 1. if reuse else self.recur_keep_prob
    recur_include_prob = 1. if reuse else self.recur_include_prob

    if not nornn and not self.nornn:
        for i in six.moves.range(self.n_layers):
            conv_width = self.first_layer_conv_width if not i else self.conv_width
            with tf.variable_scope('RNN-{}'.format(i)):
                layer, _ = recurrent.directed_RNN(
                    layer, self.recur_size, seq_lengths,
                    bidirectional=self.bidirectional, recur_cell=self.recur_cell,
                    conv_width=conv_width, recur_func=self.recur_func,
                    conv_keep_prob=conv_keep_prob, recur_include_prob=recur_include_prob,
                    recur_keep_prob=recur_keep_prob, cifg=self.cifg,
                    highway=self.highway, highway_func=self.highway_func, bilin=self.bilin)
    else:
        print('do not use RNN')

    output_fields = {vocab.field: vocab for vocab in self.output_vocabs}
    outputs = {}
    #pdb.set_trace()
    with tf.variable_scope('Classifiers'):
        if 'deprel' in output_fields:
            vocab = output_fields['deprel']
            if vocab.factorized:
                head_vocab = output_fields['dephead']
                head_vocab.token_weights_sib = token_weights_sib
                head_vocab.token_weights_cop = token_weights_cop
                head_vocab.token_weights_gp = token_weights_gp
                head_vocab.token_weights_gp2 = token_weights_gp2
                head_vocab.token_weights = token_weights
                with tf.variable_scope('Unlabeled'):
                    if self.layer_mask(head_vocab):
                        unlabeled_outputs = head_vocab.get_bilinear_classifier(
                            layer, token_weights=token_weights3D, reuse=reuse, debug=debug,
                            token_weights4D=token_weights4D, sentence_mask=token_weights)
                    else:
                        unlabeled_outputs = head_vocab.get_bilinear_classifier(
                            layer, token_weights=token_weights, reuse=reuse)
                with tf.variable_scope('Labeled'):
                    labeled_outputs = vocab.get_bilinear_classifier(
                        layer, unlabeled_outputs, token_weights=token_weights, reuse=reuse)
            else:
                labeled_outputs = vocab.get_unfactored_bilinear_classifier(
                    layer, head_vocab.placeholder, token_weights=token_weights, reuse=reuse)
            outputs['deptree'] = labeled_outputs
            self._evals.add('deptree')
            if 'ufeats' in output_fields:
                vocab = output_fields['ufeats']
                outputs[vocab.field] = vocab.get_bilinear_classifier(
                    layer, labeled_outputs, token_weights=token_weights, reuse=reuse)
                self._evals.add('ufeats')
        elif 'dephead' in output_fields:
            vocab = output_fields['dephead']
            outputs[vocab.classname] = vocab.get_bilinear_classifier(
                layer, token_weights=token_weights, reuse=reuse)
            self._evals.add('dephead')

    if debug:
        outputs['deptree']['token_weights'] = token_weights
        outputs['deptree']['token_weights3D'] = token_weights3D
        outputs['deptree']['root_weights'] = root_weights
        outputs['deptree']['token_weights4D'] = token_weights4D
        outputs['deptree']['token_weights_sib'] = token_weights_sib
        outputs['deptree']['token_weights_gp'] = token_weights_gp
        outputs['deptree']['token_weights_gp2'] = token_weights_gp2

    return outputs, tokens
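
# Illustrative sketch, not part of the original code: how the mask tensors built in
# build_graph() above combine, written with numpy instead of TF so the shapes are easy
# to inspect. The toy `id_placeholder` is a hypothetical stand-in for
# self.id_vocab.placeholder, with index 0 holding the ROOT token (id 0) and trailing
# zeros as padding.
def _demo_token_mask_shapes():
    import numpy as np
    id_placeholder = np.array([[0, 4, 5, 6],   # ROOT + 3 real tokens
                               [0, 7, 8, 0]])  # ROOT + 2 real tokens + padding
    bucket_size = id_placeholder.shape[1]
    token_weights = (id_placeholder > 0).astype(np.int32)  # [batch, seq]
    # same idea as token_weights + (1 - nn.greater(tf.range(bucket_size), 0)):
    # position 0 (ROOT) is switched on so it can act as a head
    root_weights = token_weights + (np.arange(bucket_size) == 0).astype(np.int32)
    # token_weights3D[b, i, j] == 1 iff i is a real token and j is a real token or ROOT
    token_weights3D = token_weights[:, :, None] * root_weights[:, None, :]
    # token_weights2D masks pairs of potential heads; token_weights4D adds a third
    # index for the second-order parts (sibling / co-parent / grandparent)
    token_weights2D = root_weights[:, :, None] * root_weights[:, None, :]
    token_weights4D = token_weights2D[:, None, :, :] * root_weights[:, :, None, None]
    assert token_weights3D.shape == (2, 4, 4)
    assert token_weights4D.shape == (2, 4, 4, 4)
    return token_weights3D, token_weights4D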
def get_input_tensor(self, embed_keep_prob=None, nonzero_init=False, variable_scope=None, reuse=True):
    """"""
    embed_keep_prob = embed_keep_prob or self.embed_keep_prob
    conv_keep_prob = 1. if reuse else self.conv_keep_prob
    recur_keep_prob = 1. if reuse else self.recur_keep_prob
    output_keep_prob = 1. if reuse else self.output_keep_prob

    layers = []
    with tf.variable_scope(variable_scope or self.classname) as scope:
        for i, placeholder in enumerate(self._multibucket.get_placeholders()):
            if i:
                scope.reuse_variables()
            #with tf.device('/gpu:0'):
            #with tf.device('/gpu:{}'.format(i)):
            with tf.variable_scope('Embeddings'):
                layer = embeddings.token_embedding_lookup(
                    len(self), self.embed_size, placeholder,
                    nonzero_init=True, reuse=reuse)

            seq_lengths = tf.count_nonzero(placeholder, axis=1, dtype=tf.int32)
            for j in six.moves.range(self.n_layers):
                conv_width = self.first_layer_conv_width if not j else self.conv_width
                with tf.variable_scope('RNN-{}'.format(j)):
                    layer, final_states = recurrent.directed_RNN(
                        layer, self.recur_size, seq_lengths,
                        bidirectional=self.bidirectional, recur_cell=self.recur_cell,
                        conv_width=conv_width, recur_func=self.recur_func,
                        conv_keep_prob=conv_keep_prob, recur_keep_prob=recur_keep_prob,
                        cifg=self.cifg, highway=self.highway,
                        highway_func=self.highway_func, bilin=self.bilin)

            if not self.squeeze_type.startswith('gated'):
                if self.squeeze_type == 'linear_attention':
                    with tf.variable_scope('Attention'):
                        _, layer = classifiers.linear_attention(
                            layer, hidden_keep_prob=output_keep_prob)
                elif self.squeeze_type == 'final_hidden':
                    layer, _ = tf.split(final_states, 2, axis=-1)
                elif self.squeeze_type == 'final_cell':
                    _, layer = tf.split(final_states, 2, axis=-1)
                elif self.squeeze_type == 'final_state':
                    layer = final_states
                elif self.squeeze_type == 'reduce_max':
                    layer = tf.reduce_max(layer, axis=-2)
                with tf.variable_scope('Linear'):
                    layer = classifiers.hidden(
                        layer, self.output_size,
                        hidden_func=self.output_func, hidden_keep_prob=output_keep_prob)
            else:
                with tf.variable_scope('Attention'):
                    attn, layer = classifiers.deep_linear_attention(
                        layer, self.output_size,
                        hidden_func=nonlin.identity, hidden_keep_prob=output_keep_prob)
                if self.squeeze_type == 'gated_reduce_max':
                    layer = tf.nn.relu(tf.reduce_max(layer, axis=-2)) + \
                        .1 * tf.reduce_sum(layer, axis=-2) / \
                        (tf.count_nonzero(layer, axis=-2, dtype=tf.float32) + 1e-12)
                elif self.squeeze_type == 'gated_reduce_sum':
                    layer = self.output_func(tf.reduce_sum(layer, axis=-2))
            #layer = tf.Print(layer, [tf.shape(layer)])
            layers.append(layer)

        # Concatenate all the buckets' embeddings
        layer = tf.concat(layers, 0)
        # Put them in the right order, creating the embedding matrix
        layer = tf.nn.embedding_lookup(layer, self._multibucket.placeholder)
        #layer = tf.nn.embedding_lookup(layers, self._multibucket.placeholder, partition_strategy='div')
        #layer = tf.Print(layer, [tf.shape(layer)])
        # Get the embeddings from the embedding matrix
        layer = tf.nn.embedding_lookup(layer, self.placeholder)
        if embed_keep_prob < 1:
            layer = self.drop_func(layer, embed_keep_prob)
    return layer
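
# Illustrative sketch, not part of the original code: the double embedding_lookup at
# the end of get_input_tensor() first stitches the per-bucket RNN outputs into one big
# matrix ordered by the multibucket placeholder, then indexes that matrix with the
# token placeholder. Everything below is a hypothetical numpy stand-in for the real
# self._multibucket tensors, kept 1-dimensional for readability.
def _demo_multibucket_gather():
    import numpy as np
    bucket0 = np.array([[0.0], [1.0], [2.0]])        # bucket 0: rows 0..2 of the concatenation
    bucket1 = np.array([[10.0], [11.0]])             # bucket 1: rows 3..4 of the concatenation
    stacked = np.concatenate([bucket0, bucket1], 0)  # == tf.concat(layers, 0)
    # for each vocabulary entry, the row of `stacked` that holds its vector
    multibucket_placeholder = np.array([0, 3, 1, 4, 2])
    embed_matrix = stacked[multibucket_placeholder]  # first embedding_lookup
    # for each token in the batch, its vocabulary index
    token_placeholder = np.array([[1, 4, 0], [2, 3, 0]])
    token_embeddings = embed_matrix[token_placeholder]  # second embedding_lookup
    assert token_embeddings.shape == (2, 3, 1)
    return token_embeddings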
def build_graph(self, input_network_outputs={}, reuse=True):
    """"""
    with tf.variable_scope('Embeddings'):
        if self.sum_pos:  # TODO this should be done with a `POSMultivocab`
            pos_vocabs = list(filter(lambda x: 'POS' in x.classname, self.input_vocabs))
            pos_tensors = [input_vocab.get_input_tensor(embed_keep_prob=1, reuse=reuse)
                           for input_vocab in pos_vocabs]
            non_pos_tensors = [input_vocab.get_input_tensor(reuse=reuse)
                               for input_vocab in self.input_vocabs
                               if 'POS' not in input_vocab.classname]
            #pos_tensors = [tf.Print(pos_tensor, [pos_tensor]) for pos_tensor in pos_tensors]
            #non_pos_tensors = [tf.Print(non_pos_tensor, [non_pos_tensor]) for non_pos_tensor in non_pos_tensors]
            if pos_tensors:
                pos_tensors = tf.add_n(pos_tensors)
                if not reuse:
                    pos_tensors = [pos_vocabs[0].drop_func(pos_tensors, pos_vocabs[0].embed_keep_prob)]
                else:
                    pos_tensors = [pos_tensors]
            input_tensors = non_pos_tensors + pos_tensors
        else:
            input_tensors = [input_vocab.get_input_tensor(reuse=reuse)
                             for input_vocab in self.input_vocabs]
        for input_network, output in input_network_outputs:
            with tf.variable_scope(input_network.classname):
                input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
        layer = tf.concat(input_tensors, 2)

    n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keep_dims=True))
    batch_size, bucket_size, input_size = nn.get_sizes(layer)
    layer *= input_size / (n_nonzero + tf.constant(1e-12))

    token_weights = nn.greater(self.id_vocab.placeholder, 0)
    tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
    n_tokens = tf.reduce_sum(tokens_per_sequence)
    n_sequences = tf.count_nonzero(tokens_per_sequence)
    seq_lengths = tokens_per_sequence + 1
    tokens = {'n_tokens': n_tokens,
              'tokens_per_sequence': tokens_per_sequence,
              'token_weights': token_weights,
              'n_sequences': n_sequences}

    conv_keep_prob = 1. if reuse else self.conv_keep_prob
    recur_keep_prob = 1. if reuse else self.recur_keep_prob
    recur_include_prob = 1. if reuse else self.recur_include_prob

    for i in six.moves.range(self.n_layers):
        conv_width = self.first_layer_conv_width if not i else self.conv_width
        with tf.variable_scope('RNN-{}'.format(i)):
            layer, _ = recurrent.directed_RNN(
                layer, self.recur_size, seq_lengths,
                bidirectional=self.bidirectional, recur_cell=self.recur_cell,
                conv_width=conv_width, recur_func=self.recur_func,
                conv_keep_prob=conv_keep_prob, recur_include_prob=recur_include_prob,
                recur_keep_prob=recur_keep_prob, cifg=self.cifg,
                highway=self.highway, highway_func=self.highway_func, bilin=self.bilin)

    output_fields = {vocab.field: vocab for vocab in self.output_vocabs}
    outputs = {}
    with tf.variable_scope('Classifiers'):
        if 'deprel' in output_fields:
            vocab = output_fields['deprel']
            if vocab.factorized:
                head_vocab = output_fields['dephead']
                with tf.variable_scope('Unlabeled'):
                    unlabeled_outputs = head_vocab.get_bilinear_classifier(
                        layer, token_weights=token_weights, reuse=reuse)
                with tf.variable_scope('Labeled'):
                    labeled_outputs = vocab.get_bilinear_classifier(
                        layer, unlabeled_outputs, token_weights=token_weights, reuse=reuse)
            else:
                labeled_outputs = vocab.get_unfactored_bilinear_classifier(
                    layer, head_vocab.placeholder, token_weights=token_weights, reuse=reuse)
            outputs['deptree'] = labeled_outputs
            self._evals.add('deptree')
            if 'ufeats' in output_fields:
                vocab = output_fields['ufeats']
                outputs[vocab.field] = vocab.get_bilinear_classifier(
                    layer, labeled_outputs, token_weights=token_weights, reuse=reuse)
                self._evals.add('ufeats')
        elif 'dephead' in output_fields:
            vocab = output_fields['dephead']
            outputs[vocab.classname] = vocab.get_bilinear_classifier(
                layer, token_weights=token_weights, reuse=reuse)
            self._evals.add('dephead')
    return outputs, tokens
def build_graph(self, input_network_outputs={}, reuse=True, debug=False, nornn=False):
    """"""
    #pdb.set_trace()
    with tf.variable_scope('Embeddings'):
        if self.sum_pos:  # TODO this should be done with a `POSMultivocab`
            pos_vocabs = list(filter(lambda x: 'POS' in x.classname, self.input_vocabs))
            pos_tensors = [input_vocab.get_input_tensor(embed_keep_prob=1, reuse=reuse)
                           for input_vocab in pos_vocabs]
            non_pos_tensors = [input_vocab.get_input_tensor(reuse=reuse)
                               for input_vocab in self.input_vocabs
                               if 'POS' not in input_vocab.classname]
            #pos_tensors = [tf.Print(pos_tensor, [pos_tensor]) for pos_tensor in pos_tensors]
            #non_pos_tensors = [tf.Print(non_pos_tensor, [non_pos_tensor]) for non_pos_tensor in non_pos_tensors]
            if pos_tensors:
                pos_tensors = tf.add_n(pos_tensors)
                if not reuse:
                    pos_tensors = [pos_vocabs[0].drop_func(pos_tensors, pos_vocabs[0].embed_keep_prob)]
                else:
                    pos_tensors = [pos_tensors]
            input_tensors = non_pos_tensors + pos_tensors
        else:  # run this branch
            input_tensors = [input_vocab.get_input_tensor(reuse=reuse)
                             for input_vocab in self.input_vocabs]
        for input_network, output in input_network_outputs:
            with tf.variable_scope(input_network.classname):
                input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
        layer = tf.concat(input_tensors, 2)  # [batch, sentence, feature]? or [batch, sentence^2, feature]?
    #pdb.set_trace()
    n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keepdims=True))
    batch_size, bucket_size, input_size = nn.get_sizes(layer)
    layer *= input_size / (n_nonzero + tf.constant(1e-12))

    token_weights = nn.greater(self.id_vocab.placeholder, 0)  # find sentence length
    tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
    seq_lengths = tokens_per_sequence + 1  # batch-sized list of sentence lengths
    if self.use_seq2seq:
        token_weights = nn.greater(self.node_id_vocab.placeholder, 0)  # find node-sequence length
        bucket_size = tf.shape(self.node_id_vocab.placeholder)[1]
        tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
        node_lengths = tokens_per_sequence + 2  # for rnn decoder
        # here we remove the <bos> token for simplicity
        token_weights = nn.greater(self.node_id_vocab.placeholder[:, 1:-1], 0)
        bucket_size = tf.shape(token_weights)[1]
        tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
    n_tokens = tf.reduce_sum(tokens_per_sequence)
    n_sequences = tf.count_nonzero(tokens_per_sequence)
    #pdb.set_trace()
    root_weights = token_weights + (1 - nn.greater(tf.range(bucket_size), 0))
    token_weights3D = tf.expand_dims(token_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
    token_weights2D = tf.expand_dims(root_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
    # For the three indices a, b, c the decision being scored is the arc a->b, so none of the
    # binary potentials should allow the root as a dependent. Strictly, the root belongs in
    # every second-order part except sibling, but for simplicity we mask them all the same way.
    token_weights4D = tf.cast(
        tf.expand_dims(token_weights2D, axis=-3) *
        tf.expand_dims(tf.expand_dims(root_weights, axis=-1), axis=-1), dtype=tf.float32)
    # abc -> ab, ac
    #token_weights_sib = tf.cast(tf.expand_dims(root_, axis=-3) * tf.expand_dims(tf.expand_dims(root_weights, axis=-1), axis=-1), dtype=tf.float32)
    # abc -> ab, cb
    #pdb.set_trace()
    token_weights_cop = tf.cast(
        tf.expand_dims(token_weights2D, axis=-2) *
        tf.expand_dims(tf.expand_dims(token_weights, axis=1), axis=-1), dtype=tf.float32)
    token_weights_cop_0 = token_weights_cop[:, 0] * tf.cast(
        tf.transpose(token_weights3D, [0, 2, 1]), dtype=tf.float32)
    token_weights_cop = tf.concat([token_weights_cop_0[:, None, :], token_weights_cop[:, 1:]], 1)
    #data=np.stack((devprint['printdata']['layer_cop'][0][0]*devprint['token_weights3D'][0].T)[None,:],devprint['printdata']['layer_cop'][0][1:])
    # abc -> ab, bc
    token_weights_gp = tf.cast(
        tf.expand_dims(tf.transpose(token_weights3D, [0, 2, 1]), axis=-1) *
        tf.expand_dims(tf.expand_dims(token_weights, axis=1), axis=1), dtype=tf.float32)
    # abc -> ca, ab
    token_weights_gp2 = tf.cast(
        tf.expand_dims(token_weights3D, axis=2) *
        tf.expand_dims(tf.expand_dims(token_weights, axis=-1), axis=1), dtype=tf.float32)
    token_weights_sib = token_weights_gp
    #token_weights4D = tf.expand_dims(token_weights3D, axis=-3) * tf.expand_dims(tf.expand_dims(token_weights, axis=-1), axis=-1)
    tokens = {'n_tokens': n_tokens,
              'tokens_per_sequence': tokens_per_sequence,
              'token_weights': token_weights,
              'token_weights3D': token_weights,
              'n_sequences': n_sequences}

    conv_keep_prob = 1. if reuse else self.conv_keep_prob
    recur_keep_prob = 1. if reuse else self.recur_keep_prob
    recur_include_prob = 1. if reuse else self.recur_include_prob

    #R=BiLSTM(X)
    # pdb.set_trace()
    for i in six.moves.range(self.n_layers):
        conv_width = self.first_layer_conv_width if not i else self.conv_width
        #'''
        if not nornn and not self.nornn:
            with tf.variable_scope('RNN-{}'.format(i)):
                layer, sentence_feat = recurrent.directed_RNN(
                    layer, self.recur_size, seq_lengths,
                    bidirectional=self.bidirectional, recur_cell=self.recur_cell,
                    conv_width=conv_width, recur_func=self.recur_func,
                    conv_keep_prob=conv_keep_prob, recur_include_prob=recur_include_prob,
                    recur_keep_prob=recur_keep_prob, cifg=self.cifg,
                    highway=self.highway, highway_func=self.highway_func, bilin=self.bilin)
        #'''
    if self.separate_prediction:
        print('separating the two pipelines')
        with tf.device('/device:GPU:1'):
            for i in six.moves.range(self.n_layers):
                conv_width = self.first_layer_conv_width if not i else self.conv_width
                #'''
                if not nornn and not self.nornn:
                    with tf.variable_scope('RNN2-{}'.format(i)):
                        layer_rel, sentence_feat = recurrent.directed_RNN(
                            layer, self.recur_size, seq_lengths,
                            bidirectional=self.bidirectional, recur_cell=self.recur_cell,
                            conv_width=conv_width, recur_func=self.recur_func,
                            conv_keep_prob=conv_keep_prob, recur_include_prob=recur_include_prob,
                            recur_keep_prob=recur_keep_prob, cifg=self.cifg,
                            highway=self.highway, highway_func=self.highway_func, bilin=self.bilin)
    else:
        layer_rel = layer
    #pdb.set_trace()
    output_fields = {vocab.field: vocab for vocab in self.output_vocabs}
    outputs = {}
    # loss is calculated in the get_* methods of parser/structs/vocabs/token_vocabs.py
    # pdb.set_trace()
    # for seq2seq, create new features
    if 'correspond_word' in output_fields:
        print('use seq2seq model for node prediction')
        with tf.variable_scope('Seq2SeqDecoder'):
            sequence_length = {}
            # here we remove the 'root' node from the source sentence
            sequence_length['source'] = seq_lengths
            sequence_length['target'] = node_lengths
            pos_vocabs = []
            lemma_vocabs = []
            for input_vocab in self.input_vocabs:
                if 'POS' in input_vocab.classname:
                    pos_vocabs.append(input_vocab.get_input_tensor(reuse=reuse))
                if 'Lemma' in input_vocab.classname:
                    lemma_vocabs.append(input_vocab.get_input_tensor(reuse=reuse))
            reinput_tensors = pos_vocabs + lemma_vocabs
            reinput_tensors = tf.concat(reinput_tensors, 2)
            seq2seq_input_tensors = []
            if len(self.decoder_vocabs) > 0:
                # get node label embedding
                seq2seq_input_tensors = [decoder_vocab.get_input_tensor(reuse=reuse)
                                         for decoder_vocab in self.decoder_vocabs
                                         if 'Copy' not in decoder_vocab.classname]
            # pdb.set_trace()
            pointer_generator_inputs = {decoder_vocab.classname[7:-5]: decoder_vocab.placeholder
                                        for decoder_vocab in self.decoder_vocabs
                                        if 'Copy' in decoder_vocab.classname}
            input_shape = seq2seq_input_tensors[0].shape
            target_placeholder = output_fields['correspond_word'].placeholder
            #target_tensor_shape=[input_shape[0],input_shape[1],reinput_tensors.shape[-1]]
            mapping = nn.greater(target_placeholder, 0)
            result_tensor = tf.batch_gather(reinput_tensors, mapping) * tf.cast(
                (mapping > 0), dtype=tf.float32)[:, :, None]
            input_features = seq2seq_input_tensors + [result_tensor]
            input_feature = tf.concat(input_features, 2)
            #input_tensors = [input_vocab.get_input_tensor(reuse=reuse) for input_vocab in self.input_vocabs]
            #pdb.set_trace()
            # [batch, num_sequence_tokens+1, hidden], [batch, num_node_tokens+2, hidden]
            #   -> [batch, num_node_tokens+2, num_sequence_tokens+1]
            # The 'root' node is removed from the sentence encoder, so the output layer is one
            # position shorter and the mask "token_weights3D" should shrink accordingly
            node_encoding = output_fields['correspond_word'].forward(
                layer, input_feature[:, :-1], sentence_feat, token_weights3D, sequence_length)
            if 'label' in output_fields:
                # pdb.set_trace()
                self._evals.add('label')
                label_vocab = output_fields['label']
                label_vocab.predictor = PointerGenerator(
                    label_vocab.hidden_size, label_vocab.hidden_size, len(label_vocab), 0, True,
                    label_vocab.hidden_func, label_vocab.hidden_keep_prob)
                node_outputs = label_vocab.forward(
                    node_encoding['values'], node_encoding['SrcWeights'],
                    node_encoding['CorefWeights'], pointer_generator_inputs, debug=debug)
                outputs['label'] = node_outputs
            # pdb.set_trace()
            # remove the start and end token
            layer = node_encoding['values'][:, :-1]
            layer_rel = layer
    # pdb.set_trace()
    #layers
    with tf.variable_scope('Classifiers'):
        if 'semrel' in output_fields:
            vocab = output_fields['semrel']
            head_vocab = output_fields['semhead']
            head_vocab.token_weights_sib = token_weights_sib
            head_vocab.token_weights_cop = token_weights_cop
            head_vocab.token_weights_gp = token_weights_gp
            head_vocab.token_weights_gp2 = token_weights_gp2
            head_vocab.token_weights = token_weights
            if vocab.factorized:
                with tf.variable_scope('Unlabeled'):
                    #pdb.set_trace()
                    if self.layer_mask(head_vocab):
                        unlabeled_outputs = head_vocab.get_bilinear_discriminator(
                            layer, token_weights=token_weights3D, reuse=reuse, debug=debug,
                            token_weights4D=token_weights4D)
                    else:
                        unlabeled_outputs = head_vocab.get_bilinear_discriminator(
                            layer, token_weights=token_weights3D, reuse=reuse, debug=debug)
                if self.two_gpu:
                    with tf.device('/device:GPU:1'):
                        with tf.variable_scope('Labeled'):
                            labeled_outputs = vocab.get_bilinear_classifier(
                                layer_rel, unlabeled_outputs, token_weights=token_weights3D,
                                reuse=reuse, debug=debug)
                else:
                    with tf.variable_scope('Labeled'):
                        labeled_outputs = vocab.get_bilinear_classifier(
                            layer_rel, unlabeled_outputs, token_weights=token_weights3D,
                            reuse=reuse, debug=debug)
            else:
                labeled_outputs = vocab.get_unfactored_bilinear_classifier(
                    layer, head_vocab.placeholder, token_weights=token_weights3D, reuse=reuse)
            outputs['semgraph'] = labeled_outputs
            self._evals.add('semgraph')
        elif 'semhead' in output_fields:
            vocab = output_fields['semhead']
            outputs[vocab.classname] = vocab.get_bilinear_classifier(
                layer, token_weights=token_weights3D, reuse=reuse)
            self._evals.add('semhead')
        if 'attr' in output_fields:
            print('predict attributes')
            attr_vocab = output_fields['attr']
            with tf.variable_scope('Attribute'):
                attr_outputs = attr_vocab.get_bilinear_classifier(
                    layer_rel, labeled_outputs, token_weights=token_weights[:, :, None],
                    reuse=reuse, debug=debug)
            self._evals.add('attribute')
            outputs['attribute'] = attr_outputs
            # if 'semgraph' in outputs:
            #     outputs['semgraph']['loss'] = tf.zeros(outputs['attribute']['loss'].shape, dtype=tf.float32)
        # -------------------------------------------------------------------------
        if 'frame' in output_fields:
            print('predict sdp frames')
            frame_vocab = output_fields['frame']
            with tf.variable_scope('Frame'):
                frame_outputs = frame_vocab.get_linear_classifier(
                    layer_rel, token_weights, reuse=reuse, debug=debug)
            self._evals.add('frame')
            # pdb.set_trace()
            outputs['frame'] = frame_outputs
        # ---------------------------------------------------------------------------
    if debug:
        outputs['semgraph']['token_weights'] = token_weights
        outputs['semgraph']['token_weights3D'] = token_weights3D
        outputs['semgraph']['root_weights'] = root_weights
        outputs['semgraph']['token_weights4D'] = token_weights4D
        outputs['semgraph']['token_weights_sib'] = token_weights_sib
        outputs['semgraph']['token_weights_cop'] = token_weights_cop
        outputs['semgraph']['token_weights_gp'] = token_weights_gp
        outputs['semgraph']['token_weights_gp2'] = token_weights_gp2
        outputs['semgraph']['printdata']['word_postag'] = self.input_vocabs[-1].placeholder
    if 'correspond_word' in output_fields:
        outputs['semgraph']['input_feature'] = input_feature
        if debug:
            outputs['semgraph']['decoder'] = node_encoding
            outputs['semgraph']['nodes'] = node_outputs
    if 'ufeats' in output_fields:
        outputs['semgraph']['frame'] = outputs['frame']
    return outputs, tokens
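
# Illustrative sketch, not part of the original code: the factorized 'Unlabeled' /
# 'Labeled' split above scores edge existence and edge labels separately. The numpy
# decode below shows one common way such factorized outputs are combined at
# prediction time; the variable names are hypothetical, not the vocabs' real API.
def _demo_factorized_decode():
    import numpy as np
    rng = np.random.RandomState(0)
    batch, seq, n_labels = 2, 4, 5
    edge_logits = rng.randn(batch, seq, seq)             # 'Unlabeled' discriminator scores
    label_logits = rng.randn(batch, seq, seq, n_labels)  # 'Labeled' classifier scores
    token_weights3D = np.ones((batch, seq, seq))         # padding/root mask from build_graph
    edge_probs = 1.0 / (1.0 + np.exp(-edge_logits))      # sigmoid: does this edge exist?
    edges = (edge_probs > 0.5) * token_weights3D         # keep only unmasked edges
    labels = label_logits.argmax(-1)                     # best label for every token pair
    return edges * (labels + 1)                          # 0 = no edge, k+1 = edge with label k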
def build_graph(self, input_network_outputs={}, reuse=True, debug=False, nornn=False):
    """"""
    #pdb.set_trace()
    with tf.variable_scope('Embeddings'):
        if self.sum_pos:  # TODO this should be done with a `POSMultivocab`
            pos_vocabs = list(filter(lambda x: 'POS' in x.classname, self.input_vocabs))
            pos_tensors = [input_vocab.get_input_tensor(embed_keep_prob=1, reuse=reuse)
                           for input_vocab in pos_vocabs]
            non_pos_tensors = [input_vocab.get_input_tensor(reuse=reuse)
                               for input_vocab in self.input_vocabs
                               if 'POS' not in input_vocab.classname]
            #pos_tensors = [tf.Print(pos_tensor, [pos_tensor]) for pos_tensor in pos_tensors]
            #non_pos_tensors = [tf.Print(non_pos_tensor, [non_pos_tensor]) for non_pos_tensor in non_pos_tensors]
            if pos_tensors:
                pos_tensors = tf.add_n(pos_tensors)
                if not reuse:
                    pos_tensors = [pos_vocabs[0].drop_func(pos_tensors, pos_vocabs[0].embed_keep_prob)]
                else:
                    pos_tensors = [pos_tensors]
            input_tensors = non_pos_tensors + pos_tensors
        else:  # run this branch
            input_tensors = [input_vocab.get_input_tensor(reuse=reuse)
                             for input_vocab in self.input_vocabs]
        for input_network, output in input_network_outputs:
            with tf.variable_scope(input_network.classname):
                input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
        layer = tf.concat(input_tensors, 2)  # [batch, sentence, feature]? or [batch, sentence^2, feature]?

    n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keepdims=True))
    batch_size, bucket_size, input_size = nn.get_sizes(layer)
    layer *= input_size / (n_nonzero + tf.constant(1e-12))

    token_weights = nn.greater(self.id_vocab.placeholder, 0)  # find sentence length
    tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
    n_tokens = tf.reduce_sum(tokens_per_sequence)
    n_sequences = tf.count_nonzero(tokens_per_sequence)
    seq_lengths = tokens_per_sequence + 1  # batch-sized list of sentence lengths
    root_weights = token_weights + (1 - nn.greater(tf.range(bucket_size), 0))
    token_weights3D = tf.expand_dims(token_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
    token_weights2D = tf.expand_dims(token_weights, axis=-1) * tf.expand_dims(token_weights, axis=-2)
    # For the three indices a, b, c the decision being scored is the arc a->b, so none of the
    # binary potentials should contain the root
    token_weights4D = tf.cast(
        tf.expand_dims(token_weights2D, axis=-3) *
        tf.expand_dims(tf.expand_dims(token_weights, axis=-1), axis=-1), dtype=tf.float32)
    #token_weights4D = tf.expand_dims(token_weights3D, axis=-3) * tf.expand_dims(tf.expand_dims(token_weights, axis=-1), axis=-1)
    tokens = {'n_tokens': n_tokens,
              'tokens_per_sequence': tokens_per_sequence,
              'token_weights': token_weights,
              'token_weights3D': token_weights,
              'n_sequences': n_sequences}

    conv_keep_prob = 1. if reuse else self.conv_keep_prob
    recur_keep_prob = 1. if reuse else self.recur_keep_prob
    recur_include_prob = 1. if reuse else self.recur_include_prob

    #R=BiLSTM(X)
    #pdb.set_trace()
    for i in six.moves.range(self.n_layers):
        conv_width = self.first_layer_conv_width if not i else self.conv_width
        #'''
        if not nornn:
            with tf.variable_scope('RNN-{}'.format(i)):
                layer, _ = recurrent.directed_RNN(
                    layer, self.recur_size, seq_lengths,
                    bidirectional=self.bidirectional, recur_cell=self.recur_cell,
                    conv_width=conv_width, recur_func=self.recur_func,
                    conv_keep_prob=conv_keep_prob, recur_include_prob=recur_include_prob,
                    recur_keep_prob=recur_keep_prob, cifg=self.cifg,
                    highway=self.highway, highway_func=self.highway_func, bilin=self.bilin)
        #'''
    #pdb.set_trace()
    output_fields = {vocab.field: vocab for vocab in self.output_vocabs}
    outputs = {}
    # loss is calculated in the get_* methods of parser/structs/vocabs/token_vocabs.py
    with tf.variable_scope('Classifiers'):
        if 'semrel' in output_fields:
            vocab = output_fields['semrel']
            head_vocab = output_fields['semhead']
            if vocab.factorized:
                if self.label_end2end:
                    with tf.variable_scope('Labeled'):
                        labeled_outputs = vocab.get_bilinear_classifier(
                            layer, head_vocab.placeholder, token_weights=token_weights3D,
                            reuse=reuse, debug=debug)
                    with tf.variable_scope('Unlabeled'):
                        if self.layer_mask(head_vocab):
                            unlabeled_outputs = head_vocab.get_bilinear_discriminator(
                                layer, token_weights=token_weights3D, reuse=reuse, debug=debug,
                                token_weights4D=token_weights4D, prev_output=labeled_outputs)
                        else:
                            unlabeled_outputs = head_vocab.get_bilinear_discriminator(
                                layer, token_weights=token_weights3D, reuse=reuse, debug=debug,
                                prev_output=labeled_outputs)
                    labeled_outputs = unlabeled_outputs
                else:
                    with tf.variable_scope('Unlabeled'):
                        #pdb.set_trace()
                        if self.layer_mask(head_vocab):
                            unlabeled_outputs = head_vocab.get_bilinear_discriminator(
                                layer, token_weights=token_weights3D, reuse=reuse, debug=debug,
                                token_weights4D=token_weights4D)
                        else:
                            unlabeled_outputs = head_vocab.get_bilinear_discriminator(
                                layer, token_weights=token_weights3D, reuse=reuse, debug=debug)
                    with tf.variable_scope('Labeled'):
                        labeled_outputs = vocab.get_bilinear_classifier(
                            layer, unlabeled_outputs, token_weights=token_weights3D,
                            reuse=reuse, debug=debug)
            else:
                labeled_outputs = vocab.get_unfactored_bilinear_classifier(
                    layer, head_vocab.placeholder, token_weights=token_weights3D, reuse=reuse)
            outputs['semgraph'] = labeled_outputs
            self._evals.add('semgraph')
        elif 'semhead' in output_fields:
            vocab = output_fields['semhead']
            outputs[vocab.classname] = vocab.get_bilinear_classifier(
                layer, token_weights=token_weights3D, reuse=reuse)
            self._evals.add('semhead')
    if debug:
        outputs['semgraph']['token_weights'] = token_weights
        outputs['semgraph']['token_weights3D'] = token_weights3D
        outputs['semgraph']['root_weights'] = root_weights
        outputs['semgraph']['token_weights4D'] = token_weights4D
    return outputs, tokens
def build_graph(self, input_network_outputs={}, reuse=True):
    """"""
    with tf.variable_scope('Embeddings'):
        input_tensors = [input_vocab.get_input_tensor(reuse=reuse)
                         for input_vocab in self.input_vocabs]
        for input_network, output in input_network_outputs:
            with tf.variable_scope(input_network.classname):
                input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
        layer = tf.concat(input_tensors, 2)

    batch_size, bucket_size, input_size = nn.get_sizes(layer)
    n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keep_dims=True))
    layer *= input_size / (n_nonzero + tf.constant(1e-12))

    token_weights = nn.greater(self.id_vocab.placeholder, 0)
    tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
    n_tokens = tf.reduce_sum(tokens_per_sequence)
    n_sequences = tf.count_nonzero(tokens_per_sequence)
    seq_lengths = tokens_per_sequence + 1
    tokens = {'n_tokens': n_tokens,
              'tokens_per_sequence': tokens_per_sequence,
              'token_weights': token_weights,
              'n_sequences': n_sequences}

    conv_keep_prob = 1. if reuse else self.conv_keep_prob
    recur_keep_prob = 1. if reuse else self.recur_keep_prob
    recur_include_prob = 1. if reuse else self.recur_include_prob

    for i in six.moves.range(self.n_layers):
        conv_width = self.first_layer_conv_width if not i else self.conv_width
        with tf.variable_scope('RNN-{}'.format(i)):
            layer, _ = recurrent.directed_RNN(
                layer, self.recur_size, seq_lengths,
                bidirectional=self.bidirectional, recur_cell=self.recur_cell,
                conv_width=conv_width, recur_func=self.recur_func,
                conv_keep_prob=conv_keep_prob, recur_keep_prob=recur_keep_prob,
                recur_include_prob=recur_include_prob, cifg=self.cifg,
                highway=self.highway, highway_func=self.highway_func, bilin=self.bilin)

    output_vocabs = {vocab.field: vocab for vocab in self.output_vocabs}
    outputs = {}
    with tf.variable_scope('Classifiers'):
        last_output = None
        if 'lemma' in output_vocabs:
            vocab = output_vocabs['lemma']
            outputs[vocab.field] = vocab.get_linear_classifier(
                layer, token_weights,
                last_output if self.share_layer else None, reuse=reuse)
            self._evals.add('lemma')
            if last_output is None:
                last_output = outputs[vocab.field]
        if 'upos' in output_vocabs:
            vocab = output_vocabs['upos']
            outputs[vocab.field] = vocab.get_linear_classifier(
                layer, token_weights,
                last_output if self.share_layer else None, reuse=reuse)
            self._evals.add('upos')
            if last_output is None:
                last_output = outputs[vocab.field]
            if reuse:
                upos_idxs = outputs[vocab.field]['predictions']
            else:
                upos_idxs = outputs[vocab.field]['targets']
            upos_embed = vocab.get_input_tensor(inputs=upos_idxs, embed_keep_prob=1, reuse=reuse)
            if 'xpos' in output_vocabs and not self.share_layer:
                vocab = output_vocabs['xpos']
                outputs[vocab.field] = vocab.get_bilinear_classifier_with_embeddings(
                    layer, upos_embed, token_weights, reuse=reuse)
                self._evals.add('xpos')
            if 'ufeats' in output_vocabs and not self.share_layer:
                vocab = output_vocabs['ufeats']
                outputs[vocab.field] = vocab.get_bilinear_classifier_with_embeddings(
                    layer, upos_embed, token_weights, reuse=reuse)
                self._evals.add('ufeats')
            #if 'ufeats' in output_vocabs and not self.share_layer:
            #    vocab = output_vocabs['ufeats']
            #    outputs[vocab.field] = vocab.get_bilinear_classifier_with_embeddings(
            #        layer, upos_embed, token_weights,
            #        reuse=reuse)
            #    self._evals.add('ufeats')
        if 'xpos' in output_vocabs and ('upos' not in output_vocabs or self.share_layer):
            vocab = output_vocabs['xpos']
            outputs[vocab.field] = vocab.get_linear_classifier(
                layer, token_weights,
                last_output if self.share_layer else None, reuse=reuse)
            self._evals.add('xpos')
            if last_output is None:
                last_output = outputs[vocab.field]
        if 'ufeats' in output_vocabs and ('upos' not in output_vocabs or self.share_layer):
            #if 'ufeats' in output_vocabs and ('upos' not in output_vocabs or self.share_layer):
            vocab = output_vocabs['ufeats']
            outputs[vocab.field] = vocab.get_linear_classifier(
                layer, token_weights,
                last_output if self.share_layer else None, reuse=reuse)
            self._evals.add('ufeats')
            if last_output is None:
                last_output = outputs[vocab.field]
        if 'deprel' in output_vocabs:
            vocab = output_vocabs['deprel']
            outputs[vocab.field] = vocab.get_linear_classifier(
                layer, token_weights,
                last_output if self.share_layer else None, reuse=reuse)
            self._evals.add('deprel')
            if last_output is None:
                last_output = outputs[vocab.field]
    return outputs, tokens
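
# Illustrative sketch, not part of the original code: when self.share_layer is set,
# the tagger above threads the first classifier's output dict into the later ones via
# `last_output`, so they can reuse its hidden layer instead of building their own.
# `get_linear_classifier` below is a hypothetical stand-in showing only that wiring.
def _demo_share_layer(fields, share_layer):
    def get_linear_classifier(field, previous):
        hidden = previous['hidden'] if previous is not None else 'hidden({})'.format(field)
        return {'field': field, 'hidden': hidden}

    outputs, last_output = {}, None
    for field in fields:
        outputs[field] = get_linear_classifier(field, last_output if share_layer else None)
        if last_output is None:
            last_output = outputs[field]
    return outputs
# _demo_share_layer(['lemma', 'upos', 'xpos'], share_layer=True) -> every classifier
# reports the lemma classifier's hidden layer; with share_layer=False each builds its own.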
def build_graph(self, input_network_outputs={}, reuse=True):
    """"""
    outputs = {}
    with tf.variable_scope('Embeddings'):
        input_tensors = [input_vocab.get_input_tensor(reuse=reuse)
                         for input_vocab in self.input_vocabs]
        for input_network, output in input_network_outputs:
            with tf.variable_scope(input_network.classname):
                input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
        layer = tf.concat(input_tensors, 2)

    n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keep_dims=True))
    batch_size, bucket_size, input_size = nn.get_sizes(layer)
    layer *= input_size / (n_nonzero + tf.constant(1e-12))

    token_weights = nn.greater(self.id_vocab.placeholder, 0, dtype=tf.int32)
    tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
    n_tokens = tf.reduce_sum(tokens_per_sequence)
    n_sequences = tf.count_nonzero(tokens_per_sequence)
    seq_lengths = tokens_per_sequence + 1
    tokens = {'n_tokens': n_tokens,
              'tokens_per_sequence': tokens_per_sequence,
              'token_weights': token_weights,
              'n_sequences': n_sequences}

    conv_keep_prob = 1. if reuse else self.conv_keep_prob
    recur_keep_prob = 1. if reuse else self.recur_keep_prob
    recur_include_prob = 1. if reuse else self.recur_include_prob

    rev_layer = tf.reverse_sequence(layer, seq_lengths, seq_axis=2)
    for i in six.moves.range(self.n_layers):
        conv_width = self.first_layer_conv_width if not i else self.conv_width
        with tf.variable_scope('RNN_FW-{}'.format(i)):
            layer, _ = recurrent.directed_RNN(
                layer, self.recur_size, seq_lengths,
                bidirectional=False, recur_cell=self.recur_cell,
                conv_width=conv_width, recur_func=self.recur_func,
                conv_keep_prob=conv_keep_prob, recur_include_prob=recur_include_prob,
                recur_keep_prob=recur_keep_prob, cifg=self.cifg,
                highway=self.highway, highway_func=self.highway_func)
        if self.bidirectional:
            with tf.variable_scope('RNN_BW-{}'.format(i)):
                rev_layer, _ = recurrent.directed_RNN(
                    rev_layer, self.recur_size, seq_lengths,
                    bidirectional=False, recur_cell=self.recur_cell,
                    conv_width=conv_width, recur_func=self.recur_func,
                    conv_keep_prob=conv_keep_prob, recur_keep_prob=recur_keep_prob,
                    recur_include_prob=recur_include_prob, cifg=self.cifg,
                    highway=self.highway, highway_func=self.highway_func)

    ones = tf.ones([batch_size, 1, 1])
    with tf.variable_scope('RNN_FW-{}/RNN/Loop'.format(i), reuse=True):
        fw_initial_state = tf.get_variable('Initial_state')
        n_splits = fw_initial_state.get_shape().as_list()[-1] / self.recur_size
        fw_initial_state = tf.split(fw_initial_state, int(n_splits), -1)[0]
    start_token = ones * fw_initial_state
    layer = tf.reverse_sequence(layer, seq_lengths, seq_axis=2)
    layer = layer[:, 1:]
    layer = tf.reverse_sequence(layer, seq_lengths - 1, seq_axis=2)
    layer = tf.concat([start_token, layer], axis=1)
    if self.bidirectional:
        with tf.variable_scope('RNN_BW-{}/RNN/Loop'.format(i), reuse=True):
            bw_initial_state = tf.get_variable('Initial_state')
            n_splits = bw_initial_state.get_shape().as_list()[-1] / self.recur_size
            bw_initial_state = tf.split(bw_initial_state, int(n_splits), -1)[0]
        stop_token = ones * bw_initial_state
        rev_layer = tf.concat([stop_token, layer], axis=1)
        rev_layer = tf.reverse_sequence(rev_layer, seq_lengths + 1, seq_axis=2)[:, 1:]
        if self.bilin:
            layer = tf.concat([layer * rev_layer, layer, rev_layer], axis=2)
        else:
            layer = tf.concat([layer, rev_layer], axis=2)

    output_vocabs = {vocab.field: vocab for vocab in self.output_vocabs}
    outputs = {}
    with tf.variable_scope('Classifiers'):
        if 'form' in output_vocabs:
            vocab = output_vocabs['form']
            outputs[vocab.field] = vocab.get_sampled_linear_classifier(
                layer, self.n_samples, token_weights=token_weights, reuse=reuse)
            self._evals.add('form')
        if 'upos' in output_vocabs:
            vocab = output_vocabs['upos']
            outputs[vocab.field] = vocab.get_linear_classifier(
                layer, token_weights=token_weights, reuse=reuse)
            self._evals.add('upos')
        if 'xpos' in output_vocabs:
            vocab = output_vocabs['xpos']
            outputs[vocab.field] = vocab.get_linear_classifier(
                layer, token_weights=token_weights, reuse=reuse)
            self._evals.add('xpos')
    return outputs, tokens
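
# Illustrative sketch, not part of the original code: the start/stop-token shifting
# after the RNN_FW / RNN_BW loops aligns each position with context that excludes the
# token itself, as in a language model. Below is a fixed-length numpy version of the
# forward shift (the real code uses tf.reverse_sequence to respect variable lengths);
# `start` is a stand-in for the learned 'Initial_state' variable.
def _demo_forward_shift():
    import numpy as np
    fw_states = np.array([[1.0, 2.0, 3.0, 4.0]])  # forward RNN outputs for one sequence
    start = np.array([[0.0]])                     # stand-in for the learned initial state
    # drop the last state and prepend the start token: position t now holds the state
    # computed from tokens < t, so predicting token t never conditions on token t itself
    shifted = np.concatenate([start, fw_states[:, :-1]], axis=1)
    assert shifted.tolist() == [[0.0, 1.0, 2.0, 3.0]]
    return shifted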