def feature_detector_blk(max_depth=2):
    """Build the block that evaluates a single convolution patch.

    Input: node dict
    Output: TensorType([hyper.conv_dim, ])

    The patch is collected with depth ``max_depth``.
    """
    detector = td.Composition()
    with detector.scope():
        patch_nodes = collect_node_for_conv_patch_blk(
            max_depth=max_depth).reads(detector.input)

        # Convert every python-side patch entry into tensors: one coded
        # feature followed by four scalars.
        # NOTE(review): presumably (feature, idx, pclen, depth, max_depth),
        # i.e. what weighted_feature_blk consumes -- confirm.
        entry_blk = td.Record((coding_blk(), td.Scalar(), td.Scalar(),
                               td.Scalar(), td.Scalar()))
        as_tensors = td.Map(entry_blk).reads(patch_nodes)

        # One weighted feature per patch entry.
        per_node = td.Map(weighted_feature_blk()).reads(as_tensors)

        # Sum the weighted features into one TensorType([hyper.conv_dim, ]).
        patch_sum = td.Reduce(td.Function(tf.add)).reads(per_node)

        # Add the convolution bias.
        bias = td.FromTensor(param.get('Bconv'))
        with_bias = td.Function(tf.add).reads(patch_sum, bias)

        # Squash with tanh; shape stays TensorType([hyper.conv_dim, ]).
        activated = td.Function(tf.nn.tanh).reads(with_bias)

        detector.output.reads(activated)
    return detector
def composed_embed_blk():
    """Build the block that embeds a node, dispatching on ``node['clen']``.

    Nodes whose ``clen`` is 0 go straight through ``direct_embed_blk``.
    All other nodes fold their children's direct embeddings with
    ``continous_weighted_add_blk``, add the bias ``B``, clip the norm and
    apply the activation chosen by ``hyper.use_relu``.
    """
    leaf_blk = direct_embed_blk()

    inner_blk = td.Composition(name='composed_embed_nonleaf')
    with inner_blk.scope():
        node = inner_blk.input
        kids = td.GetItem('children').reads(node)
        n_kids = td.Scalar().reads(td.GetItem('clen').reads(node))
        kid_clens = td.Map(td.GetItem('clen') >> td.Scalar()).reads(kids)
        kid_feats = td.Map(direct_embed_blk()).reads(kids)

        # Seed of the fold: a zero vector of size hyper.word_dim and a
        # zero scalar.
        fold_seed = td.Composition()
        with fold_seed.scope():
            fold_seed.output.reads(
                td.FromTensor(tf.zeros(hyper.word_dim)),
                td.FromTensor(tf.zeros([])),
            )

        # Each fold step sees (child feature, child clen, parent clen).
        triples = td.Zip().reads(kid_feats, kid_clens,
                                 td.Broadcast().reads(n_kids))
        folded = td.Fold(continous_weighted_add_blk(), fold_seed).reads(triples)
        # Only the first output of the fold is used downstream.
        weighted_sum = folded[0]

        with_bias = td.Function(tf.add, name='add_bias').reads(
            weighted_sum, td.FromTensor(param.get('B')))
        clipped = clip_by_norm_blk().reads(with_bias)

        if hyper.use_relu:
            activation = tf.nn.relu
        else:
            activation = tf.nn.tanh
        activated = td.Function(activation).reads(clipped)
        inner_blk.output.reads(activated)

    def is_leaf(node):
        return node['clen'] == 0

    return td.OneOf(is_leaf, {True: leaf_blk, False: inner_blk})
def test_tri_combined(self, idx, pclen, depth, max_depth):
    """Test tri_combined_blk against the numpy reference on data"""
    # Fetch the three convolution weight matrices, one session run each.
    weights = [
        self.sess.run(tbcnn.param.get(name))
        for name in ('Wconvl', 'Wconvr', 'Wconvt')
    ]

    scalar_inputs = (td.Scalar(), td.Scalar(), td.Scalar(), td.Scalar())
    blk = scalar_inputs >> tbcnn.tri_combined_blk()
    actual = blk.eval((idx, pclen, depth, max_depth), session=self.sess)

    desired = tri_combined_np(idx, pclen, depth, max_depth, *weights)
    nptest.assert_allclose(actual, desired)
def test_linear_combine(self, clen, pclen, idx):
    """Test linear_combine_blk against the numpy reference on data"""
    # Fetch the left and right embedding weights, one session run each.
    weights = [
        self.sess.run(embedding.param.get(name)) for name in ('Wl', 'Wr')
    ]

    scalar_inputs = (td.Scalar(), td.Scalar(), td.Scalar())
    blk = scalar_inputs >> embedding.linear_combine_blk()
    actual = blk.eval((clen, pclen, idx), session=self.sess)

    desired = linear_combine_np(clen, pclen, idx, *weights)
    nptest.assert_allclose(actual, desired)
def embed_tree(logits_and_state, is_root):
    """Creates a block that embeds trees; output is tree LSTM state."""

    def is_neutral_label(pair):
        # label 2 means neutral
        return pair[0] == '2'

    tokenizer = td.InputTransform(tokenize)
    # Tuple cases are selected by the boolean key: index 0 handles False
    # (not neutral), index 1 handles True (neutral).
    metrics_by_neutrality = (
        add_metrics(is_root, is_neutral=False),
        add_metrics(is_root, is_neutral=True),
    )
    label_and_subtree = (td.Scalar('int32'), logits_and_state)
    dispatch = td.OneOf(
        key_fn=is_neutral_label,
        case_blocks=metrics_by_neutrality,
        pre_block=label_and_subtree)
    return tokenizer >> dispatch
def train(batch_size=100):
    """Train the reduce network and save a plot of its loss curve.

    Runs 10000 Adam steps on batches of ``batch_size`` random examples.
    After every step the L2 loss on a fixed set of 1000 random examples is
    recorded, and every 100 steps a checkpoint is written to ``save_dir``.
    The recorded losses are plotted to ``./imgs/avg.png``.

    Returns:
        The network block that was trained.
    """
    history = []
    net_block = reduce_net_block()
    compiler = td.Compiler.create((net_block, td.Scalar()))
    predicted, target = compiler.output_tensors
    loss = tf.nn.l2_loss(predicted - target)
    train_op = tf.train.AdamOptimizer().minimize(loss)
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    def random_feed(count):
        return compiler.build_feed_dict(
            random_example() for _ in range(count))

    # The validation examples are drawn once and reused for every step.
    validation_fd = random_feed(1000)
    for step in range(10000):
        sess.run(train_op, random_feed(batch_size))
        history.append(sess.run(loss, validation_fd))
        if step % 100 == 0:
            saver.save(sess, save_dir, global_step=step)

    fig = plt.figure()
    plt.plot(history)
    plt.title('Training Loss')
    plt.xlabel('Batch')
    plt.ylabel('Loss')
    fig.savefig('./imgs/avg.png', dpi=fig.dpi)
    return net_block
def logits_and_state(self):
    """Creates a block that goes from tokens to (logits, state) tuples."""
    # Words missing from the vocabulary share one index past its end.
    oov_index = len(self.vocab)

    def lookup_word(word):
        return self.vocab.get(word, oov_index)

    # Fold reminders:
    #   (GetItem(key) >> block).eval(inp) => block.eval(inp[key])
    #   InputTransform(fn) lifts a Python function to a block.
    #   Scalar turns the Python number into a scalar tensor.
    word_index = td.GetItem(0) >> td.InputTransform(lookup_word)
    word2vec = word_index >> td.Scalar('int32') >> self.word_embedding

    pair2vec = (self.embed_subtree(), self.embed_subtree())

    # Trees are binary, so the tree layer takes two states as its
    # input_state.
    zero_state = td.Zeros((self.tree_lstm_cell.state_size, ) * 2)

    # Input is a word vector.
    embedding_width = self.word_embedding.output_type.shape[0]
    zero_inp = td.Zeros(embedding_width)

    # AllOf(a, b).eval(inp) => (a.eval(inp), b.eval(inp))
    word_case = td.AllOf(word2vec, zero_state)
    pair_case = td.AllOf(zero_inp, pair2vec)

    # OneOf(key_fn, cases) evaluates the case whose key equals
    # key_fn(input); here the key is the length of the input.
    cases = [(1, word_case), (2, pair_case)]
    tree2vec = td.OneOf(len, cases)

    return tree2vec >> self.tree_lstm_cell
def test_weighted_feature(self):
    """Test weighted_feature_blk on the root node of the test data"""
    root, _ = self._load_test_data()
    # Fetch the three convolution weight matrices, one session run each.
    weights = [
        self.sess.run(tbcnn.param.get(name))
        for name in ('Wconvl', 'Wconvr', 'Wconvt')
    ]
    idx, pclen, depth, max_depth = (1., 1., 0., 2.)

    feature = tbcnn.coding_blk().eval(root, session=self.sess)

    inputs = (td.Vector(feature.size), td.Scalar(), td.Scalar(), td.Scalar(),
              td.Scalar())
    blk = inputs >> tbcnn.weighted_feature_blk()
    actual = blk.eval((feature, idx, pclen, depth, max_depth),
                      session=self.sess)

    # Reference: the feature multiplied by the combined weight matrix.
    combined = tri_combined_np(idx, pclen, depth, max_depth, *weights)
    desired = np.matmul(feature, combined)
    nptest.assert_allclose(actual, desired)
def linearLSTM_over_TreeLstm(self, num_classes, sent_lstm_num_units):
    """Build a block that runs an LSTM over per-item tree-LSTM outputs.

    Stores the sentence-level cell on ``self.sent_cell`` and the final
    fully connected layer on ``self.output_layer``.

    Returns:
        A block taking an (int32 scalar, sequence) pair and piping the
        scalar and the output-layer result into ``self.set_metrics()``.
    """
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=sent_lstm_num_units)
    self.sent_cell = td.ScopedLayer(
        lstm_cell, name_or_scope=self._sent_lstm_default_scope_name)

    # Each element is embedded by the tree LSTM and its outputs are
    # concatenated before being fed to the sequence RNN.
    embed_each = td.Map(self.tree_lstm.tree_lstm() >> td.Concat())
    sent_lstm = embed_each >> td.RNN(self.sent_cell)

    self.output_layer = td.FC(
        num_classes,
        activation=None,
        name=self._output_layer_default_scope_name)

    label_blk = td.Scalar('int32')
    # Item 1 of the RNN output, then item 0 of that, feeds the output layer.
    logits_blk = (sent_lstm >> td.GetItem(1) >> td.GetItem(0)
                  >> self.output_layer)
    return (label_blk, logits_blk) >> self.set_metrics()
def create_compiler():
    """Create the Fold compiler for one JSON-encoded instance.

    An instance is decoded with ``json.loads`` and read as a record of
    (left sentence, right sentence, label, id). The label is converted to
    ``int`` and one-hot encoded over 2 classes as float32; the id is an
    int32 scalar.
    """

    def parse_label(label):
        return int(label)

    def parse_record(record):
        return json.loads(record)

    left_sentence = buid_sentence_expression()
    right_sentence = buid_sentence_expression()
    label_blk = td.InputTransform(parse_label) >> td.OneHot(
        2, dtype=tf.float32)
    instance_id = td.Scalar(dtype=tf.int32)

    fields = (left_sentence, right_sentence, label_blk, instance_id)
    one_record = td.InputTransform(parse_record) >> td.Record(
        fields, name='instance')
    return td.Compiler().create(one_record)
def buid_sentence_expression():
    """Build the recursive block that embeds one sentence tree.

    The JSON input is wrapped in a ``WordNode``. Nodes whose ``'leaf'``
    item is True are embedded from their ``'word_id'``; other nodes embed
    their ``'left'`` and ``'right'`` children recursively, concatenate the
    results and pass them through ``fc``.
    """

    def to_word_node(sentence_json):
        return WordNode(sentence_json)

    sentence_tree = td.InputTransform(to_word_node)

    # Forward declaration so the non-leaf case can refer to the whole
    # expression before it exists.
    tree_rnn = td.ForwardDeclaration(td.PyObjectType())

    word_id = td.GetItem('word_id', name='leaf_in')
    leaf_case = word_id >> td.Scalar(dtype=tf.int32) >> embedding

    children = td.Record({'left': tree_rnn(), 'right': tree_rnn()})
    index_case = children >> td.Concat(name='concat_root_child') >> fc

    cases = {True: leaf_case, False: index_case}
    expr_sentence = td.OneOf(td.GetItem('leaf'), cases, name='recur_in')
    tree_rnn.resolve_to(expr_sentence)

    return sentence_tree >> expr_sentence
def logits_and_state():
    """Creates a block that goes from tokens to (logits, state) tuples.

    Each input is indexed as (tree, context, entity-1 position, entity-2
    position): item 0 is either a single word or a pair of subtrees, and
    items 1-3 are turned into 10-dimensional vectors by ``makeContextMat``
    and ``makeEntPositMat``.

    Words that are missing from ``word_idx`` are mapped to the index
    ``len(word_idx)``.
    """
    unknown_idx = len(word_idx)

    def lookup_word(word):
        # Fall back to unknown_idx explicitly: a bare ``word_idx.get(word)``
        # yields None for out-of-vocabulary words, which cannot be fed to
        # the int32 scalar below.
        return word_idx.get(word, unknown_idx)

    word2vec = (
        td.GetItem(0) >> td.GetItem(0) >> td.InputTransform(lookup_word) >>
        td.Scalar('int32') >> word_embedding
    )  # <td.Pipe>: None -> TensorType((200,), 'float32')

    # The feature blocks are built twice (suffix 1 for the word case,
    # suffix 2 for the children case) so each case owns its own instances.
    context2vec1 = td.GetItem(1) >> td.InputTransform(
        makeContextMat) >> td.Vector(10)
    context2vec2 = td.GetItem(1) >> td.InputTransform(
        makeContextMat) >> td.Vector(10)

    ent1posit1 = td.GetItem(2) >> td.InputTransform(
        makeEntPositMat) >> td.Vector(10)
    ent1posit2 = td.GetItem(2) >> td.InputTransform(
        makeEntPositMat) >> td.Vector(10)

    ent2posit1 = td.GetItem(3) >> td.InputTransform(
        makeEntPositMat) >> td.Vector(10)
    ent2posit2 = td.GetItem(3) >> td.InputTransform(
        makeEntPositMat) >> td.Vector(10)

    pairs2vec = td.GetItem(0) >> (embed_subtree(), embed_subtree())

    # Our binary tree can have two child nodes, therefore the zero state
    # is built for two children.
    zero_state = td.Zeros((tree_lstm.state_size, ) * 2)
    # Input is a word vector.
    zero_inp = td.Zeros(word_embedding.output_type.shape[0]
                        )  # word_embedding.output_type.shape[0] == 200

    word_case = td.AllOf(word2vec, zero_state, context2vec1, ent1posit1,
                         ent2posit1)
    children_case = td.AllOf(zero_inp, pairs2vec, context2vec2, ent1posit2,
                             ent2posit2)

    # Item 0 of length 1 is the leaf (word) case; anything else is treated
    # as the children case.
    tree2vec = td.OneOf(lambda x: 1 if len(x[0]) == 1 else 2,
                        [(1, word_case), (2, children_case)])

    # logits and lstm states
    return tree2vec >> tree_lstm >> (output_layer, td.Identity())
def logits_and_state():
    """Creates a block that goes from tokens to (logits, state) tuples."""
    # Words missing from the vocabulary share one index past its end.
    oov_index = len(word_idx)

    def lookup_word(word):
        return word_idx.get(word, oov_index)

    word_index = td.GetItem(0) >> td.InputTransform(lookup_word)
    word2vec = word_index >> td.Scalar('int32') >> word_embedding

    pair2vec = (embed_subtree(), embed_subtree())

    # Trees are binary, so the tree layer takes two states as its
    # input_state.
    zero_state = td.Zeros((tree_lstm.state_size, ) * 2)

    # Input is a word vector.
    embedding_width = word_embedding.output_type.shape[0]
    zero_inp = td.Zeros(embedding_width)

    word_case = td.AllOf(word2vec, zero_state)
    pair_case = td.AllOf(zero_inp, pair2vec)

    # Dispatch on the length of the input: 1 for a word, 2 for a pair.
    cases = [(1, word_case), (2, pair_case)]
    tree2vec = td.OneOf(len, cases)

    heads = (output_layer, td.Identity())
    return tree2vec >> tree_lstm >> heads
def build_model():
    """Compile the pooling block and stack two ReLU layers on top of it.

    Returns:
        A tuple ``(compiler, fc2, logits, batched_labels, accuracy,
        batch_size_op)``. ``fc2`` is the output of the second ReLU layer
        and ``logits`` is the softmax of ``fc2``.
    """
    # create model variables
    param.initialize_tbcnn_weights()

    # Compile the block together with an int64 scalar label input.
    tree_pooling = dynamic_pooling_blk()
    compiler = td.Compiler.create((tree_pooling, td.Scalar(dtype='int64')))
    pooled, batched_labels = compiler.output_tensors

    def relu_layer(inputs, prefix):
        # ReLU over an affine map whose parameters live under `prefix`.
        weight = param.get(prefix + '/weight')
        bias = param.get(prefix + '/bias')
        return tf.nn.relu(tf.add(tf.matmul(inputs, weight), bias))

    fc1 = relu_layer(pooled, 'FC1')
    fc2 = relu_layer(fc1, 'FC2')

    # our prediction output with accuracy calc
    logits = tf.nn.softmax(fc2)
    predicted_class = tf.argmax(logits, 1)
    hits = tf.cast(tf.equal(predicted_class, batched_labels), tf.float32)
    accuracy = tf.reduce_mean(hits)

    # First entry of the labels' dynamic shape.
    batch_size_op = tf.unstack(tf.shape(batched_labels))[0]

    return compiler, fc2, logits, batched_labels, accuracy, batch_size_op
back_dir = (td.RNN(bw_cell) >> td.GetItem(0)).reads(bw_seq) back_to_leftright = td.Slice(step=-1).reads(back_dir) output_transform = td.FC(1, activation=None) bidir_common = (td.ZipWith( td.Concat() >> output_transform >> td.Metric('logits'))).reads( forward_dir, back_to_leftright) bidir_conv_lstm.output.reads(bidir_common) return bidir_conv_lstm CONV_data = td.Record((td.Map( td.Vector(vsize) >> td.Function(lambda x: tf.reshape(x, [-1, vsize, 1]))), td.Map(td.Scalar()))) CONV_model = (CONV_data >> bidirectional_dynamic_CONV( multi_convLSTM_cell([vsize, vsize, vsize], [100, 100, 100]), multi_convLSTM_cell([vsize, vsize, vsize], [100, 100, 100])) >> td.Void()) FC_data = td.Record((td.Map(td.Vector(vsize)), td.Map(td.Scalar()))) FC_model = (FC_data >> bidirectional_dynamic_FC(multi_FC_cell( [1000] * 5), multi_FC_cell([1000] * 5), 1000) >> td.Void()) store = data(FLAGS.data_dir + FLAGS.data_type, FLAGS.truncate) if FLAGS.model == "lstm": model = FC_model elif FLAGS.model == "convlstm": model = CONV_model else:
def reduce_net_block():
    """Build a block that reduces a sequence of scalars with a small net.

    The combining step concatenates its inputs, applies two fully
    connected layers of width 20 and a final linear layer of width 1, then
    squeezes axis 1 of the result.
    """

    def drop_unit_axis(xs):
        return tf.squeeze(xs, axis=1)

    combine = (td.Concat()
               >> td.FC(20)
               >> td.FC(20)
               >> td.FC(1, activation=None)
               >> td.Function(drop_unit_axis))
    as_scalars = td.Map(td.Scalar())
    return as_scalars >> td.Reduce(combine)
def embed_tree(is_root):
    """Build a block that tokenizes a tree, embeds it and records metrics.

    The tokenized input is read as an (int32 scalar, subtree) pair; the
    subtree goes through ``logits_and_state()`` and the pair is then
    handed to ``add_metrics(is_root)``.
    """
    tokenizer = td.InputTransform(tokenize)
    label_and_subtree = (td.Scalar('int32'), logits_and_state())
    return tokenizer >> label_and_subtree >> add_metrics(is_root)
def direct_embed_blk():
    """Build a block that embeds a node directly from its ``'name'`` item.

    The item is read as an int32 scalar, looked up in the ``We`` parameter
    with ``tf.nn.embedding_lookup`` and passed through
    ``clip_by_norm_blk``.
    """

    def lookup(index):
        return tf.nn.embedding_lookup(param.get('We'), index)

    name_index = td.GetItem('name') >> td.Scalar('int32')
    return name_index >> td.Function(lookup) >> clip_by_norm_blk()
def _compile(self):
    """Build the Fold tagging network, its loss and its training op.

    Each word is represented by its word embedding, a word-shape
    embedding and a character-level CNN feature; the sequence is run
    through forward and backward LSTMs and a linear output layer.

    Sets ``self.keep_prob``, ``self.compiler``, ``self.y_out``,
    ``self.y_true``, ``self.y_loss``, ``self.y_prob``, ``self.y_true_idx``,
    ``self.y_pred_idx``, ``self.y_pred``, ``self.epoch_step_op``,
    ``self.opt``, ``self.train_step`` and ``self.saver``, then initializes
    all global variables in ``self.sess``.

    Raises:
        Exception: if ``self.optimizer`` is not one of 'default', 'adam',
            'adagrad' or 'rmsprop'.
    """
    with self.sess.as_default():
        import tensorflow_fold as td

        output_size = len(self.labels)
        self.keep_prob = tf.placeholder_with_default(tf.constant(1.0),
                                                     shape=None)

        # One filter bank over the concatenation of `window_size`
        # per-character vectors (char embedding + char feature embedding).
        fshape = (self.window_size * (self.char_embedding_size +
                                      self.char_feature_embedding_size),
                  self.num_filters)
        filt_w3 = tf.Variable(tf.random_normal(fshape, stddev=0.05))

        def CNN_Window3(filters):
            # NOTE(review): takes exactly three inputs, so this only
            # matches td.NGrams(self.window_size) when window_size == 3.
            return td.Function(
                lambda a, b, c: cnn_operation([a, b, c], filters))

        def cnn_operation(window_sequences, filters):
            windows = tf.concat(window_sequences, axis=-1)
            products = tf.multiply(tf.expand_dims(windows, axis=-1), filters)
            return tf.reduce_sum(products, axis=-2)

        char_emb = td.Embedding(num_buckets=self.char_buckets,
                                num_units_out=self.char_embedding_size)

        cnn_layer = (td.NGrams(self.window_size)
                     >> td.Map(CNN_Window3(filt_w3))
                     >> td.Max())

        # --------- char features
        def charfeature_lookup(c):
            # ascii_lowercase / ascii_uppercase exist on Python 2 and 3;
            # string.lowercase / string.uppercase are Python 2 only.
            if c in string.ascii_lowercase:
                return 0
            elif c in string.ascii_uppercase:
                return 1
            elif c in string.punctuation:
                return 2
            else:
                return 3

        char_input = td.Map(td.InputTransform(lambda c: ord(c.lower()))
                            >> td.Scalar('int32') >> char_emb)

        char_features = td.Map(
            td.InputTransform(charfeature_lookup)
            >> td.Scalar(dtype='int32')
            >> td.Embedding(num_buckets=4,
                            num_units_out=self.char_feature_embedding_size))

        # Each word is padded with '~' on both sides before the n-grams
        # are taken.
        charlevel = (td.InputTransform(lambda s: ['~'] + [c for c in s] + ['~'])
                     >> td.AllOf(char_input, char_features)
                     >> td.ZipWith(td.Concat())
                     >> cnn_layer)

        # --------- word features
        word_emb = td.Embedding(num_buckets=len(self.word_vocab),
                                num_units_out=self.embedding_size,
                                initializer=self.word_embeddings)

        # Words outside the vocabulary map to index 0.
        wordlookup = lambda w: (self.word_vocab.index(w.lower())
                                if w.lower() in self.word_vocab else 0)

        wordinput = (td.InputTransform(wordlookup)
                     >> td.Scalar(dtype='int32')
                     >> word_emb)

        def wordfeature_lookup(w):
            # Word-shape classes: all lower, capitalized, all upper,
            # other purely alphabetic, anything else.
            if re.match('^[a-z]+$', w):
                return 0
            elif re.match('^[A-Z][a-z]+$', w):
                return 1
            elif re.match('^[A-Z]+$', w):
                return 2
            elif re.match('^[A-Za-z]+$', w):
                return 3
            else:
                return 4

        wordfeature = (td.InputTransform(wordfeature_lookup)
                       >> td.Scalar(dtype='int32')
                       >> td.Embedding(num_buckets=5, num_units_out=32))

        # -----------
        rnn_fwdcell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_fwd')
        fwdlayer = td.RNN(rnn_fwdcell) >> td.GetItem(0)

        rnn_bwdcell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_bwd')
        # Reverse, run the RNN, then reverse the outputs back so they line
        # up with the forward outputs.
        bwdlayer = (td.Slice(step=-1)
                    >> td.RNN(rnn_bwdcell)
                    >> td.GetItem(0)
                    >> td.Slice(step=-1))

        rnn_layer = td.AllOf(fwdlayer, bwdlayer) >> td.ZipWith(td.Concat())

        output_layer = td.FC(output_size,
                             input_keep_prob=self.keep_prob,
                             activation=None)

        wordlevel = td.AllOf(wordinput, wordfeature) >> td.Concat()

        # Outputs and labels are exposed through metrics, so both top-level
        # blocks end in Void.
        network = (td.Map(td.AllOf(wordlevel, charlevel) >> td.Concat())
                   >> rnn_layer
                   >> td.Map(output_layer)
                   >> td.Map(td.Metric('y_out'))) >> td.Void()

        groundlabels = td.Map(td.Vector(output_size, dtype=tf.int32)
                              >> td.Metric('y_true')) >> td.Void()

        self.compiler = td.Compiler.create((network, groundlabels))

        self.y_out = self.compiler.metric_tensors['y_out']
        self.y_true = self.compiler.metric_tensors['y_true']

        self.y_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.y_out, labels=self.y_true))

        self.y_prob = tf.nn.softmax(self.y_out)
        self.y_true_idx = tf.argmax(self.y_true, axis=-1)
        self.y_pred_idx = tf.argmax(self.y_prob, axis=-1)

        self.y_pred = tf.one_hot(self.y_pred_idx,
                                 depth=output_size,
                                 dtype=tf.int32)

        epoch_step = tf.Variable(0, trainable=False)
        self.epoch_step_op = tf.assign(epoch_step, epoch_step + 1)

        lrate_decay = tf.train.exponential_decay(self.lrate, epoch_step, 1,
                                                 self.decay)

        if self.optimizer == 'adam':
            self.opt = tf.train.AdamOptimizer(learning_rate=lrate_decay)
        elif self.optimizer == 'adagrad':
            self.opt = tf.train.AdagradOptimizer(
                learning_rate=lrate_decay, initial_accumulator_value=1e-08)
        elif self.optimizer in ('rmsprop', 'default'):
            # 'default' is advertised by the error message below, so it is
            # accepted here as an alias for rmsprop.
            self.opt = tf.train.RMSPropOptimizer(learning_rate=lrate_decay,
                                                 epsilon=1e-08)
        else:
            raise Exception(('The optimizer {} is not in list of available ' +
                             'optimizers: default, adam, adagrad, rmsprop.')
                            .format(self.optimizer))

        # apply learning multiplier on embedding learning rate
        embeds = [word_emb.weights]
        grads_and_vars = self.opt.compute_gradients(self.y_loss)
        found = 0
        for i, (grad, var) in enumerate(grads_and_vars):
            if var in embeds:
                found += 1
                grad = tf.scalar_mul(self.embedding_factor, grad)
                grads_and_vars[i] = (grad, var)

        assert found == len(embeds)  # internal consistency check
        self.train_step = self.opt.apply_gradients(grads_and_vars)

        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=100)
def __init__(self, config, kb, text_seq_batch, seq_length_batch,
             num_vocab_txt, num_vocab_nmn, EOS_idx, num_choices,
             decoder_sampling, use_gt_layout=None, gt_layout_batch=None,
             scope='neural_module_network', reuse=None):
    """Build the layout generator and the module network over ``kb``.

    Exposes the seq2seq outputs (``predicted_tokens``, ``token_probs``,
    ``word_vecs``, ``neg_entropy``, ``atts``, ``log_seq_prob``), the Fold
    ``compiler`` with its ``scores`` tensor, and the regularizers
    ``entropy_reg`` and ``l2_reg``.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 1: Seq2seq RNN to generate module layout tokens
        embedding_mat = tf.get_variable(
            'embedding_mat', [num_vocab_txt, config.embed_dim_txt],
            initializer=tf.contrib.layers.xavier_initializer())

        with tf.variable_scope('layout_generation'):
            att_seq2seq = netgen_att.AttentionSeq2Seq(
                config, text_seq_batch, seq_length_batch, num_vocab_txt,
                num_vocab_nmn, EOS_idx, decoder_sampling, embedding_mat,
                use_gt_layout, gt_layout_batch)
            self.att_seq2seq = att_seq2seq
            self.atts = att_seq2seq.atts
            self.predicted_tokens = att_seq2seq.predicted_tokens
            self.token_probs = att_seq2seq.token_probs
            self.word_vecs = att_seq2seq.word_vecs
            self.neg_entropy = att_seq2seq.neg_entropy

            # log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(
                tf.log(self.token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(config, kb, self.word_vecs, num_choices,
                              embedding_mat)
            self.modules = modules

            # Attention has one entry per element of kb.
            att_shape = [len(kb)]
            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(td.PyObjectType(),
                                                  td.TensorType(att_shape))

            def index_fields():
                # Fresh int32 scalar blocks for the two index inputs.
                return [('time_idx', td.Scalar('int32')),
                        ('batch_idx', td.Scalar('int32'))]

            # _key_find
            case_key_find = td.Record(index_fields()) >> td.ScopedLayer(
                modules.KeyFindModule, name_or_scope='KeyFindModule')

            # _key_filter
            key_filter_fields = [('input_0', att_expr_decl())] + index_fields()
            case_key_filter = td.Record(key_filter_fields) >> td.ScopedLayer(
                modules.KeyFilterModule, name_or_scope='KeyFilterModule')

            recursion_cases = td.OneOf(td.GetItem('module'), {
                '_key_find': case_key_find,
                '_key_filter': case_key_filter
            })
            att_expr_decl.resolve_to(recursion_cases)

            # _val_desc: output scores for choice (for valid expressions)
            val_desc_fields = [('input_0', recursion_cases)] + index_fields()
            predicted_scores = td.Record(val_desc_fields) >> td.ScopedLayer(
                modules.ValDescribeModule, name_or_scope='ValDescribeModule')

            # For invalid expressions, define a dummy answer
            # so that all answers have the same form
            INVALID = assembler.INVALID_EXPR
            dummy_scores = td.Void() >> td.FromTensor(
                np.zeros(num_choices, np.float32))
            output_scores = td.OneOf(td.GetItem('module'), {
                '_val_desc': predicted_scores,
                INVALID: dummy_scores
            })

            # compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores = self.compiler.output_tensors[0]

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(self.neg_entropy)
        # L2 covers trainable variables whose op name contains `scope` and
        # ends in 'weights'.
        module_weights = []
        for v in tf.trainable_variables():
            if scope in v.op.name and v.op.name.endswith('weights'):
                module_weights.append(v)
        self.l2_reg = tf.add_n([tf.nn.l2_loss(v) for v in module_weights])
def __init__(self, image_batch, text_seq_batch, seq_length_batch, T_decoder,
             num_vocab_txt, embed_dim_txt, num_vocab_nmn, embed_dim_nmn,
             lstm_dim, num_layers, EOS_idx, encoder_dropout, decoder_dropout,
             decoder_sampling, num_choices, use_gt_layout=None,
             gt_layout_batch=None, scope='neural_module_network',
             reuse=None):
    """Build the image CNN, the layout generator and the module network.

    Exposes ``image_feat_grid``, the seq2seq outputs
    (``predicted_tokens``, ``token_probs``, ``word_vecs``, ``neg_entropy``,
    ``atts``, ``log_seq_prob``), the Fold ``compiler`` with its ``scores``
    tensor, and the regularizers ``entropy_reg`` and ``l2_reg``.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 0: Visual feature from CNN
        with tf.variable_scope('image_feature_cnn'):
            image_feat_grid = shapes_convnet(image_batch)
            self.image_feat_grid = image_feat_grid

        # Part 1: Seq2seq RNN to generate module layout tokens
        with tf.variable_scope('layout_generation'):
            att_seq2seq = nmn3_netgen_att.AttentionSeq2Seq(
                text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
                embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim,
                num_layers, EOS_idx, encoder_dropout, decoder_dropout,
                decoder_sampling, use_gt_layout, gt_layout_batch)
            self.att_seq2seq = att_seq2seq
            self.atts = att_seq2seq.atts
            self.predicted_tokens = att_seq2seq.predicted_tokens
            self.token_probs = att_seq2seq.token_probs
            self.word_vecs = att_seq2seq.word_vecs
            self.neg_entropy = att_seq2seq.neg_entropy

            # log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(
                tf.log(self.token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(image_feat_grid, self.word_vecs, num_choices)
            self.modules = modules

            # Attention shape: the feature grid's dimensions between its
            # first and last axes, followed by a trailing 1.
            att_shape = image_feat_grid.get_shape().as_list()[1:-1] + [1]
            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(td.PyObjectType(),
                                                  td.TensorType(att_shape))

            def index_fields():
                # Fresh int32 scalar blocks for the two index inputs.
                return [('time_idx', td.Scalar('int32')),
                        ('batch_idx', td.Scalar('int32'))]

            # _Find
            case_find = td.Record(index_fields()) >> td.ScopedLayer(
                modules.FindModule, name_or_scope='FindModule')

            # _Transform
            transform_fields = [('input_0', att_expr_decl())] + index_fields()
            case_transform = td.Record(transform_fields) >> td.ScopedLayer(
                modules.TransformModule, name_or_scope='TransformModule')

            # _And
            and_fields = [('input_0', att_expr_decl()),
                          ('input_1', att_expr_decl())] + index_fields()
            case_and = td.Record(and_fields) >> td.ScopedLayer(
                modules.AndModule, name_or_scope='AndModule')

            recursion_cases = td.OneOf(td.GetItem('module'), {
                '_Find': case_find,
                '_Transform': case_transform,
                '_And': case_and
            })
            att_expr_decl.resolve_to(recursion_cases)

            # _Answer: output scores for choice (for valid expressions)
            answer_fields = [('input_0', recursion_cases)] + index_fields()
            predicted_scores = td.Record(answer_fields) >> td.ScopedLayer(
                modules.AnswerModule, name_or_scope='AnswerModule')

            # For invalid expressions, define a dummy answer
            # so that all answers have the same form
            INVALID = nmn3_assembler.INVALID_EXPR
            dummy_scores = td.Void() >> td.FromTensor(
                np.zeros(num_choices, np.float32))
            output_scores = td.OneOf(td.GetItem('module'), {
                '_Answer': predicted_scores,
                INVALID: dummy_scores
            })

            # compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores = self.compiler.output_tensors[0]

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(self.neg_entropy)
        # L2 covers trainable variables whose op name contains `scope` and
        # ends in 'weights'.
        module_weights = []
        for v in tf.trainable_variables():
            if scope in v.op.name and v.op.name.endswith('weights'):
                module_weights.append(v)
        self.l2_reg = tf.add_n([tf.nn.l2_loss(v) for v in module_weights])
def _compile(self):
    """Build the Fold sequence classifier, its loss and its training op.

    Every input element is a (string, x, y) record: the string is encoded
    by a character LSTM and the two numbers are embedded with a shared
    position embedding. A bidirectional LSTM over the sequence is
    max-pooled and fed to a linear output layer.

    Sets ``self.keep_prob``, ``self.compiler``, ``self.y_out``,
    ``self.y_true``, ``self.y_loss``, ``self.y_prob``, ``self.y_true_idx``,
    ``self.y_pred_idx``, ``self.y_pred``, ``self.epoch_step_op``,
    ``self.opt``, ``self.train_step`` and ``self.saver``, then initializes
    all global variables in ``self.sess``.

    Raises:
        Exception: if ``self.optimizer`` is not one of 'default', 'adam',
            'adagrad' or 'rmsprop'.
    """
    with self.sess.as_default():
        import tensorflow_fold as td

        n_labels = len(self.labels)
        self.keep_prob = tf.placeholder_with_default(tf.constant(1.0),
                                                     shape=None)

        # ---- character-level encoder: code points -> embeddings -> LSTM
        char_emb = td.Embedding(num_buckets=self.char_buckets,
                                num_units_out=self.embedding_size)
        char_cell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'char_cell')

        def to_code_points(s):
            return [ord(c) for c in s]

        # Item 1 of the RNN output, then item 1 of that, is kept.
        char_lstm = (td.InputTransform(to_code_points)
                     >> td.Map(td.Scalar('int32') >> char_emb)
                     >> td.RNN(char_cell)
                     >> td.GetItem(1)
                     >> td.GetItem(1))

        # ---- bidirectional sequence layers
        rnn_fwdcell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_fwd')
        fwdlayer = td.RNN(rnn_fwdcell) >> td.GetItem(0)

        rnn_bwdcell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_bwd')
        # Reverse, run the RNN, then reverse the outputs back.
        bwdlayer = (td.Slice(step=-1)
                    >> td.RNN(rnn_bwdcell)
                    >> td.GetItem(0)
                    >> td.Slice(step=-1))

        # ---- position features, shifted by 150 into 300 buckets
        pos_emb = td.Embedding(
            num_buckets=300,
            num_units_out=32,
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        def shift(x):
            return x + 150

        pos_x = td.InputTransform(shift) >> td.Scalar(dtype='int32') >> pos_emb
        pos_y = td.InputTransform(shift) >> td.Scalar(dtype='int32') >> pos_emb

        input_layer = td.Map(td.Record((char_lstm, pos_x, pos_y))
                             >> td.Concat())

        maxlayer = (td.AllOf(fwdlayer, bwdlayer)
                    >> td.ZipWith(td.Concat())
                    >> td.Max())

        output_layer = (input_layer
                        >> maxlayer
                        >> td.FC(n_labels,
                                 input_keep_prob=self.keep_prob,
                                 activation=None))

        label_blk = td.Vector(n_labels, dtype=tf.int32)
        self.compiler = td.Compiler.create((output_layer, label_blk))
        self.y_out, self.y_true = self.compiler.output_tensors

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.y_out, labels=self.y_true)
        self.y_loss = tf.reduce_mean(cross_entropy)

        self.y_prob = tf.nn.softmax(self.y_out)
        self.y_true_idx = tf.argmax(self.y_true, axis=1)
        self.y_pred_idx = tf.argmax(self.y_prob, axis=1)
        self.y_pred = tf.one_hot(self.y_pred_idx,
                                 depth=n_labels,
                                 dtype=tf.int32)

        # ---- learning-rate schedule driven by an epoch counter
        epoch_step = tf.Variable(0, trainable=False)
        self.epoch_step_op = tf.assign(epoch_step, epoch_step + 1)
        lrate_decay = tf.train.exponential_decay(self.lrate, epoch_step, 1,
                                                 self.decay)

        if self.optimizer == 'adam':
            self.opt = tf.train.AdamOptimizer(learning_rate=lrate_decay)
        elif self.optimizer == 'adagrad':
            self.opt = tf.train.AdagradOptimizer(
                learning_rate=lrate_decay, initial_accumulator_value=1e-08)
        elif self.optimizer in ('rmsprop', 'default'):
            self.opt = tf.train.RMSPropOptimizer(learning_rate=lrate_decay,
                                                 epsilon=1e-08)
        else:
            raise Exception(('The optimizer {} is not in list of available ' +
                             'optimizers: default, adam, adagrad, rmsprop.')
                            .format(self.optimizer))

        # apply learning multiplier on embedding learning rate
        embeds = [pos_emb.weights, char_emb.weights]
        scaled_grads = []
        n_scaled = 0
        for grad, var in self.opt.compute_gradients(self.y_loss):
            if var in embeds:
                n_scaled += 1
                grad = tf.scalar_mul(self.embedding_factor, grad)
            scaled_grads.append((grad, var))

        assert n_scaled == len(embeds)  # internal consistency check
        self.train_step = self.opt.apply_gradients(scaled_grads)

        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=100)
def __init__(self, image_feat_grid, text_seq_batch, seq_length_batch,
             T_decoder, num_vocab_txt, embed_dim_txt, num_vocab_nmn,
             embed_dim_nmn, lstm_dim, num_layers, assembler, encoder_dropout,
             decoder_dropout, decoder_sampling, num_choices, use_qpn,
             qpn_dropout, reduce_visfeat_dim=False, new_visfeat_dim=256,
             use_gt_layout=None, gt_layout_batch=None,
             scope='neural_module_network', reuse=None):
    """Build the layout generator and the module network over image features.

    Exposes ``image_feat_grid`` (optionally reduced by a 1x1 conv), the
    seq2seq outputs (``predicted_tokens``, ``token_probs``, ``word_vecs``,
    ``neg_entropy``, ``atts``, ``log_seq_prob``), the Fold ``compiler``,
    ``scores_nmn``, ``scores`` (plus ``scores_qpn`` when ``use_qpn`` is
    set), and the regularizers ``entropy_reg`` and ``l2_reg``.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 0: Visual feature from CNN
        self.reduce_visfeat_dim = reduce_visfeat_dim
        if reduce_visfeat_dim:
            # use an extra linear 1x1 conv layer (without ReLU)
            # to reduce the feature dimension
            with tf.variable_scope('reduce_visfeat_dim'):
                image_feat_grid = conv(
                    'conv_reduce_visfeat_dim', image_feat_grid,
                    kernel_size=1, stride=1, output_dim=new_visfeat_dim)
            print('visual feature dimension reduced to %d' % new_visfeat_dim)
        self.image_feat_grid = image_feat_grid

        # Part 1: Seq2seq RNN to generate module layout tokens
        with tf.variable_scope('layout_generation'):
            att_seq2seq = AttentionSeq2Seq(
                text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
                embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim,
                num_layers, assembler, encoder_dropout, decoder_dropout,
                decoder_sampling, use_gt_layout, gt_layout_batch)
            self.att_seq2seq = att_seq2seq
            self.atts = att_seq2seq.atts
            self.predicted_tokens = att_seq2seq.predicted_tokens
            self.token_probs = att_seq2seq.token_probs
            self.word_vecs = att_seq2seq.word_vecs
            self.neg_entropy = att_seq2seq.neg_entropy

            # log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(
                tf.log(self.token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(image_feat_grid, self.word_vecs, None,
                              num_choices)
            self.modules = modules

            # Attention shape: the feature grid's dimensions between its
            # first and last axes, followed by a trailing 1.
            att_shape = image_feat_grid.get_shape().as_list()[1:-1] + [1]
            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(td.PyObjectType(),
                                                  td.TensorType(att_shape))

            def module_case(module_fn, num_att_inputs):
                # Record of `num_att_inputs` recursive attention inputs plus
                # the two int32 indices, piped into the module function.
                input_names = ('input_0', 'input_1')[:num_att_inputs]
                fields = [(name, att_expr_decl()) for name in input_names]
                fields.append(('time_idx', td.Scalar('int32')))
                fields.append(('batch_idx', td.Scalar('int32')))
                return td.Record(fields) >> td.Function(module_fn)

            # Modules that output an attention map and may be nested.
            attention_cases = {
                '_Scene': module_case(modules.SceneModule, 0),
                '_Find': module_case(modules.FindModule, 0),
                '_Filter': module_case(modules.FilterModule, 1),
                '_FindSameProperty':
                    module_case(modules.FindSamePropertyModule, 1),
                '_Transform': module_case(modules.TransformModule, 1),
                '_And': module_case(modules.AndModule, 2),
                '_Or': module_case(modules.OrModule, 2)}

            # Modules that are only used at the top level of an expression.
            answer_cases = {
                '_Exist': module_case(modules.ExistModule, 1),
                '_Count': module_case(modules.CountModule, 1),
                '_EqualNum': module_case(modules.EqualNumModule, 2),
                '_MoreNum': module_case(modules.MoreNumModule, 2),
                '_LessNum': module_case(modules.LessNumModule, 2),
                '_SameProperty': module_case(modules.SamePropertyModule, 2),
                '_Describe': module_case(modules.DescribeModule, 1)}

            recursion_cases = td.OneOf(td.GetItem('module'), attention_cases)
            att_expr_decl.resolve_to(recursion_cases)

            # For invalid expressions, define a dummy answer
            # so that all answers have the same form
            dummy_scores = td.Void() >> td.FromTensor(
                np.zeros(num_choices, np.float32))
            answer_cases[INVALID_EXPR] = dummy_scores
            output_scores = td.OneOf(td.GetItem('module'), answer_cases)

            # compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores_nmn = self.compiler.output_tensors[0]

        # Add a question prior network if specified
        self.use_qpn = use_qpn
        self.qpn_dropout = qpn_dropout
        if use_qpn:
            self.scores_qpn = question_prior_net(
                att_seq2seq.encoder_states, num_choices, qpn_dropout)
            self.scores = self.scores_nmn + self.scores_qpn
        else:
            self.scores = self.scores_nmn

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(self.neg_entropy)
        # L2 covers trainable variables whose op name contains `scope` and
        # ends in 'weights'.
        module_weights = []
        for v in tf.trainable_variables():
            if scope in v.op.name and v.op.name.endswith('weights'):
                module_weights.append(v)
        self.l2_reg = tf.add_n([tf.nn.l2_loss(v) for v in module_weights])
def __init__(self, image_data_batch, image_mean, text_seq_batch,
             seq_length_batch, T_decoder, num_vocab_txt, embed_dim_txt,
             num_vocab_nmn, embed_dim_nmn, lstm_dim, num_layers, assembler,
             encoder_dropout, decoder_dropout, decoder_sampling,
             num_choices, use_qpn, qpn_dropout, reduce_visfeat_dim=False,
             new_visfeat_dim=128, use_gt_layout=None, gt_layout_batch=None,
             map_dim=1024, scope='neural_module_network', reuse=None):
    """Build the layout-generation and layout-execution graph.

    The constructor wires three stages together under ``scope``:
    a convnet over the normalized images, an attentional seq2seq model
    that emits module layout tokens, and a TensorFlow Fold program that
    executes the ``_Find`` / ``_Transform`` / ``_And`` / ``_Describe``
    modules.

    Args:
        image_data_batch: Raw image batch; it is divided by 255.0 and
            ``image_mean`` is subtracted before ``nlvr_convnet`` is applied.
        image_mean: Value subtracted from the rescaled images.
        text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
        embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim, num_layers,
        assembler, encoder_dropout, decoder_dropout, decoder_sampling,
        use_gt_layout, gt_layout_batch: Forwarded unchanged, in this
            order, to ``AttentionSeq2Seq``.
        num_choices: Length of the all-zero dummy score vector used for
            invalid expressions; also passed to ``Modules`` and to
            ``question_prior_net``.
        use_qpn: When truthy, question-prior scores are added to the
            module-network scores.
        qpn_dropout: Passed to ``question_prior_net``.
        reduce_visfeat_dim, new_visfeat_dim: Accepted but not read
            anywhere in this constructor.
        map_dim: Passed to ``Modules``.
        scope: Name of the enclosing variable scope; it is also used as a
            substring filter when collecting weights for the L2 term.
        reuse: Passed to ``tf.variable_scope``.

    Attributes set: ``image_feat_grid``, ``att_seq2seq``, ``atts``,
    ``predicted_tokens``, ``token_probs``, ``word_vecs``, ``neg_entropy``,
    ``log_seq_prob``, ``modules``, ``compiler``, ``scores_nmn``,
    ``use_qpn``, ``qpn_dropout``, ``scores_qpn`` (only when ``use_qpn``),
    ``scores``, ``entropy_reg`` and ``l2_reg``.
    """
    # NOTE(review): the extent of each variable_scope below was
    # reconstructed from a whitespace-collapsed source -- confirm.
    with tf.variable_scope(scope, reuse=reuse):
        # Part 0: Visual feature from CNN
        with tf.variable_scope('image_feature_cnn'):
            normalized_images = image_data_batch / 255.0 - image_mean
            feat_grid = nlvr_convnet(normalized_images)
            self.image_feat_grid = feat_grid

        # Part 1: Seq2seq RNN to generate module layout tokens
        with tf.variable_scope('layout_generation'):
            seq2seq = AttentionSeq2Seq(
                text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
                embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim,
                num_layers, assembler, encoder_dropout, decoder_dropout,
                decoder_sampling, use_gt_layout, gt_layout_batch)
            self.att_seq2seq = seq2seq

            # Expose the seq2seq outputs directly on the model.
            self.atts = seq2seq.atts
            self.predicted_tokens = seq2seq.predicted_tokens
            self.token_probs = seq2seq.token_probs
            self.word_vecs = seq2seq.word_vecs
            self.neg_entropy = seq2seq.neg_entropy

            # Log probability of each generated sequence: sum of the
            # per-token log probabilities along axis 0.
            token_log_probs = tf.log(self.token_probs)
            self.log_seq_prob = tf.reduce_sum(token_log_probs, axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(feat_grid, self.word_vecs, None, num_choices,
                              map_dim)
            self.modules = modules

            # Attention maps keep the middle dimensions of the feature
            # grid and end in a single channel.
            att_shape = feat_grid.get_shape().as_list()[1:-1] + [1]

            # Forward declaration so module cases can recurse into
            # attention-producing sub-expressions.
            att_expr_decl = td.ForwardDeclaration(
                td.PyObjectType(), td.TensorType(att_shape))

            def build_case(module_fn, att_inputs=()):
                # One record per module: its attention inputs (if any),
                # then the time and batch indices, fed to the module.
                fields = [(key, att_expr_decl()) for key in att_inputs]
                fields.append(('time_idx', td.Scalar('int32')))
                fields.append(('batch_idx', td.Scalar('int32')))
                return td.Record(fields) >> td.Function(module_fn)

            # _Find
            case_find = build_case(modules.FindModule)
            # _Transform
            case_transform = build_case(modules.TransformModule,
                                        ('input_0',))
            # _And
            case_and = build_case(modules.AndModule,
                                  ('input_0', 'input_1'))
            # _Describe
            case_describe = build_case(modules.DescribeModule,
                                       ('input_0',))

            # Modules that yield an attention map, dispatched on the
            # expression's 'module' entry.
            attention_cases = {
                '_Find': case_find,
                '_Transform': case_transform,
                '_And': case_and,
            }
            recursion_cases = td.OneOf(td.GetItem('module'),
                                       attention_cases)
            att_expr_decl.resolve_to(recursion_cases)

            # For invalid expressions, define a dummy answer
            # so that all answers have the same form.
            zero_scores = np.zeros(num_choices, np.float32)
            dummy_scores = td.Void() >> td.FromTensor(zero_scores)
            answer_cases = {
                '_Describe': case_describe,
                INVALID_EXPR: dummy_scores,
            }
            output_scores = td.OneOf(td.GetItem('module'), answer_cases)

            # Compile and get the output scores.
            self.compiler = td.Compiler.create(output_scores)
            self.scores_nmn = self.compiler.output_tensors[0]

        # Add a question prior network if specified.
        self.use_qpn = use_qpn
        self.qpn_dropout = qpn_dropout
        self.scores = self.scores_nmn
        if use_qpn:
            self.scores_qpn = question_prior_net(
                seq2seq.encoder_states, num_choices, qpn_dropout)
            self.scores = self.scores_nmn + self.scores_qpn

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(self.neg_entropy)

        # L2 covers trainable variables whose op name contains `scope`
        # and ends with 'weights'.
        l2_terms = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            if scope in var_name and var_name.endswith('weights'):
                l2_terms.append(tf.nn.l2_loss(var))
        self.l2_reg = tf.add_n(l2_terms)