def lstm_block(x, v, lstm_size=512, vocab_size=52, num_words=30,
               feed_previous=False, scope='lstm_block', reuse=False,
               batch_size=4):
    """Unroll a two-layer LSTM decoder for ``num_words`` steps.

    At every step the first LSTM consumes one "word" vector, its output is
    concatenated with ``v`` along axis 1, the second LSTM consumes that
    concatenation, and a linear ``fullyConnected`` layer (scope ``lstm_fc``)
    maps the result to ``vocab_size`` units.

    Args:
        x: tensor indexed as ``x[:, t, :]``; step ``t`` supplies the input
            word for that step (always used for step 0).
        v: tensor concatenated with the first LSTM's output on axis 1 at
            every step.
        lstm_size: number of units of both ``BasicLSTMCell`` instances.
        vocab_size: number of output units of the final linear layer.
        num_words: number of unrolled steps, i.e. length of the result.
        feed_previous: if False (teacher forcing) step ``i + 1`` reads
            ``x[:, i + 1, :]``; if True it reads the softmax of the previous
            step's output instead.
        scope: outer variable scope name.
        reuse: passed to the variable scopes and the LSTM cells used for the
            first step.
        batch_size: batch size used to build the LSTM zero states.

    Returns:
        A list of ``num_words`` tensors, one ``fullyConnected`` output per
        step (identity activation, so un-normalised scores).
    """
    # ---- step 0: creates (or, with reuse=True, re-attaches) the variables ----
    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope('lstm_1', reuse=reuse):
            lstm_first = tf.contrib.rnn.BasicLSTMCell(lstm_size, reuse=reuse)
            state_first = lstm_first.zero_state(batch_size, tf.float32)
            o_1, state_first = lstm_first(x[:, 0, :], state_first)

        r = tf.concat([o_1, v], axis=1)

        with tf.variable_scope('lstm_2', reuse=reuse):
            lstm_second = tf.contrib.rnn.BasicLSTMCell(lstm_size, reuse=reuse)
            state_second = lstm_second.zero_state(batch_size, tf.float32)
            o_2, state_second = lstm_second(r, state_second)

        # NOTE(review): reuse is hard-coded to False here although the
        # function takes a `reuse` argument -- confirm this is intended when
        # the block is built a second time with reuse=True.
        o = fullyConnected(o_2, output_units=vocab_size, std='xavier',
                           activation=tf.identity, reuse=False,
                           scope='lstm_fc')

    # ---- steps 1 .. num_words-1: always share the step-0 variables ----
    with tf.variable_scope(scope, reuse=True):
        # Teacher training: we feed in a list of words, so the LSTM output
        # does not need to be fed back in unless feed_previous is set.
        outputs = [o]
        for i in range(num_words - 1):
            if not feed_previous:
                word = x[:, i + 1, :]
            else:
                # Fixed: `tf.softmax` does not exist; the op is tf.nn.softmax.
                # NOTE(review): presumably requires the last dimension of x
                # to equal vocab_size so the shared LSTM weights fit -- confirm.
                word = tf.nn.softmax(o)

            with tf.variable_scope('lstm_1', reuse=True):
                o, state_first = lstm_first(word, state_first)

            o = tf.concat([o, v], axis=1)

            with tf.variable_scope('lstm_2', reuse=True):
                o, state_second = lstm_second(o, state_second)

            o = fullyConnected(o, output_units=vocab_size, std='xavier',
                               activation=tf.identity, reuse=True,
                               scope='lstm_fc')
            outputs.append(o)

    return outputs
def model_architecture(para):
    """Build the two-level graph-CNN classifier graph and open a session.

    The graph consists of: input placeholders, a first ``gcnLayer`` followed
    by dropout and ``graph_cluster_maxpooling``, a second ``gcnLayer`` on the
    pooled clusters, a max over axis 1, two ``fullyConnected`` layers, a
    per-sample weighted softmax cross-entropy loss plus L2 regularisation on
    every trainable variable whose name does not contain ``'bias'``, and an
    Adam training op. All variables are then initialised in a new
    ``tf.Session``.

    Args:
        para: global parameter instance. Attributes read here:
            ``pointNumber``, ``clusterNumberL1``, ``nearestNeighborL1``,
            ``outputClassN``, ``gcn_1_filter_n``, ``gcn_2_filter_n``,
            ``chebyshev_1_Order`` and ``fc_1_n``.

    Returns:
        ``(trainOperaion, sess)``: a dict mapping names to the placeholders
        and ops of the graph, and the session in which the variables were
        initialised.
    """
    # ------------------------------ placeholders ---------------------------
    inputPC = tf.placeholder(tf.float32, [None, para.pointNumber, 3])
    inputGraph = tf.placeholder(
        tf.float32, [None, para.pointNumber * para.pointNumber])
    l2Graph = tf.placeholder(
        tf.float32, [None, para.clusterNumberL1 * para.clusterNumberL1])
    outputLabel = tf.placeholder(tf.float32, [None, para.outputClassN])
    batch_size = tf.placeholder(tf.int32)
    batch_index_l1 = tf.placeholder(
        tf.int32, [None, para.clusterNumberL1 * para.nearestNeighborL1])
    # batch_index_l2 = tf.placeholder(tf.int32, [None, para.clusterNumberL2 * para.nearestNeighborL2])

    # The flattened graph matrices are fed row-major and reshaped to square.
    scaledLaplacian = tf.reshape(
        inputGraph, [-1, para.pointNumber, para.pointNumber])
    l2_scaledLaplacian = tf.reshape(
        l2Graph, [-1, para.clusterNumberL1, para.clusterNumberL1])

    weights = tf.placeholder(tf.float32, [None])
    lr = tf.placeholder(tf.float32)
    keep_prob_1 = tf.placeholder(tf.float32)
    keep_prob_2 = tf.placeholder(tf.float32)

    # ------------------------------ gcn layer 1 ----------------------------
    gcn_1 = gcnLayer(inputPC, scaledLaplacian, pointNumber=para.pointNumber,
                     inputFeatureN=3, outputFeatureN=para.gcn_1_filter_n,
                     chebyshev_order=para.chebyshev_1_Order)
    gcn_1_output = tf.nn.dropout(gcn_1, keep_prob=keep_prob_1)
    gcn_1_pooling = graph_cluster_maxpooling(
        batch_index_l1, gcn_1_output, batch_size=batch_size,
        M=para.clusterNumberL1, k=para.nearestNeighborL1,
        n=para.gcn_1_filter_n)
    # Only referenced by the disabled concat below; the op is still created
    # so that auto-generated graph op names stay exactly as before.
    globalFeatures_1 = tf.reduce_max(gcn_1_pooling, axis=1)
    print(gcn_1_pooling)

    # ------------------------------ gcn layer 2 ----------------------------
    # NOTE(review): this layer reuses para.chebyshev_1_Order -- confirm that
    # a separate order for the second layer is not intended.
    gcn_2 = gcnLayer(gcn_1_pooling, l2_scaledLaplacian,
                     pointNumber=para.clusterNumberL1,
                     inputFeatureN=para.gcn_1_filter_n,
                     outputFeatureN=para.gcn_2_filter_n,
                     chebyshev_order=para.chebyshev_1_Order)
    gcn_2_output = tf.nn.dropout(gcn_2, keep_prob=keep_prob_1)
    # gcn_2_pooling = graph_cluster_maxpooling(batch_index_l2, gcn_2_output, batch_size=batch_size,
    #                                          M=para.clusterNumberL2, k=para.nearestNeighborL2, n=para.gcn_2_filter_n)
    gcn_2_pooling = gcn_2_output
    print(gcn_2_pooling)

    # ------------------------------ global feature -------------------------
    globalFeatures = tf.reduce_max(gcn_2_pooling, axis=1)
    print(globalFeatures)
    globalFeatures = tf.nn.dropout(globalFeatures, keep_prob=keep_prob_2)
    print("The global feature is {}".format(globalFeatures))
    # final_concat_features = tf.concat([globalFeatures_1, globalFeatures], axis=1)
    # final_concat_features = globalFeatures
    globalFeatureN = para.gcn_2_filter_n * 1

    # fully connected layer 1
    fc_layer_1 = fullyConnected(globalFeatures, inputFeatureN=globalFeatureN,
                                outputFeatureN=para.fc_1_n)
    fc_layer_1 = tf.nn.relu(fc_layer_1)
    fc_layer_1 = tf.nn.dropout(fc_layer_1, keep_prob=keep_prob_2)
    print("The output of the first fc layer is {}".format(fc_layer_1))

    # fully connected layer 2
    fc_layer_2 = fullyConnected(fc_layer_1, inputFeatureN=para.fc_1_n,
                                outputFeatureN=para.outputClassN)
    print("The output of the second fc layer is {}".format(fc_layer_2))

    # =================================Define loss===========================
    predictSoftMax = tf.nn.softmax(fc_layer_2)
    predictLabels = tf.argmax(predictSoftMax, axis=1)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=fc_layer_2,
                                                   labels=outputLabel)
    loss = tf.multiply(loss, weights)  # per-sample weighting
    loss = tf.reduce_mean(loss)

    # `trainable_vars` instead of `vars`, which shadowed the builtin.
    trainable_vars = tf.trainable_variables()
    loss_reg = tf.add_n(
        [tf.nn.l2_loss(v) for v in trainable_vars
         if 'bias' not in v.name]) * 8e-6  # best: 8 #last: 10
    loss_total = loss + loss_reg

    correct_prediction = tf.equal(predictLabels,
                                  tf.argmax(outputLabel, axis=1))
    acc = tf.cast(correct_prediction, tf.float32)
    acc = tf.reduce_mean(acc)

    train = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss_total)

    # Count trainable parameters (product of each variable's static shape).
    total_parameters = 0
    for variable in trainable_vars:
        # shape is an array of tf.Dimension
        variable_parameters = 1
        for dim in variable.get_shape():
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print('Total parameters number is {}'.format(total_parameters))

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    trainOperaion = {
        'train': train,
        'loss': loss,
        'acc': acc,
        'loss_total': loss_total,
        'loss_reg': loss_reg,
        'inputPC': inputPC,
        'inputGraph': inputGraph,
        'l2Graph': l2Graph,
        'outputLabel': outputLabel,
        'weights': weights,
        'predictLabels': predictLabels,
        'batch_index_l1': batch_index_l1,
        'keep_prob_1': keep_prob_1,
        'keep_prob_2': keep_prob_2,
        'lr': lr,
        'batch_size': batch_size,
    }
    return trainOperaion, sess
def model_architecture(para):
    """Build the two-level graph-CNN classifier graph and open a session.

    Pipeline: placeholders -> ``gcnLayer`` -> dropout ->
    ``graph_cluster_maxpooling`` -> ``gcnLayer`` -> dropout -> max over
    axis 1 -> dropout -> ``fullyConnected`` + ReLU + dropout ->
    ``fullyConnected``. The loss is the per-sample weighted softmax
    cross-entropy plus an L2 penalty over all trainable variables whose name
    does not contain ``'bias'``; it is minimised with Adam. A new
    ``tf.Session`` is created and all variables are initialised in it.

    Args:
        para: global parameter instance. Attributes read here:
            ``pointNumber``, ``clusterNumberL1``, ``nearestNeighborL1``,
            ``outputClassN``, ``gcn_1_filter_n``, ``gcn_2_filter_n``,
            ``chebyshev_1_Order`` and ``fc_1_n``.

    Returns:
        ``(trainOperaion, sess)``: the dict of named placeholders/ops and
        the session holding the initialised variables.
    """
    # ---- inputs ----
    inputPC = tf.placeholder(tf.float32, [None, para.pointNumber, 3])
    inputGraph = tf.placeholder(
        tf.float32, [None, para.pointNumber * para.pointNumber])
    l2Graph = tf.placeholder(
        tf.float32, [None, para.clusterNumberL1 * para.clusterNumberL1])
    outputLabel = tf.placeholder(tf.float32, [None, para.outputClassN])
    batch_size = tf.placeholder(tf.int32)
    batch_index_l1 = tf.placeholder(
        tf.int32, [None, para.clusterNumberL1 * para.nearestNeighborL1])
    # batch_index_l2 = tf.placeholder(tf.int32, [None, para.clusterNumberL2 * para.nearestNeighborL2])

    # Flattened graph matrices are reshaped back to square matrices.
    scaledLaplacian = tf.reshape(
        inputGraph, [-1, para.pointNumber, para.pointNumber])
    l2_scaledLaplacian = tf.reshape(
        l2Graph, [-1, para.clusterNumberL1, para.clusterNumberL1])

    weights = tf.placeholder(tf.float32, [None])
    lr = tf.placeholder(tf.float32)
    keep_prob_1 = tf.placeholder(tf.float32)
    keep_prob_2 = tf.placeholder(tf.float32)

    # ---- gcn layer 1 ----
    gcn_1 = gcnLayer(inputPC, scaledLaplacian, pointNumber=para.pointNumber,
                     inputFeatureN=3, outputFeatureN=para.gcn_1_filter_n,
                     chebyshev_order=para.chebyshev_1_Order)
    gcn_1_output = tf.nn.dropout(gcn_1, keep_prob=keep_prob_1)
    gcn_1_pooling = graph_cluster_maxpooling(
        batch_index_l1, gcn_1_output, batch_size=batch_size,
        M=para.clusterNumberL1, k=para.nearestNeighborL1,
        n=para.gcn_1_filter_n)
    # Unused except by the disabled concat further down; kept so the
    # auto-generated names of later graph ops do not shift.
    globalFeatures_1 = tf.reduce_max(gcn_1_pooling, axis=1)
    print(gcn_1_pooling)

    # ---- gcn layer 2 ----
    # NOTE(review): chebyshev_1_Order is used for this second layer as well;
    # confirm that is deliberate.
    gcn_2 = gcnLayer(gcn_1_pooling, l2_scaledLaplacian,
                     pointNumber=para.clusterNumberL1,
                     inputFeatureN=para.gcn_1_filter_n,
                     outputFeatureN=para.gcn_2_filter_n,
                     chebyshev_order=para.chebyshev_1_Order)
    gcn_2_output = tf.nn.dropout(gcn_2, keep_prob=keep_prob_1)
    # gcn_2_pooling = graph_cluster_maxpooling(batch_index_l2, gcn_2_output, batch_size=batch_size,
    #                                          M=para.clusterNumberL2, k=para.nearestNeighborL2, n=para.gcn_2_filter_n)
    gcn_2_pooling = gcn_2_output
    print(gcn_2_pooling)

    # ---- global feature ----
    globalFeatures = tf.reduce_max(gcn_2_pooling, axis=1)
    print(globalFeatures)
    globalFeatures = tf.nn.dropout(globalFeatures, keep_prob=keep_prob_2)
    print("The global feature is {}".format(globalFeatures))
    # final_concat_features = tf.concat([globalFeatures_1, globalFeatures], axis=1)
    # final_concat_features = globalFeatures
    globalFeatureN = para.gcn_2_filter_n*1

    # ---- fully connected layer 1 ----
    fc_layer_1 = fullyConnected(globalFeatures, inputFeatureN=globalFeatureN,
                                outputFeatureN=para.fc_1_n)
    fc_layer_1 = tf.nn.relu(fc_layer_1)
    fc_layer_1 = tf.nn.dropout(fc_layer_1, keep_prob=keep_prob_2)
    print("The output of the first fc layer is {}".format(fc_layer_1))

    # ---- fully connected layer 2 ----
    fc_layer_2 = fullyConnected(fc_layer_1, inputFeatureN=para.fc_1_n,
                                outputFeatureN=para.outputClassN)
    print("The output of the second fc layer is {}".format(fc_layer_2))

    # =================================Define loss===========================
    predictSoftMax = tf.nn.softmax(fc_layer_2)
    predictLabels = tf.argmax(predictSoftMax, axis=1)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=fc_layer_2,
                                                   labels=outputLabel)
    loss = tf.multiply(loss, weights)  # per-sample weighting
    loss = tf.reduce_mean(loss)

    # Renamed from `vars`, which shadowed the Python builtin.
    trainable_vars = tf.trainable_variables()
    non_bias_l2 = [tf.nn.l2_loss(v) for v in trainable_vars
                   if 'bias' not in v.name]
    loss_reg = tf.add_n(non_bias_l2) * 8e-6  # best: 8 #last: 10
    loss_total = loss + loss_reg

    correct_prediction = tf.equal(predictLabels,
                                  tf.argmax(outputLabel, axis=1))
    acc = tf.cast(correct_prediction, tf.float32)
    acc = tf.reduce_mean(acc)

    train = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss_total)

    # Report the number of trainable parameters.
    total_parameters = 0
    for variable in trainable_vars:
        # shape is an array of tf.Dimension
        variable_parameters = 1
        for dim in variable.get_shape():
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print('Total parameters number is {}'.format(total_parameters))

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    trainOperaion = {
        'train': train,
        'loss': loss,
        'acc': acc,
        'loss_total': loss_total,
        'loss_reg': loss_reg,
        'inputPC': inputPC,
        'inputGraph': inputGraph,
        'l2Graph': l2Graph,
        'outputLabel': outputLabel,
        'weights': weights,
        'predictLabels': predictLabels,
        'batch_index_l1': batch_index_l1,
        'keep_prob_1': keep_prob_1,
        'keep_prob_2': keep_prob_2,
        'lr': lr,
        'batch_size': batch_size,
    }
    return trainOperaion, sess
def model_architecture(para):
    """Build the single-resolution graph-CNN classifier graph.

    Two ``gcnLayer`` stages operate on the same scaled Laplacian; each is
    followed by dropout and ``globalPooling``. The two pooled features are
    concatenated on axis 1 and passed through two ``fullyConnected`` layers.
    The loss is the per-sample weighted softmax cross-entropy plus an L2
    penalty over all trainable variables whose name does not contain
    ``'bias'``, minimised with Adam. No session is created and no variables
    are initialised here.

    Args:
        para: global parameter instance. Attributes read here:
            ``pointNumber``, ``outputClassN``, ``gcn_1_filter_n``,
            ``gcn_2_filter_n``, ``chebyshev_1_Order``, ``chebyshev_2_Order``
            and ``fc_1_n``.

    Returns:
        dict mapping names to the placeholders and ops of the graph.
    """
    # ---- inputs ----
    inputPC = tf.placeholder(tf.float32, [None, para.pointNumber, 3])
    inputGraph = tf.placeholder(
        tf.float32, [None, para.pointNumber * para.pointNumber])
    outputLabel = tf.placeholder(tf.float32, [None, para.outputClassN])

    # The flattened graph matrix is reshaped back to a square matrix.
    scaledLaplacian = tf.reshape(
        inputGraph, [-1, para.pointNumber, para.pointNumber])

    weights = tf.placeholder(tf.float32, [None])
    lr = tf.placeholder(tf.float32)
    keep_prob_1 = tf.placeholder(tf.float32)
    keep_prob_2 = tf.placeholder(tf.float32)

    # gcn layer 1
    gcn_1 = gcnLayer(inputPC, scaledLaplacian, pointNumber=para.pointNumber,
                     inputFeatureN=3, outputFeatureN=para.gcn_1_filter_n,
                     chebyshev_order=para.chebyshev_1_Order)
    gcn_1_output = tf.nn.dropout(gcn_1, keep_prob=keep_prob_1)
    gcn_1_pooling = globalPooling(gcn_1_output,
                                  featureNumber=para.gcn_1_filter_n)
    print("The output of the first gcn layer is {}".format(gcn_1_pooling))
    print(gcn_1_pooling)

    # gcn_layer_2
    gcn_2 = gcnLayer(gcn_1_output, scaledLaplacian,
                     pointNumber=para.pointNumber,
                     inputFeatureN=para.gcn_1_filter_n,
                     outputFeatureN=para.gcn_2_filter_n,
                     chebyshev_order=para.chebyshev_2_Order)
    gcn_2_output = tf.nn.dropout(gcn_2, keep_prob=keep_prob_1)
    gcn_2_pooling = globalPooling(gcn_2_output,
                                  featureNumber=para.gcn_2_filter_n)
    print("The output of the second gcn layer is {}".format(gcn_2_pooling))

    # gcn_layer_3 (disabled; previously kept inside a triple-quoted string)
    # gcn_3 = gcnLayer(gcn_2_output, scaledLaplacian, pointNumber=para.pointNumber, inputFeatureN=para.gcn_2_filter_n,
    #                  outputFeatureN=para.gcn_3_filter_n, chebyshev_order=para.chebyshev_2_Order)
    # gcn_3_output = tf.nn.dropout(gcn_3, keep_prob=keep_prob_1)
    # gcn_3_pooling = globalPooling(gcn_3_output, featureNumber=para.gcn_3_filter_n)
    # print("The output of the second gcn layer is {}".format(gcn_2_pooling))

    # concatenate global features
    # globalFeatures = gcn_3_pooling
    globalFeatures = tf.concat([gcn_1_pooling, gcn_2_pooling], axis=1)
    globalFeatures = tf.nn.dropout(globalFeatures, keep_prob=keep_prob_2)
    print("The global feature is {}".format(globalFeatures))
    # globalFeatureN = para.gcn_2_filter_n*2
    # NOTE(review): the factor 2 presumably reflects globalPooling emitting
    # 2 * featureNumber values per layer -- confirm against globalPooling.
    globalFeatureN = (para.gcn_1_filter_n + para.gcn_2_filter_n)*2

    # fully connected layer 1
    fc_layer_1 = fullyConnected(globalFeatures, inputFeatureN=globalFeatureN,
                                outputFeatureN=para.fc_1_n)
    fc_layer_1 = tf.nn.relu(fc_layer_1)
    fc_layer_1 = tf.nn.dropout(fc_layer_1, keep_prob=keep_prob_2)
    print("The output of the first fc layer is {}".format(fc_layer_1))

    # fully connected layer 2
    fc_layer_2 = fullyConnected(fc_layer_1, inputFeatureN=para.fc_1_n,
                                outputFeatureN=para.outputClassN)
    print("The output of the second fc layer is {}".format(fc_layer_2))

    # =================================Define loss===========================
    predictSoftMax = tf.nn.softmax(fc_layer_2)
    predictLabels = tf.argmax(predictSoftMax, axis=1)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=fc_layer_2,
                                                   labels=outputLabel)
    loss = tf.multiply(loss, weights)  # per-sample weighting
    loss = tf.reduce_mean(loss)

    # Renamed from `vars`, which shadowed the Python builtin.
    trainable_vars = tf.trainable_variables()
    loss_reg = tf.add_n(
        [tf.nn.l2_loss(v) for v in trainable_vars
         if 'bias' not in v.name]) * 8e-6  # best: 8 #last: 10
    loss_total = loss + loss_reg

    correct_prediction = tf.equal(predictLabels,
                                  tf.argmax(outputLabel, axis=1))
    acc = tf.cast(correct_prediction, tf.float32)
    acc = tf.reduce_mean(acc)

    train = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss_total)

    # Report the number of trainable parameters.
    total_parameters = 0
    for variable in trainable_vars:
        # shape is an array of tf.Dimension
        variable_parameters = 1
        for dim in variable.get_shape():
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print('Total parameters number is {}'.format(total_parameters))

    trainOperaion = {
        'train': train,
        'loss_total': loss_total,
        'loss': loss,
        'acc': acc,
        'loss_reg': loss_reg,
        'inputPC': inputPC,
        'inputGraph': inputGraph,
        'outputLabel': outputLabel,
        'weights': weights,
        'predictLabels': predictLabels,
        'keep_prob_1': keep_prob_1,
        'keep_prob_2': keep_prob_2,
        'lr': lr,
    }
    return trainOperaion
y = tf.placeholder(shape=[None, crop_dims, crop_dims, C], dtype=tf.float32) ############### # I2I ############### yclass, yhat, o3, o4 = tf_util.I2INet(x, nfilters=Nfilters, activation=leaky_relu, init=init) y_vec = tf.reshape(yhat, (Nbatch, crop_dims**2)) sp = tf_util.fullyConnected(y_vec, crop_dims, leaky_relu, std='xavier', scope='sp1') sp = tf_util.fullyConnected(y_vec, crop_dims**2, leaky_relu, std='xavier', scope='sp2') sp = tf.reshape(sp, (Nbatch, crop_dims, crop_dims, 1)) y_sp = tf_util.conv2D(sp, nfilters=Nfilters, activation=leaky_relu, init=init, scope='sp3') y_sp_1 = tf_util.conv2D(y_sp,
def conv_block(x, num_filters=32, filter_dims=[5, 5], fc_size=1024,
               scope='conv_block', batch_size=4):
    """Encode an image tensor into a vector with seven convs and one FC layer.

    Layer sequence (all ReLU, ``std='xavier'``):
    conv1 7x7 stride 3 VALID -> conv2 stride 1 SAME -> (filters x2)
    conv3 stride 2 VALID -> conv4 stride 1 SAME -> (filters x2)
    conv5 stride 2 VALID -> conv6 stride 1 SAME -> conv7 with 32 filters,
    stride 2 VALID -> reshape to ``[batch_size, -1]`` -> fully connected.

    Args:
        x: input tensor handed to the first ``conv2D``.
        num_filters: filter count of conv1/conv2; doubled before conv3 and
            again before conv5.
        filter_dims: kernel size for conv2..conv7. The list default is never
            mutated in this function and is kept as-is for interface
            compatibility.
        fc_size: number of units of the final fully connected layer.
        scope: variable scope wrapping every layer.
        batch_size: static batch size used for the flattening reshape.

    Returns:
        The output tensor of the final ``fullyConnected`` layer.
    """
    with tf.variable_scope(scope):
        # downsample image with stride [3,3]
        a = conv2D(x, dims=[7, 7], filters=num_filters, strides=[3, 3],
                   std='xavier', padding='VALID', activation=tf.nn.relu,
                   scope='conv1')
        # no downsampling with stride [1,1]
        a = conv2D(a, filter_dims, filters=num_filters, strides=[1, 1],
                   std='xavier', padding='SAME', activation=tf.nn.relu,
                   scope='conv2')

        num_filters = 2 * num_filters
        # downsample image with stride [2,2]
        a = conv2D(a, filter_dims, filters=num_filters, strides=[2, 2],
                   std='xavier', padding='VALID', activation=tf.nn.relu,
                   scope='conv3')
        # no downsampling with stride [1,1]
        a = conv2D(a, filter_dims, filters=num_filters, strides=[1, 1],
                   std='xavier', padding='SAME', activation=tf.nn.relu,
                   scope='conv4')

        num_filters = 2 * num_filters
        # downsample image with stride [2,2]
        a = conv2D(a, filter_dims, filters=num_filters, strides=[2, 2],
                   std='xavier', padding='VALID', activation=tf.nn.relu,
                   scope='conv5')
        # no downsampling with stride [1,1]
        a = conv2D(a, filter_dims, filters=num_filters, strides=[1, 1],
                   std='xavier', padding='SAME', activation=tf.nn.relu,
                   scope='conv6')

        # downsample image with stride [2,2], squeezing down to 32 filters
        num_filters = 32
        # Fixed: this layer previously reused scope 'conv5'. Its filter shape
        # differs from the real conv5, so sharing that scope collides when
        # the helper creates variables through tf.get_variable.
        # NOTE(review): checkpoints written with the old (auto-uniquified)
        # name, if any exist, will need a variable-name remap.
        a = conv2D(a, filter_dims, filters=num_filters, strides=[2, 2],
                   std='xavier', padding='VALID', activation=tf.nn.relu,
                   scope='conv7')

        # Convert to vector with fullyconnected layer
        a = tf.reshape(a, shape=[batch_size, -1])
        a = fullyConnected(a, output_units=fc_size, activation=tf.nn.relu,
                           std='xavier', scope='fc')

        # print() call form works on both Python 2 and Python 3.
        print("output vector of conv_block is: {}".format(a))
        return a
def create_models(self):
    """Build the Keras models enabled on this instance.

    Depending on ``self.do_rpn``, ``self.do_det`` and ``self.do_hoi`` (with
    ``cfg.do_fast_hoi`` selecting the fast or slow HOI variant) this creates
    ``self.model_rpn``, ``self.model_det`` and/or ``self.model_hoi`` and
    increments ``self.nb_models`` once per model built. ``self.mode``
    ('train' / 'test') controls the ROI dimension of the inputs and which
    outputs each model exposes.
    """
    cfg = self.cfg

    if self.mode == 'test':
        print('Creating test models....')
    else:
        print('Creating train models....')

    ########################
    ###### Parameters ######
    ########################
    nb_anchors = cfg.nb_anchors
    pool_size = cfg.pool_size
    nb_object_classes = cfg.nb_object_classes
    nb_hoi_classes = cfg.nb_hoi_classes
    print(' Obj. classes:', nb_object_classes)
    print(' HOI classes:', nb_hoi_classes)

    ########################
    ######## Inputs ########
    ########################
    # All candidate inputs are created up front; each model below picks the
    # ones it needs.
    # NOTE(review): several Input layers share a name ('input_image',
    # 'input_human', 'input_object', 'input_interaction'); they end up in
    # different models here, but confirm no model ever combines two of them.

    # RPN #
    img_input = keras.layers.Input(shape=(None, None, 3), name='input_image')

    # DET #
    # The ROI dimension is fixed only in train mode; otherwise it is None.
    nb_detection_rois = cfg.nb_detection_rois if self.mode == 'train' else None
    img_det_input = keras.layers.Input(shape=(None, None, 3), name='input_image')
    roi_input = keras.layers.Input(shape=(nb_detection_rois, 5), name='input_roi')

    # HOI #
    nb_hoi_rois = cfg.nb_hoi_rois if self.mode == 'train' else None
    img_hoi_input = keras.layers.Input(shape=(None, None, 3), name='input_image')
    # "fast" inputs carry a ROI dimension
    human_fast_input = keras.layers.Input(shape=(nb_hoi_rois, 5), name="input_human")
    object_fast_input = keras.layers.Input(shape=(nb_hoi_rois, 5), name="input_object")
    interaction_fast_input = keras.layers.Input(shape=(nb_hoi_rois, cfg.winShape[0], cfg.winShape[1], 2), name="input_interaction")
    # "slow" inputs are per-sample: fixed-size 227x227 image inputs, one
    # interaction pattern and one 5-vector each for human and object
    human_img_input = keras.layers.Input(shape=(227, 227, 3), name="input_human_img")
    object_img_input = keras.layers.Input(shape=(227, 227, 3), name="input_object_img")
    interaction_slow_input = keras.layers.Input(shape=(cfg.winShape[0], cfg.winShape[1], 2), name="input_interaction")
    human_slow_input = keras.layers.Input(shape=(5, ), name="input_human")
    object_slow_input = keras.layers.Input(shape=(5, ), name="input_object")

    # SHARED #
    # Feature-map input used instead of an image when a shared CNN is used
    # in test mode.
    features_input = keras.layers.Input(shape=(None, None, 512), name="input_features")

    ########################
    ######### RPN ##########
    ########################
    if self.do_rpn:
        print(' Creating RPN model...')
        output_features = models.VGG16_buildin(cfg)(img_input)
        self.nb_models += 1
        rpn_inputs = [img_input]
        rpn_features = layers.rpn(cfg)([output_features])
        # One sigmoid score per anchor at each spatial location.
        x_class = keras.layers.Conv2D(
            filters=nb_anchors,
            kernel_size=(1, 1),
            activation='sigmoid',
            kernel_initializer=keras.initializers.RandomNormal(
                stddev=0.01),
            kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            bias_regularizer=keras.regularizers.l2(cfg.weight_decay),
            name='rpn_out_class')(rpn_features)
        # Four linear regression values per anchor.
        x_deltas = keras.layers.Conv2D(
            filters=nb_anchors * 4,
            kernel_size=(1, 1),
            activation='linear',
            kernel_initializer=keras.initializers.RandomNormal(
                stddev=0.01),
            kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            bias_regularizer=keras.regularizers.l2(cfg.weight_decay),
            name='rpn_out_regress')(rpn_features)
        # In test mode with a shared CNN the backbone features are exposed
        # as a third output.
        if self.mode == 'test' and cfg.use_shared_cnn:
            rpn_outputs = [x_class, x_deltas, output_features]
        else:
            rpn_outputs = [x_class, x_deltas]
        self.model_rpn = keras.models.Model(inputs=rpn_inputs, outputs=rpn_outputs)
        self.model_rpn.name = 'rpn'
        # Only train from conv3_1
        print(' Freezing first few layers...')
        nb_freeze_layers = 17 if cfg.do_finetune else cfg.nb_freeze_layers
        print(' Freeze up to', nb_freeze_layers)
        # Freezes layers with index 0..nb_freeze_layers inclusive: the flag
        # is cleared before the break test.
        # NOTE(review): confirm the inclusive upper bound is intended.
        for i, layer in enumerate(self.model_rpn.layers):
            layer.trainable = False
            if i == nb_freeze_layers:
                break

    ########################
    ###### Detection #######
    ########################
    if self.do_det:
        print(' Creating DET model...')
        self.nb_models += 1
        if self.mode == 'test' and cfg.use_shared_cnn:
            print(' -using shared CNN')
            # Consume precomputed features instead of running a backbone.
            output_features_det = features_input
            detection_inputs = [features_input, roi_input]
        else:
            output_features_det = models.VGG16_buildin(cfg)(img_det_input)
            detection_inputs = [img_det_input, roi_input]
        # NOTE(review): unlike the HOI branch, no `mode` is passed to
        # RoiPoolingConv here -- confirm that is intended.
        object_rois = layers.RoiPoolingConv(
            pool_size=pool_size, batch_size=cfg.nb_detection_rois)(
                [output_features_det, roi_input])
        object_features = layers.fullyConnected(
            cfg, stream='det', use_dropout=True)([object_rois])
        # Output layer names switch to the "fineout" variants when
        # cfg.do_finetune is set.
        object_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=nb_object_classes,
                activation='softmax',
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                bias_regularizer=keras.regularizers.l2(cfg.weight_decay)),
            name="det_out_class" if not cfg.do_finetune else "det_fineout_class")(object_features)
        # Four linear regression values for each of nb_object_classes - 1
        # classes.
        object_deltas = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=4 * (nb_object_classes - 1),
                activation="linear",
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.001),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                bias_regularizer=keras.regularizers.l2(cfg.weight_decay)),
            name="det_out_regress" if not cfg.do_finetune else "det_fineout_regress")(object_features)
        detection_outputs = [object_scores, object_deltas]
        self.model_det = keras.models.Model(inputs=detection_inputs, outputs=detection_outputs)
        self.model_det.name = 'det'
        # Only train from conv3_1
        nb_freeze_layers = 17 if cfg.do_finetune else cfg.nb_freeze_layers
        # Same inclusive freeze loop as for the RPN model.
        for i, layer in enumerate(self.model_det.layers):
            layer.trainable = False
            if i == nb_freeze_layers:
                break

    ########################
    ######### HOI ##########
    ########################
    # Fast HOI: human and object streams are ROI-pooled from one shared
    # feature map.
    if self.do_hoi and cfg.do_fast_hoi:
        print(' Creating fast HOI model...')
        self.nb_models += 1
        if self.mode == 'test' and cfg.use_shared_cnn:
            print(' -using shared CNN')
            output_features_hoi = features_input
            hoi_inputs = [
                features_input, human_fast_input, object_fast_input,
                interaction_fast_input
            ]
        else:
            if cfg.backbone == 'vgg':
                output_features_hoi = models.VGG16_buildin(cfg)(
                    img_hoi_input)
            else:
                output_features_hoi = models.AlexNet_buildin(cfg)(
                    img_hoi_input)
            hoi_inputs = [
                img_hoi_input, human_fast_input, object_fast_input,
                interaction_fast_input
            ]
        ## HUMAN ##
        hoi_human_rois = layers.RoiPoolingConv(
            pool_size=pool_size,
            batch_size=cfg.nb_hoi_rois,
            mode=self.mode)([output_features_hoi, human_fast_input])
        hoi_human_features = layers.fullyConnected(
            cfg, stream='human')([hoi_human_rois])
        # Raw (activation=None) per-class scores; the activation is applied
        # after the three streams are summed.
        hoi_human_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            ),
            name="scores4human" if not cfg.do_finetune else "scores4human_finetune")(hoi_human_features)
        ## OBJECT ##
        hoi_object_rois = layers.RoiPoolingConv(
            pool_size=pool_size,
            batch_size=cfg.nb_hoi_rois,
            mode=self.mode)([output_features_hoi, object_fast_input])
        hoi_object_features = layers.fullyConnected(
            cfg, stream='object')([hoi_object_rois])
        hoi_object_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            ),
            name="scores4object" if not cfg.do_finetune else "scores4object_finetune")(hoi_object_features)
        ## INTERACTION ##
        hoi_pattern_features = layers.pairwiseStream(cfg=cfg)(
            [interaction_fast_input])
        hoi_pattern_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            ),
            name="scores4pattern" if not cfg.do_finetune else "scores4pattern_finetune")(hoi_pattern_features)
        ## FINAL ##
        # Sum the three streams' scores, then apply softmax or sigmoid
        # depending on cfg.do_categorical_hoi.
        hoi_score = keras.layers.Add()(
            [hoi_human_scores, hoi_object_scores, hoi_pattern_scores])
        hoi_final_score = keras.layers.Activation(
            "softmax" if cfg.do_categorical_hoi else 'sigmoid',
            name="hoi_out_class" if not cfg.do_finetune else "hoi_fineout_class")(hoi_score)
        # Rebinds the local names to identity-layer outputs so the boxes can
        # be returned as model outputs; hoi_inputs was built above and still
        # holds the original Input tensors.
        human_fast_input = layers.identity(cfg)([human_fast_input])
        object_fast_input = layers.identity(cfg)([object_fast_input])
        if self.mode == 'test':
            hoi_outputs = [
                hoi_final_score, human_fast_input, object_fast_input
            ]
        else:
            hoi_outputs = [hoi_final_score]
        self.model_hoi = keras.models.Model(inputs=hoi_inputs, outputs=hoi_outputs)
        self.model_hoi.name = 'hoi'

    # Slow HOI: the backbone is applied separately to the human_img_input
    # and object_img_input tensors.
    if self.do_hoi and not cfg.do_fast_hoi:
        print(' Creating slow HOI model...')
        self.nb_models += 1
        if cfg.backbone == 'vgg':
            hoi_human_features = models.VGG16_buildin(cfg)(human_img_input)
            hoi_object_features = models.VGG16_buildin(cfg)(
                object_img_input)
        else:
            hoi_human_features = models.AlexNet_buildin(cfg)(
                human_img_input)
            hoi_object_features = models.AlexNet_buildin(cfg)(
                object_img_input)
        hoi_inputs = [
            human_img_input, object_img_input, interaction_slow_input,
            human_slow_input, object_slow_input
        ]
        ## HUMAN ##
        hoi_human_scores = keras.layers.Dense(
            units=1 * nb_hoi_classes,
            activation=None,
            kernel_initializer=keras.initializers.RandomNormal(
                stddev=0.01),
            kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            name="scores4human")(hoi_human_features)
        ## OBJECT ##
        hoi_object_scores = keras.layers.Dense(
            units=1 * nb_hoi_classes,
            activation=None,
            kernel_initializer=keras.initializers.RandomNormal(
                stddev=0.01),
            kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            name="scores4object")(hoi_object_features)
        ## INTERACTION ##
        # The pattern goes through intct_expansion before the pairwise
        # stream and intct_reduction after scoring.
        # NOTE(review): presumably these add/remove a ROI axis so the
        # TimeDistributed stream can be reused -- confirm in layers.
        interaction_input = layers.intct_expansion(cfg)(
            [interaction_slow_input])
        hoi_pattern_features = layers.pairwiseStream(cfg=cfg)(
            [interaction_input])
        hoi_pattern_scores = keras.layers.TimeDistributed(
            keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
            ),
            name='scores4pattern')(hoi_pattern_features)
        hoi_pattern_scores = layers.intct_reduction(cfg)(
            [hoi_pattern_scores])
        ## FINAL ##
        # The slow variant always uses sigmoid and fixed layer names.
        hoi_score = keras.layers.Add()(
            [hoi_human_scores, hoi_object_scores, hoi_pattern_scores])
        hoi_final_score = keras.layers.Activation(
            "sigmoid", name="hoi_out_class")(hoi_score)
        # Pass the box inputs through identity layers so they can be
        # returned as outputs in test mode.
        human_slow_input = layers.identity(cfg)([human_slow_input])
        object_slow_input = layers.identity(cfg)([object_slow_input])
        if self.mode == 'test':
            hoi_outputs = [
                hoi_final_score, human_slow_input, object_slow_input
            ]
        else:
            hoi_outputs = [hoi_final_score]
        self.model_hoi = keras.models.Model(inputs=hoi_inputs,
                                            outputs=hoi_outputs)
        self.model_hoi.name = 'hoi'