Пример #1
0
def lstm_block(x,
               v,
               lstm_size=512,
               vocab_size=52,
               num_words=30,
               feed_previous=False,
               scope='lstm_block',
               reuse=False,
               batch_size=4):
    """Unroll a two-layer LSTM for ``num_words`` steps and return the logits.

    At every step the first LSTM cell consumes one "word" vector, its output
    is concatenated with ``v`` along axis 1, the result is fed to the second
    LSTM cell, and a ``fullyConnected`` layer with identity activation maps the
    second cell's output to ``vocab_size`` units.

    Args:
        x: tensor indexed as ``x[:, step, :]``; step 0 is always read, and
            steps ``1 .. num_words - 1`` are read when ``feed_previous`` is
            False (teacher forcing).
        v: tensor concatenated (axis 1) with the first LSTM's output at every
            step.  # presumably a per-example context/feature vector - confirm
        lstm_size: number of units of both ``BasicLSTMCell`` instances.
        vocab_size: number of output units of the final projection.
        num_words: number of unrolled steps (length of the returned list).
        feed_previous: when True, the softmax of the previous step's logits is
            fed back as the next input instead of ``x[:, step, :]``.
        scope: name of the enclosing variable scope.
        reuse: passed to the variable scopes / cells used for the first step so
            that a second call can share the weights of an earlier one.
        batch_size: static batch size used to build the zero initial states.

    Returns:
        A Python list with ``num_words`` tensors, one per step, each being the
        output of the ``vocab_size``-unit projection for that step.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # ---- step 0: this is where the variables are created (or reused) ----
        with tf.variable_scope('lstm_1', reuse=reuse):
            lstm_first = tf.contrib.rnn.BasicLSTMCell(lstm_size, reuse=reuse)
            state_first = lstm_first.zero_state(batch_size, tf.float32)

            first_out, state_first = lstm_first(x[:, 0, :], state_first)

            joined = tf.concat([first_out, v], axis=1)

        with tf.variable_scope('lstm_2', reuse=reuse):
            lstm_second = tf.contrib.rnn.BasicLSTMCell(lstm_size, reuse=reuse)
            state_second = lstm_second.zero_state(batch_size, tf.float32)

            second_out, state_second = lstm_second(joined, state_second)

        # Honour the caller's ``reuse`` flag (the original hard-coded False
        # here, which is inconsistent with the LSTM scopes above).
        logits = fullyConnected(second_out,
                                output_units=vocab_size,
                                std='xavier',
                                activation=tf.identity,
                                reuse=reuse,
                                scope='lstm_fc')

    outputs = [logits]

    # ---- steps 1 .. num_words - 1: always reuse the variables from step 0 ----
    with tf.variable_scope(scope, reuse=True):
        for step in range(1, num_words):
            if feed_previous:
                # Feed the model's own prediction back in.  The softmax op
                # lives in ``tf.nn``; ``tf.softmax`` does not exist.
                word = tf.nn.softmax(logits)
            else:
                # Teacher forcing: the ground-truth word for this step is
                # available, so the previous output is not fed back.
                word = x[:, step, :]

            with tf.variable_scope('lstm_1', reuse=True):
                hidden, state_first = lstm_first(word, state_first)

            hidden = tf.concat([hidden, v], axis=1)

            with tf.variable_scope('lstm_2', reuse=True):
                hidden, state_second = lstm_second(hidden, state_second)

            logits = fullyConnected(hidden,
                                    output_units=vocab_size,
                                    std='xavier',
                                    activation=tf.identity,
                                    reuse=True,
                                    scope='lstm_fc')

            outputs.append(logits)

    return outputs
def model_architecture(para):
    """Build the graph-CNN classification graph and an initialised session.

    The graph consists of: input placeholders, a first ``gcnLayer`` followed by
    dropout and ``graph_cluster_maxpooling``, a second ``gcnLayer`` followed by
    dropout, a max-reduction over axis 1, two ``fullyConnected`` layers, a
    per-sample weighted softmax cross-entropy loss plus L2 regularisation on
    all trainable variables whose name does not contain ``'bias'``, and an
    Adam training op.

    Args:
        para: global parameter instance.  The attributes read here are
            ``pointNumber``, ``clusterNumberL1``, ``nearestNeighborL1``,
            ``outputClassN``, ``gcn_1_filter_n``, ``gcn_2_filter_n``,
            ``chebyshev_1_Order`` and ``fc_1_n``.

    Returns:
        A ``(train_operation, sess)`` tuple.  ``train_operation`` is a dict
        mapping names to the placeholders and ops built here (keys: 'train',
        'loss', 'acc', 'loss_total', 'loss_reg', 'inputPC', 'inputGraph',
        'l2Graph', 'outputLabel', 'weights', 'predictLabels',
        'batch_index_l1', 'keep_prob_1', 'keep_prob_2', 'lr', 'batch_size').
        ``sess`` is a ``tf.Session`` in which the global variables have
        already been initialised.
    """
    # ------------------------------ placeholders ----------------------------
    inputPC = tf.placeholder(tf.float32, [None, para.pointNumber, 3])
    inputGraph = tf.placeholder(tf.float32,
                                [None, para.pointNumber * para.pointNumber])
    l2Graph = tf.placeholder(
        tf.float32, [None, para.clusterNumberL1 * para.clusterNumberL1])
    outputLabel = tf.placeholder(tf.float32, [None, para.outputClassN])
    batch_size = tf.placeholder(tf.int32)

    batch_index_l1 = tf.placeholder(
        tf.int32, [None, para.clusterNumberL1 * para.nearestNeighborL1])

    # The graphs arrive flattened; restore them to square matrices per sample.
    scaledLaplacian = tf.reshape(inputGraph,
                                 [-1, para.pointNumber, para.pointNumber])
    l2_scaledLaplacian = tf.reshape(
        l2Graph, [-1, para.clusterNumberL1, para.clusterNumberL1])

    weights = tf.placeholder(tf.float32, [None])
    lr = tf.placeholder(tf.float32)
    keep_prob_1 = tf.placeholder(tf.float32)
    keep_prob_2 = tf.placeholder(tf.float32)

    # ------------------------------ gcn layer 1 -----------------------------
    gcn_1 = gcnLayer(inputPC,
                     scaledLaplacian,
                     pointNumber=para.pointNumber,
                     inputFeatureN=3,
                     outputFeatureN=para.gcn_1_filter_n,
                     chebyshev_order=para.chebyshev_1_Order)
    gcn_1_output = tf.nn.dropout(gcn_1, keep_prob=keep_prob_1)
    gcn_1_pooling = graph_cluster_maxpooling(batch_index_l1,
                                             gcn_1_output,
                                             batch_size=batch_size,
                                             M=para.clusterNumberL1,
                                             k=para.nearestNeighborL1,
                                             n=para.gcn_1_filter_n)

    # Not consumed below (the concat that used it is disabled); the op is
    # still created so the graph matches the original definition.
    globalFeatures_1 = tf.reduce_max(gcn_1_pooling, axis=1)
    print(gcn_1_pooling)

    # ------------------------------ gcn layer 2 -----------------------------
    # NOTE(review): layer 2 also uses ``chebyshev_1_Order`` - confirm that a
    # separate order for the second layer is not intended.
    gcn_2 = gcnLayer(gcn_1_pooling,
                     l2_scaledLaplacian,
                     pointNumber=para.clusterNumberL1,
                     inputFeatureN=para.gcn_1_filter_n,
                     outputFeatureN=para.gcn_2_filter_n,
                     chebyshev_order=para.chebyshev_1_Order)

    gcn_2_output = tf.nn.dropout(gcn_2, keep_prob=keep_prob_1)
    # No second cluster pooling stage: the dropout output is used directly.
    gcn_2_pooling = gcn_2_output
    print(gcn_2_pooling)

    globalFeatures = tf.reduce_max(gcn_2_pooling, axis=1)
    print(globalFeatures)

    globalFeatures = tf.nn.dropout(globalFeatures, keep_prob=keep_prob_2)
    print("The global feature is {}".format(globalFeatures))

    globalFeatureN = para.gcn_2_filter_n * 1

    # ------------------------- fully connected layers -----------------------
    fc_layer_1 = fullyConnected(globalFeatures,
                                inputFeatureN=globalFeatureN,
                                outputFeatureN=para.fc_1_n)
    fc_layer_1 = tf.nn.relu(fc_layer_1)
    fc_layer_1 = tf.nn.dropout(fc_layer_1, keep_prob=keep_prob_2)
    print("The output of the first fc layer is {}".format(fc_layer_1))

    fc_layer_2 = fullyConnected(fc_layer_1,
                                inputFeatureN=para.fc_1_n,
                                outputFeatureN=para.outputClassN)
    print("The output of the second fc layer is {}".format(fc_layer_2))

    # ------------------------------- loss -----------------------------------
    predictSoftMax = tf.nn.softmax(fc_layer_2)
    predictLabels = tf.argmax(predictSoftMax, axis=1)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=fc_layer_2,
                                                   labels=outputLabel)
    loss = tf.multiply(loss, weights)  # per-sample weighting
    loss = tf.reduce_mean(loss)

    # L2 penalty on every trainable variable except those named '*bias*'.
    trainable_vars = tf.trainable_variables()
    loss_reg = tf.add_n([
        tf.nn.l2_loss(var) for var in trainable_vars
        if 'bias' not in var.name
    ]) * 8e-6  # best: 8 #last: 10
    loss_total = loss + loss_reg

    correct_prediction = tf.equal(predictLabels, tf.argmax(outputLabel,
                                                           axis=1))
    acc = tf.cast(correct_prediction, tf.float32)
    acc = tf.reduce_mean(acc)

    train = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss_total)

    # --------------------------- parameter count ----------------------------
    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        param_count = 1
        for dim in variable.get_shape():
            param_count *= dim.value
        total_parameters += param_count
    print('Total parameters number is {}'.format(total_parameters))

    # ------------------------------ session ---------------------------------
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    train_operation = {
        'train': train,
        'loss': loss,
        'acc': acc,
        'loss_total': loss_total,
        'loss_reg': loss_reg,
        'inputPC': inputPC,
        'inputGraph': inputGraph,
        'l2Graph': l2Graph,
        'outputLabel': outputLabel,
        'weights': weights,
        'predictLabels': predictLabels,
        'batch_index_l1': batch_index_l1,
        'keep_prob_1': keep_prob_1,
        'keep_prob_2': keep_prob_2,
        'lr': lr,
        'batch_size': batch_size
    }
    return train_operation, sess
def model_architecture(para):
    """Construct the data-flow graph of the graph-CNN classifier.

    Two ``gcnLayer`` stages are stacked (the first is followed by
    ``graph_cluster_maxpooling``), the result is max-reduced over axis 1 and
    passed through two ``fullyConnected`` layers.  The training objective is a
    per-sample weighted softmax cross-entropy plus an L2 term over the
    trainable variables whose name does not contain ``'bias'``; it is
    minimised with Adam.

    Args:
        para: global parameter instance providing ``pointNumber``,
            ``clusterNumberL1``, ``nearestNeighborL1``, ``outputClassN``,
            ``gcn_1_filter_n``, ``gcn_2_filter_n``, ``chebyshev_1_Order`` and
            ``fc_1_n``.

    Returns:
        ``(ops, sess)`` where ``ops`` is a dict of the placeholders and ops
        created here (keyed 'train', 'loss', 'acc', 'loss_total', 'loss_reg',
        'inputPC', 'inputGraph', 'l2Graph', 'outputLabel', 'weights',
        'predictLabels', 'batch_index_l1', 'keep_prob_1', 'keep_prob_2', 'lr',
        'batch_size') and ``sess`` is a ``tf.Session`` that has already run
        the global-variable initialiser.
    """
    n_points = para.pointNumber
    n_clusters = para.clusterNumberL1

    # Inputs.
    inputPC = tf.placeholder(tf.float32, [None, n_points, 3])
    inputGraph = tf.placeholder(tf.float32, [None, n_points * n_points])
    l2Graph = tf.placeholder(tf.float32, [None, n_clusters * n_clusters])
    outputLabel = tf.placeholder(tf.float32, [None, para.outputClassN])
    batch_size = tf.placeholder(tf.int32)

    batch_index_l1 = tf.placeholder(
        tf.int32, [None, n_clusters * para.nearestNeighborL1])

    # Un-flatten the two graph inputs into one square matrix per sample.
    scaledLaplacian = tf.reshape(inputGraph, [-1, n_points, n_points])
    l2_scaledLaplacian = tf.reshape(l2Graph, [-1, n_clusters, n_clusters])

    weights = tf.placeholder(tf.float32, [None])
    lr = tf.placeholder(tf.float32)
    keep_prob_1 = tf.placeholder(tf.float32)
    keep_prob_2 = tf.placeholder(tf.float32)

    # gcn layer 1 (+ dropout + cluster max pooling)
    gcn_1 = gcnLayer(inputPC, scaledLaplacian,
                     pointNumber=n_points,
                     inputFeatureN=3,
                     outputFeatureN=para.gcn_1_filter_n,
                     chebyshev_order=para.chebyshev_1_Order)
    gcn_1_output = tf.nn.dropout(gcn_1, keep_prob=keep_prob_1)
    gcn_1_pooling = graph_cluster_maxpooling(
        batch_index_l1, gcn_1_output,
        batch_size=batch_size,
        M=n_clusters,
        k=para.nearestNeighborL1,
        n=para.gcn_1_filter_n)

    # Currently unused: the concatenation that consumed it is disabled.  The
    # op is still built so the graph is the same as before.
    globalFeatures_1 = tf.reduce_max(gcn_1_pooling, axis=1)
    print(gcn_1_pooling)

    # gcn layer 2 (+ dropout, no pooling stage)
    # NOTE(review): reuses ``chebyshev_1_Order`` for the second layer - verify
    # this is intentional.
    gcn_2 = gcnLayer(gcn_1_pooling, l2_scaledLaplacian,
                     pointNumber=n_clusters,
                     inputFeatureN=para.gcn_1_filter_n,
                     outputFeatureN=para.gcn_2_filter_n,
                     chebyshev_order=para.chebyshev_1_Order)
    gcn_2_output = tf.nn.dropout(gcn_2, keep_prob=keep_prob_1)
    gcn_2_pooling = gcn_2_output
    print(gcn_2_pooling)

    globalFeatures = tf.reduce_max(gcn_2_pooling, axis=1)
    print(globalFeatures)

    globalFeatures = tf.nn.dropout(globalFeatures, keep_prob=keep_prob_2)
    print("The global feature is {}".format(globalFeatures))

    globalFeatureN = para.gcn_2_filter_n * 1

    # fully connected layer 1
    fc_layer_1 = fullyConnected(globalFeatures,
                                inputFeatureN=globalFeatureN,
                                outputFeatureN=para.fc_1_n)
    fc_layer_1 = tf.nn.relu(fc_layer_1)
    fc_layer_1 = tf.nn.dropout(fc_layer_1, keep_prob=keep_prob_2)
    print("The output of the first fc layer is {}".format(fc_layer_1))

    # fully connected layer 2 (produces the class logits)
    fc_layer_2 = fullyConnected(fc_layer_1,
                                inputFeatureN=para.fc_1_n,
                                outputFeatureN=para.outputClassN)
    print("The output of the second fc layer is {}".format(fc_layer_2))

    # =================================Define loss===========================
    predictSoftMax = tf.nn.softmax(fc_layer_2)
    predictLabels = tf.argmax(predictSoftMax, axis=1)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=fc_layer_2,
                                                   labels=outputLabel)
    loss = tf.multiply(loss, weights)
    loss = tf.reduce_mean(loss)

    # ``vars`` would shadow the builtin, hence the longer name.
    trainable = tf.trainable_variables()
    l2_terms = [tf.nn.l2_loss(t) for t in trainable if 'bias' not in t.name]
    loss_reg = tf.add_n(l2_terms) * 8e-6  # best: 8 #last: 10
    loss_total = loss + loss_reg

    correct_prediction = tf.equal(predictLabels,
                                  tf.argmax(outputLabel, axis=1))
    acc = tf.cast(correct_prediction, tf.float32)
    acc = tf.reduce_mean(acc)

    train = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss_total)

    # Report the number of trainable scalars.
    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        size = 1
        for dim in variable.get_shape():
            size *= dim.value
        total_parameters += size
    print('Total parameters number is {}'.format(total_parameters))

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    ops = {
        'train': train,
        'loss': loss,
        'acc': acc,
        'loss_total': loss_total,
        'loss_reg': loss_reg,
        'inputPC': inputPC,
        'inputGraph': inputGraph,
        'l2Graph': l2Graph,
        'outputLabel': outputLabel,
        'weights': weights,
        'predictLabels': predictLabels,
        'batch_index_l1': batch_index_l1,
        'keep_prob_1': keep_prob_1,
        'keep_prob_2': keep_prob_2,
        'lr': lr,
        'batch_size': batch_size,
    }
    return ops, sess
def model_architecture(para):
    """Build the graph-CNN classifier that uses global pooling after each layer.

    Two ``gcnLayer`` stages operate on the same reshaped graph input; each
    stage's dropout output is summarised by ``globalPooling`` and the two
    summaries are concatenated along axis 1 before two ``fullyConnected``
    layers.  The loss is a per-sample weighted softmax cross-entropy plus an
    L2 term over the trainable variables whose name does not contain
    ``'bias'``, minimised with Adam.  No session is created here.

    Args:
        para: global parameter instance providing ``pointNumber``,
            ``outputClassN``, ``gcn_1_filter_n``, ``gcn_2_filter_n``,
            ``chebyshev_1_Order``, ``chebyshev_2_Order`` and ``fc_1_n``.

    Returns:
        A dict of the placeholders and ops built here, keyed 'train',
        'loss_total', 'loss', 'acc', 'loss_reg', 'inputPC', 'inputGraph',
        'outputLabel', 'weights', 'predictLabels', 'keep_prob_1',
        'keep_prob_2' and 'lr'.
    """
    inputPC = tf.placeholder(tf.float32, [None, para.pointNumber, 3])
    inputGraph = tf.placeholder(tf.float32,
                                [None, para.pointNumber * para.pointNumber])
    outputLabel = tf.placeholder(tf.float32, [None, para.outputClassN])

    # The graph arrives flattened; restore one square matrix per sample.
    scaledLaplacian = tf.reshape(inputGraph,
                                 [-1, para.pointNumber, para.pointNumber])

    weights = tf.placeholder(tf.float32, [None])
    lr = tf.placeholder(tf.float32)
    keep_prob_1 = tf.placeholder(tf.float32)
    keep_prob_2 = tf.placeholder(tf.float32)

    # gcn layer 1
    gcn_1 = gcnLayer(inputPC, scaledLaplacian,
                     pointNumber=para.pointNumber,
                     inputFeatureN=3,
                     outputFeatureN=para.gcn_1_filter_n,
                     chebyshev_order=para.chebyshev_1_Order)
    gcn_1_output = tf.nn.dropout(gcn_1, keep_prob=keep_prob_1)
    gcn_1_pooling = globalPooling(gcn_1_output,
                                  featureNumber=para.gcn_1_filter_n)
    print("The output of the first gcn layer is {}".format(gcn_1_pooling))
    print(gcn_1_pooling)

    # gcn layer 2 (consumes the un-pooled output of layer 1)
    gcn_2 = gcnLayer(gcn_1_output, scaledLaplacian,
                     pointNumber=para.pointNumber,
                     inputFeatureN=para.gcn_1_filter_n,
                     outputFeatureN=para.gcn_2_filter_n,
                     chebyshev_order=para.chebyshev_2_Order)
    gcn_2_output = tf.nn.dropout(gcn_2, keep_prob=keep_prob_1)
    gcn_2_pooling = globalPooling(gcn_2_output,
                                  featureNumber=para.gcn_2_filter_n)
    print("The output of the second gcn layer is {}".format(gcn_2_pooling))

    # A third gcn layer existed here once but is disabled.

    # concatenate global features
    globalFeatures = tf.concat([gcn_1_pooling, gcn_2_pooling], axis=1)
    globalFeatures = tf.nn.dropout(globalFeatures, keep_prob=keep_prob_2)
    print("The global feature is {}".format(globalFeatures))
    # NOTE(review): the factor 2 presumably reflects ``globalPooling``
    # returning two values per feature - confirm against its definition.
    globalFeatureN = (para.gcn_1_filter_n + para.gcn_2_filter_n) * 2

    # fully connected layer 1
    fc_layer_1 = fullyConnected(globalFeatures,
                                inputFeatureN=globalFeatureN,
                                outputFeatureN=para.fc_1_n)
    fc_layer_1 = tf.nn.relu(fc_layer_1)
    fc_layer_1 = tf.nn.dropout(fc_layer_1, keep_prob=keep_prob_2)
    print("The output of the first fc layer is {}".format(fc_layer_1))

    # fully connected layer 2 (produces the class logits)
    fc_layer_2 = fullyConnected(fc_layer_1,
                                inputFeatureN=para.fc_1_n,
                                outputFeatureN=para.outputClassN)
    print("The output of the second fc layer is {}".format(fc_layer_2))

    # =================================Define loss===========================
    predictSoftMax = tf.nn.softmax(fc_layer_2)
    predictLabels = tf.argmax(predictSoftMax, axis=1)
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=fc_layer_2,
                                                   labels=outputLabel)
    loss = tf.multiply(loss, weights)  # per-sample weighting
    loss = tf.reduce_mean(loss)

    # L2 penalty on every trainable variable except those named '*bias*'.
    trainable_vars = tf.trainable_variables()
    loss_reg = tf.add_n([
        tf.nn.l2_loss(var) for var in trainable_vars
        if 'bias' not in var.name
    ]) * 8e-6  # best: 8 #last: 10
    loss_total = loss + loss_reg

    correct_prediction = tf.equal(predictLabels,
                                  tf.argmax(outputLabel, axis=1))
    acc = tf.cast(correct_prediction, tf.float32)
    acc = tf.reduce_mean(acc)

    train = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss_total)

    # Report the number of trainable scalars.
    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        param_count = 1
        for dim in variable.get_shape():
            param_count *= dim.value
        total_parameters += param_count
    print('Total parameters number is {}'.format(total_parameters))

    train_operation = {
        'train': train,
        'loss_total': loss_total,
        'loss': loss,
        'acc': acc,
        'loss_reg': loss_reg,
        'inputPC': inputPC,
        'inputGraph': inputGraph,
        'outputLabel': outputLabel,
        'weights': weights,
        'predictLabels': predictLabels,
        'keep_prob_1': keep_prob_1,
        'keep_prob_2': keep_prob_2,
        'lr': lr,
    }

    return train_operation
Пример #5
0
    y = tf.placeholder(shape=[None, crop_dims, crop_dims, C], dtype=tf.float32)

    ###############
    # I2I
    ###############

    yclass, yhat, o3, o4 = tf_util.I2INet(x,
                                          nfilters=Nfilters,
                                          activation=leaky_relu,
                                          init=init)

    y_vec = tf.reshape(yhat, (Nbatch, crop_dims**2))

    sp = tf_util.fullyConnected(y_vec,
                                crop_dims,
                                leaky_relu,
                                std='xavier',
                                scope='sp1')
    sp = tf_util.fullyConnected(y_vec,
                                crop_dims**2,
                                leaky_relu,
                                std='xavier',
                                scope='sp2')
    sp = tf.reshape(sp, (Nbatch, crop_dims, crop_dims, 1))

    y_sp = tf_util.conv2D(sp,
                          nfilters=Nfilters,
                          activation=leaky_relu,
                          init=init,
                          scope='sp3')
    y_sp_1 = tf_util.conv2D(y_sp,
Пример #6
0
def conv_block(x,
               num_filters=32,
               filter_dims=[5, 5],
               fc_size=1024,
               scope='conv_block',
               batch_size=4):
    """Encode an image tensor into a vector with 7 conv layers and one FC layer.

    The stack alternates strided ('VALID') and non-strided ('SAME')
    ``conv2D`` layers, all with ReLU activation, then flattens the result to
    ``[batch_size, -1]`` and applies a ReLU ``fullyConnected`` layer.

    Layer plan (filters are relative to the ``num_filters`` argument ``F``):
        conv1: 7x7, F filters,   stride 3, VALID
        conv2: filter_dims, F,   stride 1, SAME
        conv3: filter_dims, 2F,  stride 2, VALID
        conv4: filter_dims, 2F,  stride 1, SAME
        conv5: filter_dims, 4F,  stride 2, VALID
        conv6: filter_dims, 4F,  stride 1, SAME
        conv7: filter_dims, 32,  stride 2, VALID  (filter count is fixed)

    Args:
        x: input tensor handed to the first ``conv2D``.
        num_filters: number of filters of the first two conv layers.
        filter_dims: kernel size of every conv layer except the first.  The
            default list is never mutated here.
        fc_size: number of output units of the final fully connected layer.
        scope: name of the enclosing variable scope.
        batch_size: static batch size used when flattening the feature map.

    Returns:
        The output of the final ``fullyConnected`` layer (``fc_size`` units).
    """

    def relu_conv(inputs, filters, strides, padding, name):
        # Shared call shape of every conv layer after the first one.
        return conv2D(inputs,
                      filter_dims,
                      filters=filters,
                      strides=strides,
                      std='xavier',
                      padding=padding,
                      activation=tf.nn.relu,
                      scope=name)

    with tf.variable_scope(scope):
        # downsample image with stride [3,3]
        a = conv2D(x,
                   dims=[7, 7],
                   filters=num_filters,
                   strides=[3, 3],
                   std='xavier',
                   padding='VALID',
                   activation=tf.nn.relu,
                   scope='conv1')

        # no downsampling with stride [1,1]
        a = relu_conv(a, num_filters, [1, 1], 'SAME', 'conv2')

        num_filters = 2 * num_filters
        # downsample image with stride [2,2]
        a = relu_conv(a, num_filters, [2, 2], 'VALID', 'conv3')
        # no downsampling with stride [1,1]
        a = relu_conv(a, num_filters, [1, 1], 'SAME', 'conv4')

        num_filters = 2 * num_filters
        # downsample image with stride [2,2]
        a = relu_conv(a, num_filters, [2, 2], 'VALID', 'conv5')
        # no downsampling with stride [1,1]
        a = relu_conv(a, num_filters, [1, 1], 'SAME', 'conv6')

        # downsample image with stride [2,2]
        # This layer previously reused the scope name 'conv5' although it has
        # a different filter count; it gets its own scope so its variables
        # cannot collide with the real conv5.
        num_filters = 32
        a = relu_conv(a, num_filters, [2, 2], 'VALID', 'conv7')

        # Convert to vector with fullyconnected layer
        a = tf.reshape(a, shape=[batch_size, -1])

        a = fullyConnected(a,
                           output_units=fc_size,
                           activation=tf.nn.relu,
                           std='xavier',
                           scope='fc')

        print("output vector of conv_block is: {}".format(a))
        return a
Пример #7
0
    def create_models(self):
        cfg = self.cfg

        if self.mode == 'test':
            print('Creating test models....')
        else:
            print('Creating train models....')

        ########################
        ###### Parameters ######
        ########################
        nb_anchors = cfg.nb_anchors
        pool_size = cfg.pool_size
        nb_object_classes = cfg.nb_object_classes
        nb_hoi_classes = cfg.nb_hoi_classes
        print('   Obj. classes:', nb_object_classes)
        print('   HOI classes:', nb_hoi_classes)

        ########################
        ######## Inputs ########
        ########################

        # RPN #
        img_input = keras.layers.Input(shape=(None, None, 3),
                                       name='input_image')

        # DET #
        nb_detection_rois = cfg.nb_detection_rois if self.mode == 'train' else None
        img_det_input = keras.layers.Input(shape=(None, None, 3),
                                           name='input_image')
        roi_input = keras.layers.Input(shape=(nb_detection_rois, 5),
                                       name='input_roi')

        # HOI #
        nb_hoi_rois = cfg.nb_hoi_rois if self.mode == 'train' else None
        img_hoi_input = keras.layers.Input(shape=(None, None, 3),
                                           name='input_image')
        human_fast_input = keras.layers.Input(shape=(nb_hoi_rois, 5),
                                              name="input_human")
        object_fast_input = keras.layers.Input(shape=(nb_hoi_rois, 5),
                                               name="input_object")
        interaction_fast_input = keras.layers.Input(shape=(nb_hoi_rois,
                                                           cfg.winShape[0],
                                                           cfg.winShape[1], 2),
                                                    name="input_interaction")

        human_img_input = keras.layers.Input(shape=(227, 227, 3),
                                             name="input_human_img")

        object_img_input = keras.layers.Input(shape=(227, 227, 3),
                                              name="input_object_img")
        interaction_slow_input = keras.layers.Input(shape=(cfg.winShape[0],
                                                           cfg.winShape[1], 2),
                                                    name="input_interaction")
        human_slow_input = keras.layers.Input(shape=(5, ), name="input_human")
        object_slow_input = keras.layers.Input(shape=(5, ),
                                               name="input_object")

        # SHARED #
        features_input = keras.layers.Input(shape=(None, None, 512),
                                            name="input_features")

        ########################
        ######### RPN ##########
        ########################
        if self.do_rpn:
            print('   Creating RPN model...')
            output_features = models.VGG16_buildin(cfg)(img_input)
            self.nb_models += 1

            rpn_inputs = [img_input]

            rpn_features = layers.rpn(cfg)([output_features])

            x_class = keras.layers.Conv2D(
                filters=nb_anchors,
                kernel_size=(1, 1),
                activation='sigmoid',
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                bias_regularizer=keras.regularizers.l2(cfg.weight_decay),
                name='rpn_out_class')(rpn_features)

            x_deltas = keras.layers.Conv2D(
                filters=nb_anchors * 4,
                kernel_size=(1, 1),
                activation='linear',
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                bias_regularizer=keras.regularizers.l2(cfg.weight_decay),
                name='rpn_out_regress')(rpn_features)

            if self.mode == 'test' and cfg.use_shared_cnn:
                rpn_outputs = [x_class, x_deltas, output_features]
            else:
                rpn_outputs = [x_class, x_deltas]

            self.model_rpn = keras.models.Model(inputs=rpn_inputs,
                                                outputs=rpn_outputs)
            self.model_rpn.name = 'rpn'

            # Only train from conv3_1
            print('   Freezing first few layers...')

            nb_freeze_layers = 17 if cfg.do_finetune else cfg.nb_freeze_layers
            print('   Freeze up to', nb_freeze_layers)
            for i, layer in enumerate(self.model_rpn.layers):
                layer.trainable = False
                if i == nb_freeze_layers:
                    break

        ########################
        ###### Detection #######
        ########################
        if self.do_det:
            print('   Creating DET model...')

            self.nb_models += 1

            if self.mode == 'test' and cfg.use_shared_cnn:
                print('   -using shared CNN')
                output_features_det = features_input
                detection_inputs = [features_input, roi_input]
            else:
                output_features_det = models.VGG16_buildin(cfg)(img_det_input)
                detection_inputs = [img_det_input, roi_input]

            object_rois = layers.RoiPoolingConv(
                pool_size=pool_size, batch_size=cfg.nb_detection_rois)(
                    [output_features_det, roi_input])

            object_features = layers.fullyConnected(
                cfg, stream='det', use_dropout=True)([object_rois])

            object_scores = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=nb_object_classes,
                    activation='softmax',
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.01),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                    bias_regularizer=keras.regularizers.l2(cfg.weight_decay)),
                name="det_out_class" if not cfg.do_finetune else
                "det_fineout_class")(object_features)

            object_deltas = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=4 * (nb_object_classes - 1),
                    activation="linear",
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.001),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                    bias_regularizer=keras.regularizers.l2(cfg.weight_decay)),
                name="det_out_regress" if not cfg.do_finetune else
                "det_fineout_regress")(object_features)

            detection_outputs = [object_scores, object_deltas]

            self.model_det = keras.models.Model(inputs=detection_inputs,
                                                outputs=detection_outputs)
            self.model_det.name = 'det'

            # Only train from conv3_1
            nb_freeze_layers = 17 if cfg.do_finetune else cfg.nb_freeze_layers
            for i, layer in enumerate(self.model_det.layers):
                layer.trainable = False
                if i == nb_freeze_layers:
                    break

        ########################
        ######### HOI ##########
        ########################
        if self.do_hoi and cfg.do_fast_hoi:
            print('   Creating fast HOI model...')
            self.nb_models += 1

            if self.mode == 'test' and cfg.use_shared_cnn:
                print('   -using shared CNN')
                output_features_hoi = features_input
                hoi_inputs = [
                    features_input, human_fast_input, object_fast_input,
                    interaction_fast_input
                ]
            else:
                if cfg.backbone == 'vgg':
                    output_features_hoi = models.VGG16_buildin(cfg)(
                        img_hoi_input)
                else:
                    output_features_hoi = models.AlexNet_buildin(cfg)(
                        img_hoi_input)
                hoi_inputs = [
                    img_hoi_input, human_fast_input, object_fast_input,
                    interaction_fast_input
                ]

            ## HUMAN ##
            hoi_human_rois = layers.RoiPoolingConv(
                pool_size=pool_size,
                batch_size=cfg.nb_hoi_rois,
                mode=self.mode)([output_features_hoi, human_fast_input])

            hoi_human_features = layers.fullyConnected(
                cfg, stream='human')([hoi_human_rois])

            hoi_human_scores = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=1 * nb_hoi_classes,
                    activation=None,
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.01),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                ),
                name="scores4human" if not cfg.do_finetune else
                "scores4human_finetune")(hoi_human_features)

            ## OBJECT ##
            hoi_object_rois = layers.RoiPoolingConv(
                pool_size=pool_size,
                batch_size=cfg.nb_hoi_rois,
                mode=self.mode)([output_features_hoi, object_fast_input])

            hoi_object_features = layers.fullyConnected(
                cfg, stream='object')([hoi_object_rois])

            hoi_object_scores = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=1 * nb_hoi_classes,
                    activation=None,
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.01),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                ),
                name="scores4object" if not cfg.do_finetune else
                "scores4object_finetune")(hoi_object_features)

            ## INTERACTION ##
            hoi_pattern_features = layers.pairwiseStream(cfg=cfg)(
                [interaction_fast_input])
            hoi_pattern_scores = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=1 * nb_hoi_classes,
                    activation=None,
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.01),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                ),
                name="scores4pattern" if not cfg.do_finetune else
                "scores4pattern_finetune")(hoi_pattern_features)

            ## FINAL ##
            hoi_score = keras.layers.Add()(
                [hoi_human_scores, hoi_object_scores, hoi_pattern_scores])

            hoi_final_score = keras.layers.Activation(
                "softmax" if cfg.do_categorical_hoi else 'sigmoid',
                name="hoi_out_class"
                if not cfg.do_finetune else "hoi_fineout_class")(hoi_score)

            human_fast_input = layers.identity(cfg)([human_fast_input])
            object_fast_input = layers.identity(cfg)([object_fast_input])

            if self.mode == 'test':
                hoi_outputs = [
                    hoi_final_score, human_fast_input, object_fast_input
                ]
            else:
                hoi_outputs = [hoi_final_score]

            self.model_hoi = keras.models.Model(inputs=hoi_inputs,
                                                outputs=hoi_outputs)
            self.model_hoi.name = 'hoi'

        if self.do_hoi and not cfg.do_fast_hoi:
            print('   Creating slow HOI model...')
            self.nb_models += 1

            if cfg.backbone == 'vgg':
                hoi_human_features = models.VGG16_buildin(cfg)(human_img_input)
                hoi_object_features = models.VGG16_buildin(cfg)(
                    object_img_input)
            else:
                hoi_human_features = models.AlexNet_buildin(cfg)(
                    human_img_input)
                hoi_object_features = models.AlexNet_buildin(cfg)(
                    object_img_input)

            hoi_inputs = [
                human_img_input, object_img_input, interaction_slow_input,
                human_slow_input, object_slow_input
            ]

            ## HUMAN ##
            hoi_human_scores = keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                name="scores4human")(hoi_human_features)

            ## OBJECT ##
            hoi_object_scores = keras.layers.Dense(
                units=1 * nb_hoi_classes,
                activation=None,
                kernel_initializer=keras.initializers.RandomNormal(
                    stddev=0.01),
                kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                name="scores4object")(hoi_object_features)

            ## INTERACTION ##
            interaction_input = layers.intct_expansion(cfg)(
                [interaction_slow_input])

            hoi_pattern_features = layers.pairwiseStream(cfg=cfg)(
                [interaction_input])
            hoi_pattern_scores = keras.layers.TimeDistributed(
                keras.layers.Dense(
                    units=1 * nb_hoi_classes,
                    activation=None,
                    kernel_initializer=keras.initializers.RandomNormal(
                        stddev=0.01),
                    kernel_regularizer=keras.regularizers.l2(cfg.weight_decay),
                ),
                name='scores4pattern')(hoi_pattern_features)

            hoi_pattern_scores = layers.intct_reduction(cfg)(
                [hoi_pattern_scores])

            ## FINAL ##
            hoi_score = keras.layers.Add()(
                [hoi_human_scores, hoi_object_scores, hoi_pattern_scores])

            hoi_final_score = keras.layers.Activation(
                "sigmoid", name="hoi_out_class")(hoi_score)

            human_slow_input = layers.identity(cfg)([human_slow_input])
            object_slow_input = layers.identity(cfg)([object_slow_input])

            if self.mode == 'test':
                hoi_outputs = [
                    hoi_final_score, human_slow_input, object_slow_input
                ]
            else:
                hoi_outputs = [hoi_final_score]

            self.model_hoi = keras.models.Model(inputs=hoi_inputs,
                                                outputs=hoi_outputs)
            self.model_hoi.name = 'hoi'