def build(self):
        """Assemble the classification graph for the configured backbone.

        Creates the image placeholder, passes it through
        ``tfutils.mean_value`` together with ``self.img_mean``, runs the
        network selected by ``self.base_net`` and stores the results in
        ``self.logits`` and ``self.prob`` (softmax over the last axis).
        A label placeholder ``self.gt`` is created afterwards, and
        ``self.loss()`` is called when ``self.is_train`` is truthy.

        Raises:
            ValueError: if ``self.base_net`` is not one of ``'vgg16'``,
                ``'res50'``, ``'res101'`` or ``'res152'``.
        """
        # Constructor names of the supported ResNet variants, keyed by the
        # ``base_net`` identifier. They are resolved lazily in the branch
        # below so that a VGG build never touches ``resnet_v1``.
        resnet_builders = {
            'res50': 'resnet_v1_50',
            'res101': 'resnet_v1_101',
            'res152': 'resnet_v1_152',
        }

        # Input
        image_shape = [None] + [self.img_size[axis] for axis in range(3)]
        self.input = tf.placeholder(dtype=tf.float32, shape=image_shape)
        self.input_mean = tfutils.mean_value(self.input, self.img_mean)

        backbone = self.base_net
        if backbone == 'vgg16':
            with slim.arg_scope(vgg.vgg_arg_scope()):
                scores, _ = vgg.vgg_16(self.input_mean, self.num_classes)
                self.prob = tf.nn.softmax(scores, -1)
                self.logits = scores
        elif backbone in resnet_builders:
            make_resnet = getattr(resnet_v1, resnet_builders[backbone])
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, _ = make_resnet(self.input_mean,
                                     self.num_classes,
                                     is_training=self.is_train)
                # The network output is indexed at spatial position (0, 0)
                # to obtain one score vector per image.
                self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
                self.logits = net[:, 0, 0, :]
        else:
            raise ValueError(
                'base network should be vgg16, res50, -101, -152...')

        # Ground-truth class indices, one per image in the batch.
        self.gt = tf.placeholder(dtype=tf.int32, shape=[None])

        if self.is_train:
            self.loss()
示例#2
0

# Define the output folder
out_file = "gw_{mode}_{network}_{feature_name}_{size}".format(
    mode=args.mode,
    network=args.network,
    feature_name=args.feature_name,
    size=args.img_size,
)


print("Create networks...")
if args.network == "resnet":
    ft_output = resnet.create_resnet(images,
                                     resnet_out=args.feature_name,
                                     resnet_version=args.resnet_version,
                                     is_training=False)
    # create network
    with slim.arg_scope(slim_utils.resnet_arg_scope(is_training=False)):
        _, end_points = resnet_v1.resnet_v1_152(images, 1000)  # 1000 is the number of softmax class

elif args.network == "vgg":
    _, end_points = vgg.vgg_16(images, is_training=False, dropout_keep_prob=1.0)
    # The end-point key is a TensorFlow scope name, not a filesystem path, so
    # it is joined with "/" explicitly instead of the OS-specific separator
    # that os.path.join would insert.
    ft_name = "vgg_16/" + args.feature_name
    ft_output = end_points[ft_name]
else:
    # Raise instead of `assert False`: assertions are stripped under
    # `python -O`, which would leave `ft_output` undefined further down.
    raise ValueError("Incorrect Network")


extract_features(
    img_input = images,
    ft_output = ft_output,
    dataset_cstor = dataset_cstor,
    dataset_args = {"folder": args.data_dir, "image_builder":image_builder, "crop_builder":crop_builder, "dataset_name": args.dataset_name},
def main(_):
    """Build the multi-stream video classification graph and run training.

    The graph consumes four frame tensors per video (``mv``, ``flow``, ``i``
    and ``r``), each holding ``FLAGS.num_segments`` frames of 224x224x3:

    * an ``FLMG`` stack of six 3x3 convolutions maps the channel-wise
      concatenation of ``mv`` and ``r`` to a 3-channel tensor, which is
      regressed onto ``flow`` with a summed-squared-error loss;
    * ``resnet_v1_152`` encodes the ``i`` frames, and two ``resnet_v1_50``
      networks encode the FLMG output and the ``r`` frames;
    * ``model.inference_feature`` produces per-stream scores and
      ``model.inference_fusion`` combines them using the ``fusion`` variable.

    The four cross-entropy losses plus the FLMG loss are minimised jointly
    with Adam. Every 1000 steps (except step 0) the model is evaluated on
    100 test batches and a checkpoint is written. Losses are printed and
    summaries are written every 10 steps.

    Args:
        _: Unused positional argument (presumably the argv list passed by
            ``tf.app.run`` -- confirm against the caller).
    """
    # ------------------------------------------------------------------
    # Placeholders
    # ------------------------------------------------------------------
    with tf.name_scope('input_placeholder'):
        frame_shape = (None, FLAGS.num_segments, 224, 224, 3)
        mv_placeholder = tf.placeholder(tf.float32, shape=frame_shape, name='mv_frame')
        flow_placeholder = tf.placeholder(tf.float32, shape=frame_shape, name='flow_frame')
        i_placeholder = tf.placeholder(tf.float32, shape=frame_shape, name='i_frame')
        r_placeholder = tf.placeholder(tf.float32, shape=frame_shape, name='r_frame')

    with tf.name_scope('label_placeholder'):
        # `(None)` is plain `None`, i.e. the label shape is left unspecified.
        label_placeholder = tf.placeholder(tf.int32, shape=(None), name='labels')

    with tf.name_scope('accuracy'):
        # Scalar placeholders that only exist so the most recent validation
        # accuracies (computed in Python) can be written as summaries.
        combine_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        i_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        mv_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        r_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        tf.summary.scalar('combine_acc', combine_value_)
        tf.summary.scalar('i_acc', i_value_)
        tf.summary.scalar('mv_acc', mv_value_)
        tf.summary.scalar('r_acc', r_value_)

    print('Finish placeholder.')

    with tf.name_scope('flatten_input'):
        # Fold the segment axis into the batch axis, since each video
        # contributes multiple segments.
        b_size = tf.shape(mv_placeholder)[0]
        flat_mv = tf.reshape(mv_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3])
        flat_flow = tf.reshape(flow_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3])
        flat_i = tf.reshape(i_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3])
        flat_r = tf.reshape(r_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3])

    # ------------------------------------------------------------------
    # Variables of the per-stream fully connected heads and the fusion
    # ------------------------------------------------------------------
    with tf.variable_scope('fc_var'):
        mv_weights = {
            'w1': _variable_with_weight_decay('wmv1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wmv2', [512, N_CLASS], 0.0005),
        }
        mv_biases = {
            'b1': _variable_with_weight_decay('bmv1', [512], 0.00),
            'b2': _variable_with_weight_decay('bmv2', [N_CLASS], 0.00),
        }
        i_weights = {
            'w1': _variable_with_weight_decay('wi1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wi2', [512, N_CLASS], 0.0005),
        }
        i_biases = {
            'b1': _variable_with_weight_decay('bi1', [512], 0.00),
            'b2': _variable_with_weight_decay('bi2', [N_CLASS], 0.00),
        }
        r_weights = {
            'w1': _variable_with_weight_decay('wr1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wr2', [512, N_CLASS], 0.0005),
        }
        r_biases = {
            'b1': _variable_with_weight_decay('br1', [512], 0.00),
            'b2': _variable_with_weight_decay('br2', [N_CLASS], 0.00),
        }

    with tf.variable_scope('fusion_var'):
        fusion = tf.get_variable('fusion', [3], initializer=tf.contrib.layers.xavier_initializer())

    print('Finish Flatten.')

    # ------------------------------------------------------------------
    # Networks
    # ------------------------------------------------------------------
    with tf.device('/gpu:0'):

        with tf.name_scope('FLMG'):
            mv_res = tf.concat([flat_mv, flat_r], axis=-1)
            mv = slim.conv2d(mv_res, 8, kernel_size=[3, 3], scope='FLMG_1')
            mv = slim.conv2d(mv, 8, kernel_size=[3, 3], scope='FLMG_2')
            mv = slim.conv2d(mv, 6, kernel_size=[3, 3], scope='FLMG_3')
            mv = slim.conv2d(mv, 4, kernel_size=[3, 3], scope='FLMG_4')
            mv = slim.conv2d(mv, 2, kernel_size=[3, 3], scope='FLMG_5')
            mv = slim.conv2d(mv, 3, kernel_size=[3, 3], scope='FLMG_6')

        with tf.name_scope('FLMG_LOSS'):
            # Squared error summed per frame, then averaged over the frames.
            matrix_pow_2 = tf.pow(tf.subtract(mv, flat_flow), 2)
            matrix_norm = tf.reduce_sum(matrix_pow_2, axis=[1, 2, 3])

            flmg_loss = tf.reduce_mean(matrix_norm)
            tf.summary.scalar('flmg_loss', flmg_loss)

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            i_feature, _ = resnet_v1.resnet_v1_152(flat_i, num_classes=None, is_training=True, scope='i_resnet')
            mv_feature, _ = resnet_v1.resnet_v1_50(mv, num_classes=None, is_training=True, scope='mv_resnet')
            r_feature, _ = resnet_v1.resnet_v1_50(flat_r, num_classes=None, is_training=True, scope='r_resnet')

        with tf.name_scope('reshape_feature'):
            i_feature = tf.reshape(i_feature, [-1, 2048])
            mv_feature = tf.reshape(mv_feature, [-1, 2048])
            r_feature = tf.reshape(r_feature, [-1, 2048])

        with tf.name_scope('inference_model'):
            i_sc, i_pred = model.inference_feature(i_feature, i_weights, i_biases,
                                                   FLAGS.num_segments, N_CLASS, name='i_inf')
            mv_sc, mv_pred = model.inference_feature(mv_feature, mv_weights, mv_biases,
                                                     FLAGS.num_segments, N_CLASS, name='mv_inf')
            r_sc, r_pred = model.inference_feature(r_feature, r_weights, r_biases,
                                                   FLAGS.num_segments, N_CLASS, name='r_inf')
            combine_sc, pred_class = model.inference_fusion(i_sc, mv_sc, r_sc, fusion)

    print('Finish Model.')

    # ------------------------------------------------------------------
    # Losses
    # ------------------------------------------------------------------
    with tf.name_scope('classiciation_loss'):
        one_hot_labels = tf.one_hot(label_placeholder, N_CLASS)
        mv_class_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(
            logits=mv_sc, labels=one_hot_labels, dim=1))
        i_class_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(
            logits=i_sc, labels=one_hot_labels, dim=1))
        r_class_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(
            logits=r_sc, labels=one_hot_labels, dim=1))
        tf.summary.scalar('mv_cls_loss', mv_class_loss)
        tf.summary.scalar('i_cls_loss', i_class_loss)
        tf.summary.scalar('r_cls_loss', r_class_loss)

        combine_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(
            logits=combine_sc, labels=one_hot_labels, dim=1))
        tf.summary.scalar('fuse_cls_loss', combine_loss)

        total_loss = combine_loss + i_class_loss + mv_class_loss + r_class_loss + flmg_loss
        tf.summary.scalar('tot_cls_loss', total_loss)

    with tf.name_scope('weigh_decay'):
        # NOTE(review): this term is only written as a summary; it is not part
        # of `total_loss`, so it does not influence the optimisation below.
        # Confirm whether that is intended.
        weight_loss = sum(tf.get_collection('losses'))
        tf.summary.scalar('eight_decay_loss', weight_loss)

    with tf.name_scope('optimizer'):
        train_opt = tf.train.AdamOptimizer(FLAGS.tot_lr).minimize(
            total_loss, var_list=tf.trainable_variables())

    print('Finish Optimizer.')

    with tf.name_scope('init_function'):
        init_var = tf.global_variables_initializer()

    # ------------------------------------------------------------------
    # Input pipelines
    # ------------------------------------------------------------------
    with tf.name_scope('video_dataset'):
        train_data = dataset.buildTrainDataset_v2(FLAGS.train_list, FLAGS.data_path, FLAGS.num_segments,
                                                  batch_size=FLAGS.batch_size, augment=False,
                                                  shuffle=True, num_threads=2, buffer=100)
        test_data = dataset.buildTestDataset(FLAGS.valid_list, FLAGS.data_path, FLAGS.num_segments,
                                             batch_size=FLAGS.batch_size, num_threads=2, buffer=30)

        with tf.name_scope('dataset_iterator'):
            it = tf.contrib.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes)
            next_data = it.get_next()
            init_data = it.make_initializer(train_data)
            it_test = tf.contrib.data.Iterator.from_structure(test_data.output_types, test_data.output_shapes)
            next_test_data = it_test.get_next()
            init_test_data = it_test.make_initializer(test_data)

    print('Finish Dataset.')

    # ------------------------------------------------------------------
    # Savers, session and summary writer
    # ------------------------------------------------------------------
    restore_var = [v for v in tf.trainable_variables() if ('Adam' not in v.name)]

    # Same set minus every variable whose name contains 'FLMG'; used as a
    # fallback when the full restore below fails.
    first_restore_var = [v for v in tf.trainable_variables()
                         if ('Adam' not in v.name and 'FLMG' not in v.name)]
    first_saver = tf.train.Saver(var_list=first_restore_var)

    my_saver = tf.train.Saver(var_list=restore_var, max_to_keep=5)

    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    sess = tf.Session(config=config)

    with tf.name_scope('writer'):
        merged = tf.summary.merge_all()
        if not tf.gfile.Exists(FLAGS.log_path):
            tf.gfile.MakeDirs(FLAGS.log_path)
        # Number the run after the entries already present in the log folder.
        run_number = len(os.listdir(FLAGS.log_path)) + 1
        logdir = 'run_%02d' % run_number
        tf.gfile.MakeDirs(os.path.join(FLAGS.log_path, logdir))
        writer = tf.summary.FileWriter(os.path.join(FLAGS.log_path, logdir), sess.graph)

    with tf.name_scope('saver'):
        if not tf.gfile.Exists(FLAGS.save_path):
            tf.gfile.MakeDirs(FLAGS.save_path)

    with tf.name_scope('intialization'):
        sess.run(init_var)
        sess.run(init_data)
        sess.run(init_test_data)

        try:
            my_saver.restore(sess, FLAGS.continue_training)
        except Exception:
            # First train: the full restore failed, so retry with the saver
            # that skips the FLMG variables. `Exception` (rather than a bare
            # `except`) keeps KeyboardInterrupt / SystemExit propagating.
            first_saver.restore(sess, FLAGS.continue_training)

        print('Finish Loading Pretrained Model.')

    # ------------------------------------------------------------------
    # Main training loop
    # ------------------------------------------------------------------
    combine_acc = 0
    i_acc = 0
    mv_acc = 0
    r_acc = 0
    start_time = time.time()
    try:
        for step in range(FLAGS.max_steps):
            # Validation
            if step % 1000 == 0 and step > 0:
                combine_classes = []
                mv_classes = []
                i_classes = []
                r_classes = []
                gt_label = []

                for i in range(100):
                    ti_arr, tmv_arr, tr_arr, tlabel = sess.run(next_test_data)
                    i_class, mv_class, r_class, com_class = sess.run(
                        [i_pred, mv_pred, r_pred, pred_class],
                        feed_dict={mv_placeholder: tmv_arr, i_placeholder: ti_arr,
                                   r_placeholder: tr_arr, label_placeholder: tlabel})
                    combine_classes = np.append(combine_classes, com_class)
                    mv_classes = np.append(mv_classes, mv_class)
                    i_classes = np.append(i_classes, i_class)
                    r_classes = np.append(r_classes, r_class)
                    gt_label = np.append(gt_label, tlabel)

                # The mean of the boolean match mask is the fraction of
                # correct predictions.
                combine_acc = np.mean(combine_classes == gt_label)
                i_acc = np.mean(i_classes == gt_label)
                mv_acc = np.mean(mv_classes == gt_label)
                r_acc = np.mean(r_classes == gt_label)

                print('Step %d finished with accuracy: %f , %f , %f, %f' % (step, i_acc, mv_acc, r_acc, combine_acc))

            # Training procedure
            i_arr, mv_arr, r_arr, flow_arr, label = sess.run(next_data)
            summary, _, pred, loss1, loss2, loss3, loss4, loss5 = sess.run(
                [merged, train_opt, pred_class, mv_class_loss, i_class_loss,
                 r_class_loss, combine_loss, flmg_loss],
                feed_dict={mv_placeholder: mv_arr, i_placeholder: i_arr,
                           flow_placeholder: flow_arr,
                           r_placeholder: r_arr, label_placeholder: label,
                           combine_value_: combine_acc, i_value_: i_acc,
                           mv_value_: mv_acc, r_value_: r_acc})

            if step % 10 == 0:
                duration = time.time() - start_time
                print('Step %d: %.3f sec' % (step, duration), 'mv_loss:', loss1, 'i_loss:', loss2, 'r_loss:', loss3, 'fusion_loss:', loss4, 'flmg_loss:', loss5)
                print('GT:', label)
                print('Pred:', pred)

                writer.add_summary(summary, step)
                start_time = time.time()

            # Model Saving
            if step % 1000 == 0 and not step == 0:
                my_saver.save(sess, os.path.join(FLAGS.save_path, 'all_net.chkp'), global_step=step)
    finally:
        # Release the event file and the session even when training aborts.
        writer.close()
        sess.close()
示例#4
0
def run_training():
    """Fine-tune a VGG-16 or ResNet-152 regressor and checkpoint the best model.

    Reads the annotation CSV ``ANNOS_CSV``, shuffles it and splits it by the
    ``partition`` column into train and validation inputs. Image paths come
    from the column(s) selected by ``DATA``, and the regression target is
    ``pop_density_log2``. The network selected by ``MODEL`` is built on
    placeholders and initialised from ``PRETRAIN_WEIGHTS``. It is trained
    with Adam on the mean squared error between labels and outputs, using an
    exponentially decayed learning rate.

    Training metrics are printed and summarised every 10 steps, and
    validation metrics every 50 steps. A checkpoint is written to
    ``LOG_DIR`` when the validation R2 improves at a checkpointing step.

    Raises:
        ValueError: if ``DATA`` is not ``'l8'``, ``'s1'`` or ``'l8s1'``, or
            if ``MODEL`` is not ``'vgg'`` or ``'resnet'``.
    """
    # Validate the configuration up front so a typo fails with a clear
    # message instead of a NameError deep inside graph construction.
    path_columns = {
        'l8': ['l8_vis_jpg'],
        's1': ['s1_vis_jpg'],
        'l8s1': ['l8_vis_jpg', 's1_vis_jpg'],
    }
    if DATA not in path_columns:
        raise ValueError("DATA should be 'l8', 's1' or 'l8s1', got %r" % (DATA,))
    if MODEL not in ('vgg', 'resnet'):
        raise ValueError("MODEL should be 'vgg' or 'resnet', got %r" % (MODEL,))
    path_col = path_columns[DATA]

    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)

    # create input path and labels np.array from csv annotations
    df_annos = pd.read_csv(ANNOS_CSV, index_col=0)
    df_annos = df_annos.sample(frac=1).reset_index(
        drop=True)  # shuffle the whole datasets

    is_train = df_annos.partition == 'train'
    is_val = df_annos.partition == 'val'

    input_files_train = JPG_DIR + df_annos.loc[is_train, path_col].values
    input_labels_train = df_annos.loc[is_train, 'pop_density_log2'].values
    input_files_val = JPG_DIR + df_annos.loc[is_val, path_col].values
    input_labels_val = df_annos.loc[is_val, 'pop_density_log2'].values
    input_id_train = df_annos.loc[is_train, 'village_id'].values
    input_id_val = df_annos.loc[is_val, 'village_id'].values

    print('input_files_train shape:', input_files_train.shape)
    train_set_size = len(input_labels_train)

    # data input
    with tf.device('/cpu:0'):
        train_images_batch, train_labels_batch, _ = dataset.input_batches(
            FLAGS.batch_size, FLAGS.output_size, input_files_train,
            input_labels_train, input_id_train,
            IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL,
            regression=True, augmentation=True, normalization=True)
        val_images_batch, val_labels_batch, _ = dataset.input_batches(
            FLAGS.batch_size, FLAGS.output_size, input_files_val,
            input_labels_val, input_id_val,
            IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL,
            regression=True, augmentation=False, normalization=True)

    images_placeholder = tf.placeholder(
        tf.float32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL])
    labels_placeholder = tf.placeholder(tf.float32, shape=[None])
    print('finish data input')

    # number of training batches/steps in each epoch
    batches_per_epoch = int(train_set_size / FLAGS.batch_size)
    # total number of training batches/steps
    max_steps = batches_per_epoch * FLAGS.max_epoch

    # CNN forward reference
    # NOTE(review): tf.squeeze without an axis removes every size-1
    # dimension, so a batch of one would lose its batch axis too -- confirm
    # that batches always hold more than one sample.
    if MODEL == 'vgg':
        with slim.arg_scope(
                vgg.vgg_arg_scope(weight_decay=FLAGS.weight_decay)):
            outputs, _ = vgg.vgg_16(images_placeholder,
                                    num_classes=FLAGS.output_size,
                                    dropout_keep_prob=FLAGS.dropout_keep,
                                    is_training=True)
            # change shape from (B,1) to (B,), same as label input
            outputs = tf.squeeze(outputs)
    elif MODEL == 'resnet':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            outputs, _ = resnet_v1.resnet_v1_152(images_placeholder,
                                                 num_classes=FLAGS.output_size,
                                                 is_training=True)
            # change shape from (B,1) to (B,), same as label input
            outputs = tf.squeeze(outputs)

    # loss
    # Labels and outputs are in log2 space; 2**x maps them back.
    labels_real = tf.pow(2.0, labels_placeholder)
    outputs_real = tf.pow(2.0, outputs)

    # only loss_log2_mse is used for the gradient; the model minimises it
    loss_log2_mse = tf.reduce_mean(
        tf.squared_difference(labels_placeholder, outputs),
        name='loss_log2_mse')
    loss_real_rmse = tf.sqrt(
        tf.reduce_mean(tf.squared_difference(labels_real, outputs_real)),
        name='loss_real_rmse')
    loss_real_mae = tf.losses.absolute_difference(labels_real, outputs_real)

    tf.summary.scalar('loss_log2_mse', loss_log2_mse)
    tf.summary.scalar('loss_real_rmse', loss_real_rmse)
    tf.summary.scalar('loss_real_mae', loss_real_mae)

    # accuracy (R2)
    def r_squared(labels, predictions):
        """Return 1 - SSE / SST for the given label and prediction tensors."""
        sst = tf.reduce_sum(
            tf.squared_difference(labels, tf.reduce_mean(labels)))
        sse = tf.reduce_sum(tf.squared_difference(labels, predictions))
        return (1.0 - tf.div(sse, sst))

    r2_log2 = r_squared(labels_placeholder, outputs)
    r2_real = r_squared(labels_real, outputs_real)

    tf.summary.scalar('r2_log2', r2_log2)
    tf.summary.scalar('r2_real', r2_real)

    # Determine the model variables to restore from the pre-trained
    # checkpoint. This must happen before the global step and the optimizer
    # are created, so that only network variables are collected here.
    if MODEL == 'vgg':
        if DATA == 'l8s1':
            model_variables = slim.get_variables_to_restore(
                exclude=['vgg_16/fc8', 'vgg_16/conv1'])
        else:
            model_variables = slim.get_variables_to_restore(
                exclude=['vgg_16/fc8'])
    elif MODEL == 'resnet':
        model_variables = slim.get_variables_to_restore(
            exclude=['resnet_v1_152/logits', 'resnet_v1_152/conv1'])

    # training step and learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(
        FLAGS.learning_rate,  # initial learning rate
        global_step=global_step,  # current step
        decay_steps=max_steps,  # total number of steps to decay over
        decay_rate=FLAGS.lr_decay_rate
    )  # final learning rate = FLAGS.learning_rate * decay_rate
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_log2_mse, global_step=global_step)

    # summary output in tensorboard
    summary = tf.summary.merge_all()
    summary_writer_train = tf.summary.FileWriter(
        os.path.join(LOG_DIR, 'log_train'), sess.graph)
    summary_writer_val = tf.summary.FileWriter(
        os.path.join(LOG_DIR, 'log_val'), sess.graph)

    # variable initialize
    init = tf.global_variables_initializer()
    sess.run(init)

    # restore the model from pre-trained checkpoint
    restorer = tf.train.Saver(model_variables)
    restorer.restore(sess, PRETRAIN_WEIGHTS)
    print('loaded pre-trained weights: ', PRETRAIN_WEIGHTS)

    # saver object to save checkpoint during training
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    # Checkpointing happens on the last multiple of 50 steps inside an epoch,
    # because validation only runs every 50 steps. With fewer than 50 batches
    # per epoch that expression is 0 and `step % 0` would raise
    # ZeroDivisionError, so fall back to the validation interval.
    save_interval = max(batches_per_epoch - batches_per_epoch % 50, 50)

    print('start training...')
    epoch = 0
    best_r2 = -float('inf')
    try:
        for step in range(max_steps):
            if step % batches_per_epoch == 0:
                epoch += 1

            start_time = time.time()  # record the time used for each batch

            # inputs of this batch, numpy array format
            images_out, labels_out = sess.run(
                [train_images_batch, train_labels_batch])

            duration_batch = time.time() - start_time

            if step == 0:
                print("finished reading batch data")
                print("images_out shape:", images_out.shape)
            feed_dict = {
                images_placeholder: images_out,
                labels_placeholder: labels_out
            }
            _, train_loss, train_accuracy, lr = sess.run(
                [train_op, loss_log2_mse, r2_log2, learning_rate],
                feed_dict=feed_dict)

            duration = time.time() - start_time

            is_last_step = (step + 1) == max_steps

            # print training loss every 10 batches
            if step % 10 == 0 or is_last_step:
                print('Step %d epoch %d lr %.3e: log2 MSE loss = %.4f log2 R2 = %.4f (%.3f sec, %.3f sec(each batch))' \
                      % (step, epoch, lr, train_loss, train_accuracy, duration*10, duration_batch))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer_train.add_summary(summary_str, step)
                summary_writer_train.flush()

            # calculate and print validation loss every 50 batches
            if step % 50 == 0 or is_last_step:
                images_out, labels_out = sess.run(
                    [val_images_batch, val_labels_batch])
                feed_dict = {
                    images_placeholder: images_out,
                    labels_placeholder: labels_out
                }

                val_loss, val_accuracy = sess.run([loss_log2_mse, r2_log2],
                                                  feed_dict=feed_dict)
                print('Step %d epoch %d: val log2 MSE = %.4f val log2 R2 = %.4f ' %
                      (step, epoch, val_loss, val_accuracy))

                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer_val.add_summary(summary_str, step)
                summary_writer_val.flush()

                # in each epoch, if the validation R2 is higher than the best
                # R2 so far, save the checkpoint
                if step % save_interval == 0 and val_accuracy > best_r2:
                    best_r2 = val_accuracy
                    checkpoint_file = os.path.join(LOG_DIR, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_file,
                               global_step=step,
                               write_state=True)
    finally:
        # Release the event files and the session even when training aborts.
        summary_writer_train.close()
        summary_writer_val.close()
        sess.close()
示例#5
0
def main(_):
    """Build and train the three-stream (I-frame / motion-vector / residual) model.

    The graph consists of one ResNet backbone per stream (ResNet-152 for the
    I-frame stream, ResNet-50 for the motion-vector and residual streams), a
    two-layer fully connected head per stream evaluated through
    ``model.inference_feature``, and a learned 3-element fusion vector that
    ``model.inference_fusion`` uses to combine the per-stream scores.

    Training runs for ``FLAGS.max_steps`` steps. Every 1000 steps the model is
    evaluated on up to 100 validation batches; the fully connected heads are
    checkpointed every 5000 steps and the ResNet backbones every 10000 steps.

    Args:
        _: Unused positional argument (presumably the argv list handed over
            by ``tf.app.run`` -- confirm against the caller).
    """
    # Cadence of the periodic work in the training loop.
    validation_interval = 1000
    max_validation_batches = 100
    summary_interval = 10
    fc_save_interval = 5000
    resnet_save_interval = 10000

    with tf.name_scope('input_placeholder'):
        mv_placeholder = tf.placeholder(tf.float32,
                                        shape=(None, FLAGS.num_segments, 224,
                                               224, 3),
                                        name='mv_frame')
        i_placeholder = tf.placeholder(tf.float32,
                                       shape=(None, FLAGS.num_segments, 224,
                                              224, 3),
                                       name='i_frame')
        r_placeholder = tf.placeholder(tf.float32,
                                       shape=(None, FLAGS.num_segments, 224,
                                              224, 3),
                                       name='r_frame')

    with tf.name_scope('label_placeholder'):
        label_placeholder = tf.placeholder(tf.int32,
                                           shape=(None),
                                           name='labels')

    # Scalar placeholders that carry the most recent validation accuracies
    # into the merged summary; they are fed on every training step.
    with tf.name_scope('accuracy'):
        combine_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        i_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        mv_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        r_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        tf.summary.scalar('combine_acc', combine_value_)
        tf.summary.scalar('i_acc', i_value_)
        tf.summary.scalar('mv_acc', mv_value_)
        tf.summary.scalar('r_acc', r_value_)

    with tf.name_scope('flatten_input'):
        # Each video contributes num_segments frames, so the segment axis is
        # folded into the batch axis before the 2-D ResNets are applied.
        b_size = tf.shape(mv_placeholder)[0]
        flat_shape = [b_size * FLAGS.num_segments, 224, 224, 3]
        flat_mv = tf.reshape(mv_placeholder, flat_shape)
        flat_i = tf.reshape(i_placeholder, flat_shape)
        flat_r = tf.reshape(r_placeholder, flat_shape)

    with tf.variable_scope('fc_var') as var_scope:
        mv_weights = {
            'w1': _variable_with_weight_decay('wmv1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wmv2', [512, N_CLASS], 0.0005)
        }
        mv_biases = {
            'b1': _variable_with_weight_decay('bmv1', [512], 0.00),
            'b2': _variable_with_weight_decay('bmv2', [N_CLASS], 0.00)
        }
        i_weights = {
            'w1': _variable_with_weight_decay('wi1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wi2', [512, N_CLASS], 0.0005)
        }
        i_biases = {
            'b1': _variable_with_weight_decay('bi1', [512], 0.00),
            'b2': _variable_with_weight_decay('bi2', [N_CLASS], 0.00)
        }
        r_weights = {
            'w1': _variable_with_weight_decay('wr1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wr2', [512, N_CLASS], 0.0005)
        }
        r_biases = {
            'b1': _variable_with_weight_decay('br1', [512], 0.00),
            'b2': _variable_with_weight_decay('br2', [N_CLASS], 0.00)
        }

    with tf.variable_scope('fusion_var'):
        fusion = tf.get_variable(
            'fusion', [3], initializer=tf.contrib.layers.xavier_initializer())

    with tf.device('/gpu:' + str(gpunumber)):

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            # Each backbone must consume its own stream: the I-frame network
            # reads the I-frames and the motion-vector network reads the
            # motion vectors (the inputs were previously crossed).
            i_feature, _ = resnet_v1.resnet_v1_152(flat_i,
                                                   num_classes=None,
                                                   is_training=True,
                                                   scope='i_resnet')
            mv_feature, _ = resnet_v1.resnet_v1_50(flat_mv,
                                                   num_classes=None,
                                                   is_training=True,
                                                   scope='mv_resnet')
            r_feature, _ = resnet_v1.resnet_v1_50(flat_r,
                                                  num_classes=None,
                                                  is_training=True,
                                                  scope='r_resnet')

        with tf.name_scope('reshape_feature'):
            i_feature = tf.reshape(i_feature, [-1, 2048])
            mv_feature = tf.reshape(mv_feature, [-1, 2048])
            r_feature = tf.reshape(r_feature, [-1, 2048])

        with tf.name_scope('inference_model'):

            i_sc, i_pred = model.inference_feature(i_feature,
                                                   i_weights,
                                                   i_biases,
                                                   FLAGS.num_segments,
                                                   N_CLASS,
                                                   name='i_inf')

            mv_sc, mv_pred = model.inference_feature(mv_feature,
                                                     mv_weights,
                                                     mv_biases,
                                                     FLAGS.num_segments,
                                                     N_CLASS,
                                                     name='mv_inf')

            r_sc, r_pred = model.inference_feature(r_feature,
                                                   r_weights,
                                                   r_biases,
                                                   FLAGS.num_segments,
                                                   N_CLASS,
                                                   name='r_inf')

            combine_sc, pred_class = model.inference_fusion(
                i_sc, mv_sc, r_sc, fusion)

    with tf.name_scope('classiciation_loss'):
        one_hot_labels = tf.one_hot(label_placeholder, N_CLASS)
        mv_class_loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=mv_sc,
                                                       labels=one_hot_labels,
                                                       dim=1))
        i_class_loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=i_sc,
                                                       labels=one_hot_labels,
                                                       dim=1))
        r_class_loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=r_sc,
                                                       labels=one_hot_labels,
                                                       dim=1))
        tf.summary.scalar('mv_class_loss', mv_class_loss)
        tf.summary.scalar('i_class_loss', i_class_loss)
        tf.summary.scalar('r_class_loss', r_class_loss)

        combine_loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=combine_sc,
                                                       labels=one_hot_labels,
                                                       dim=1))
        tf.summary.scalar('combine_class_loss', combine_loss)

    with tf.name_scope('weigh_decay'):
        # Sums whatever has been added to the 'losses' collection (presumably
        # the weight-decay terms from _variable_with_weight_decay -- confirm).
        weight_loss = sum(tf.get_collection('losses'))
        tf.summary.scalar('eight_decay_loss', weight_loss)

    with tf.name_scope('training_var_list'):
        mv_variable_list = list(
            set(mv_weights.values()) | set(mv_biases.values()))
        mv_resnet_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                scope='mv_resnet')
        i_variable_list = list(
            set(i_weights.values()) | set(i_biases.values()))
        i_resnet_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                               scope='i_resnet')
        r_variable_list = list(
            set(r_weights.values()) | set(r_biases.values()))
        r_resnet_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                               scope='r_resnet')

    with tf.name_scope('summary_var'):
        _variable_summaries(mv_weights['w1'])
        _variable_summaries(i_weights['w2'])
        _variable_summaries(r_weights['w2'])
        _variable_summaries(mv_resnet_variables[0])
        _variable_summaries(i_resnet_variables[0])
        _variable_summaries(r_resnet_variables[0])
        _variable_summaries(fusion)

    with tf.name_scope('optimizer'):
        # Each stream gets two optimizers: one for its FC head (classification
        # loss plus weight decay) and one for its ResNet backbone.
        mv_fc_opt = tf.train.AdamOptimizer(FLAGS.mv_lr).minimize(
            mv_class_loss + weight_loss, var_list=mv_variable_list)
        mv_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(
            mv_class_loss, var_list=mv_resnet_variables)
        i_fc_opt = tf.train.AdamOptimizer(FLAGS.i_lr).minimize(
            i_class_loss + weight_loss, var_list=i_variable_list)
        i_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(
            i_class_loss, var_list=i_resnet_variables)
        r_fc_opt = tf.train.AdamOptimizer(FLAGS.r_lr).minimize(
            r_class_loss + weight_loss, var_list=r_variable_list)
        r_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(
            r_class_loss, var_list=r_resnet_variables)
        # var_list must be a list/tuple of variables, so the single fusion
        # variable is wrapped. Note that 10e-6 is 1e-5.
        fusion_opt = tf.train.GradientDescentOptimizer(10e-6).minimize(
            combine_loss, var_list=[fusion])

    with tf.name_scope('init_function'):
        init_var = tf.global_variables_initializer()
        init_i_resent = slim.assign_from_checkpoint_fn(
            os.path.join(FLAGS.pretrained_path, 'i_resnet.chkp'),
            slim.get_model_variables('i_resnet'))
        init_mv_resent = slim.assign_from_checkpoint_fn(
            os.path.join(FLAGS.pretrained_path, 'mv_resnet.chkp'),
            slim.get_model_variables('mv_resnet'))
        init_r_resent = slim.assign_from_checkpoint_fn(
            os.path.join(FLAGS.pretrained_path, 'r_resnet.chkp'),
            slim.get_model_variables('r_resnet'))

    with tf.name_scope('video_dataset'):
        train_data = dataset.buildTrainDataset_v2(FLAGS.train_list,
                                                  FLAGS.data_path,
                                                  FLAGS.num_segments,
                                                  batch_size=FLAGS.batch_size,
                                                  augment=False,
                                                  shuffle=True,
                                                  num_threads=1,
                                                  buffer=100)
        test_data = dataset.buildTestDataset(FLAGS.valid_list,
                                             FLAGS.data_path,
                                             FLAGS.num_segments,
                                             batch_size=FLAGS.batch_size,
                                             num_threads=1,
                                             buffer=30)

        with tf.name_scope('dataset_iterator'):
            it = tf.data.Iterator.from_structure(train_data.output_types,
                                                 train_data.output_shapes)
            next_data = it.get_next()
            init_data = it.make_initializer(train_data)
            it_test = tf.data.Iterator.from_structure(test_data.output_types,
                                                      test_data.output_shapes)
            next_test_data = it_test.get_next()
            # The validation iterator has to be bound to the validation
            # dataset; binding it to train_data would report training accuracy.
            init_test_data = it_test.make_initializer(test_data)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    sess = tf.Session(config=config)

    with tf.name_scope('writer'):

        merged = tf.summary.merge_all()
        if not tf.gfile.Exists(FLAGS.log_path):
            tf.gfile.MakeDirs(FLAGS.log_path)
        # Runs are numbered by counting the entries already in the log dir.
        run_number = len(os.listdir(FLAGS.log_path)) + 1
        logdir = 'run_%02d' % run_number
        tf.gfile.MakeDirs(os.path.join(FLAGS.log_path, logdir))
        writer = tf.summary.FileWriter(os.path.join(FLAGS.log_path, logdir),
                                       sess.graph)

    with tf.name_scope('saver'):

        if not tf.gfile.Exists(FLAGS.save_path):
            tf.gfile.MakeDirs(FLAGS.save_path)

        i_saver = tf.train.Saver(i_variable_list)
        mv_saver = tf.train.Saver(mv_variable_list)
        r_saver = tf.train.Saver(r_variable_list)
        i_resnet_saver = tf.train.Saver(i_resnet_variables)
        mv_resnet_saver = tf.train.Saver(mv_resnet_variables)
        r_resnet_saver = tf.train.Saver(r_resnet_variables)

    # Everything from here on uses the session and the writer, so both are
    # released in the finally block even if initialization or training raises.
    try:
        with tf.name_scope('intialization'):
            sess.run(init_var)
            sess.run(init_data)

            # Overwrite the freshly initialized backbones with the pretrained
            # weights.
            init_i_resent(sess)
            init_mv_resent(sess)
            init_r_resent(sess)

        # Main training loop
        combine_acc = 0
        i_acc = 0
        mv_acc = 0
        r_acc = 0
        start_time = time.time()

        for step in range(FLAGS.max_steps):

            # Validation

            if step % validation_interval == 0:
                combine_classes = []
                mv_classes = []
                i_classes = []
                r_classes = []
                gt_label = []
                sess.run(init_test_data)

                for i in range(max_validation_batches):
                    try:
                        ti_arr, tmv_arr, tr_arr, tlabel = sess.run(
                            next_test_data)
                    except tf.errors.OutOfRangeError:
                        # The validation set holds fewer batches than the
                        # cap; evaluate on what was collected.
                        break
                    print(i)
                    i_class, mv_class, r_class, com_class = sess.run(
                        [i_pred, mv_pred, r_pred, pred_class],
                        feed_dict={
                            mv_placeholder: tmv_arr,
                            i_placeholder: ti_arr,
                            r_placeholder: tr_arr,
                            label_placeholder: tlabel
                        })
                    combine_classes = np.append(combine_classes, com_class)
                    mv_classes = np.append(mv_classes, mv_class)
                    i_classes = np.append(i_classes, i_class)
                    r_classes = np.append(r_classes, r_class)
                    gt_label = np.append(gt_label, tlabel)

                gt_label = np.asarray(gt_label)
                if gt_label.size > 0:
                    # np.mean over the boolean match mask is a true fraction
                    # regardless of the interpreter's integer-division rules.
                    combine_acc = np.mean(combine_classes == gt_label)
                    i_acc = np.mean(i_classes == gt_label)
                    mv_acc = np.mean(mv_classes == gt_label)
                    r_acc = np.mean(r_classes == gt_label)

                    print('Step %d finished with accuracy: %f , %f , %f, %f' %
                          (step, i_acc, mv_acc, r_acc, combine_acc))

            # Training procedure

            try:
                i_arr, mv_arr, r_arr, label = sess.run(next_data)
            except tf.errors.OutOfRangeError:
                # Training data exhausted (dataset does not repeat): start a
                # new pass. An empty dataset still raises on the retry.
                sess.run(init_data)
                i_arr, mv_arr, r_arr, label = sess.run(next_data)

            summary, _, _, _, _, _, _, _, pred = sess.run(
                [
                    merged, mv_fc_opt, mv_res_opt, i_fc_opt, i_res_opt,
                    r_fc_opt, r_res_opt, fusion_opt, pred_class
                ],
                feed_dict={
                    mv_placeholder: mv_arr,
                    i_placeholder: i_arr,
                    r_placeholder: r_arr,
                    label_placeholder: label,
                    combine_value_: combine_acc,
                    i_value_: i_acc,
                    mv_value_: mv_acc,
                    r_value_: r_acc
                })
            print(r_arr.shape)
            print(label)
            print(pred)
            if step % summary_interval == 0:
                duration = time.time() - start_time
                print('Step %d: %.3f sec' % (step, duration))

                writer.add_summary(summary, step)
                start_time = time.time()

            # Model Saving

            if step % fc_save_interval == 0 and step != 0:
                i_saver.save(sess,
                             os.path.join(FLAGS.save_path, 'i_model.chkp'),
                             global_step=step)
                mv_saver.save(sess,
                              os.path.join(FLAGS.save_path, 'mv_model.chkp'),
                              global_step=step)
                r_saver.save(sess,
                             os.path.join(FLAGS.save_path, 'r_model.chkp'),
                             global_step=step)

            if step % resnet_save_interval == 0 and step != 0:
                i_resnet_saver.save(sess,
                                    os.path.join(FLAGS.save_path,
                                                 'i_resnet.chkp'),
                                    global_step=step)
                mv_resnet_saver.save(sess,
                                     os.path.join(FLAGS.save_path,
                                                  'mv_resnet.chkp'),
                                     global_step=step)
                r_resnet_saver.save(sess,
                                    os.path.join(FLAGS.save_path,
                                                 'r_resnet.chkp'),
                                    global_step=step)
    finally:
        writer.close()
        sess.close()