def build(self):
    """Build the classification graph for the configured backbone.

    Creates the image placeholder, applies ``tfutils.mean_value`` with
    ``self.img_mean``, runs the backbone selected by ``self.base_net``
    ('vgg16', 'res50', 'res101' or 'res152') and stores:

    * ``self.input``      -- float32 image placeholder,
    * ``self.input_mean`` -- output of ``tfutils.mean_value``,
    * ``self.logits``     -- class scores from the backbone,
    * ``self.prob``       -- softmax over the last axis of ``self.logits``,
    * ``self.gt``         -- int32 label placeholder of shape ``[None]``.

    When ``self.is_train`` is truthy, ``self.loss()`` is called at the end.

    Raises:
        ValueError: if ``self.base_net`` is not a supported backbone name.
    """
    # Input
    self.input = tf.placeholder(
        dtype=tf.float32,
        shape=[None, self.img_size[0], self.img_size[1], self.img_size[2]])
    self.input_mean = tfutils.mean_value(self.input, self.img_mean)

    # The three ResNet variants differ only in the constructor used.
    resnet_builders = {
        'res50': resnet_v1.resnet_v1_50,
        'res101': resnet_v1.resnet_v1_101,
        'res152': resnet_v1.resnet_v1_152,
    }

    if self.base_net == 'vgg16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            # Pass the training flag explicitly, exactly like the ResNet
            # branches do; otherwise the library default is used and the
            # network does not follow self.is_train.
            outputs, end_points = vgg.vgg_16(
                self.input_mean, self.num_classes,
                is_training=self.is_train)
        self.logits = outputs
    elif self.base_net in resnet_builders:
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_builders[self.base_net](
                self.input_mean, self.num_classes,
                is_training=self.is_train)
        # Take spatial position (0, 0) of the 4-D ResNet output.
        self.logits = net[:, 0, 0, :]
    else:
        raise ValueError(
            'base network should be vgg16, res50, -101, -152...')

    self.prob = tf.nn.softmax(self.logits, -1)
    self.gt = tf.placeholder(dtype=tf.int32, shape=[None])
    # self.var_list = tf.trainable_variables()

    if self.is_train:
        self.loss()
# Define the output folder out_file = "gw_{mode}_{network}_{feature_name}_{size}".format( mode=args.mode, network=args.network, feature_name=args.feature_name, size=args.img_size) print("Create networks...") if args.network == "resnet": ft_output = resnet.create_resnet(images, resnet_out=args.feature_name, resnet_version=args.resnet_version, is_training=False) # create network with slim.arg_scope(slim_utils.resnet_arg_scope(is_training=False)): _, end_points = resnet_v1.resnet_v1_152(images, 1000) # 1000 is the number of softmax class elif args.network == "vgg": _, end_points = vgg.vgg_16(images, is_training=False, dropout_keep_prob=1.0) ft_name = os.path.join("vgg_16", args.feature_name) ft_output = end_points[ft_name] else: assert False, "Incorrect Network" extract_features( img_input = images, ft_output = ft_output, dataset_cstor = dataset_cstor, dataset_args = {"folder": args.data_dir, "image_builder":image_builder, "crop_builder":crop_builder, "dataset_name": args.dataset_name},
def main(_):
    """Build the I-frame / motion-vector / residual graph and train it.

    The graph consists of:

    * a six-layer convolutional stack ("FLMG") applied to the channel-wise
      concatenation of motion vectors and residuals, trained with a summed
      squared error against the ``flow_frame`` input;
    * a ResNet-152 on I-frames and two ResNet-50s (on the FLMG output and
      on residuals);
    * per-stream classifiers (``model.inference_feature``) and a fusion
      head (``model.inference_fusion``).

    A single Adam optimizer minimises the sum of the four classification
    losses and the FLMG loss.  Every 1000 steps (except step 0) 100 test
    batches are evaluated and a checkpoint is written; every 10 steps the
    losses are printed and summaries are written.

    Args:
        _: Unused positional argument.
    """
    segment_shape = (None, FLAGS.num_segments, 224, 224, 3)

    with tf.name_scope('input_placeholder'):
        mv_placeholder = tf.placeholder(tf.float32, shape=segment_shape,
                                        name='mv_frame')
        flow_placeholder = tf.placeholder(tf.float32, shape=segment_shape,
                                          name='flow_frame')
        i_placeholder = tf.placeholder(tf.float32, shape=segment_shape,
                                       name='i_frame')
        r_placeholder = tf.placeholder(tf.float32, shape=segment_shape,
                                       name='r_frame')

    with tf.name_scope('label_placeholder'):
        label_placeholder = tf.placeholder(tf.int32, shape=(None),
                                           name='labels')

    # Scalar placeholders that only exist so the latest validation
    # accuracies can be written into the merged summary.
    with tf.name_scope('accuracy'):
        combine_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        i_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        mv_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        r_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        tf.summary.scalar('combine_acc', combine_value_)
        tf.summary.scalar('i_acc', i_value_)
        tf.summary.scalar('mv_acc', mv_value_)
        tf.summary.scalar('r_acc', r_value_)
    print('Finish placeholder.')

    with tf.name_scope('flatten_input'):
        # Since we have multiple segments in a single video, fold the
        # segment axis into the batch axis.
        b_size = tf.shape(mv_placeholder)[0]
        flat_shape = [b_size * FLAGS.num_segments, 224, 224, 3]
        flat_mv = tf.reshape(mv_placeholder, flat_shape)
        flat_flow = tf.reshape(flow_placeholder, flat_shape)
        flat_i = tf.reshape(i_placeholder, flat_shape)
        flat_r = tf.reshape(r_placeholder, flat_shape)

    with tf.variable_scope('fc_var'):
        mv_weights = {
            'w1': _variable_with_weight_decay('wmv1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wmv2', [512, N_CLASS], 0.0005),
        }
        mv_biases = {
            'b1': _variable_with_weight_decay('bmv1', [512], 0.00),
            'b2': _variable_with_weight_decay('bmv2', [N_CLASS], 0.00),
        }
        i_weights = {
            'w1': _variable_with_weight_decay('wi1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wi2', [512, N_CLASS], 0.0005),
        }
        i_biases = {
            'b1': _variable_with_weight_decay('bi1', [512], 0.00),
            'b2': _variable_with_weight_decay('bi2', [N_CLASS], 0.00),
        }
        r_weights = {
            'w1': _variable_with_weight_decay('wr1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wr2', [512, N_CLASS], 0.0005),
        }
        r_biases = {
            'b1': _variable_with_weight_decay('br1', [512], 0.00),
            'b2': _variable_with_weight_decay('br2', [N_CLASS], 0.00),
        }

    with tf.variable_scope('fusion_var'):
        fusion = tf.get_variable(
            'fusion', [3],
            initializer=tf.contrib.layers.xavier_initializer())
    print('Finish Flatten.')

    with tf.device('/gpu:0'):
        with tf.name_scope('FLMG'):
            mv = tf.concat([flat_mv, flat_r], axis=-1)
            # Output depths of the six 3x3 convolutions FLMG_1 .. FLMG_6.
            for layer_idx, depth in enumerate((8, 8, 6, 4, 2, 3), start=1):
                mv = slim.conv2d(mv, depth, kernel_size=[3, 3],
                                 scope='FLMG_%d' % layer_idx)

        with tf.name_scope('FLMG_LOSS'):
            # The cost function: per-sample sum of squared differences,
            # averaged over the flattened batch.
            matrix_pow_2 = tf.pow(tf.subtract(mv, flat_flow), 2)
            matrix_norm = tf.reduce_sum(matrix_pow_2, axis=[1, 2, 3])
            flmg_loss = tf.reduce_mean(matrix_norm)
            tf.summary.scalar('flmg_loss', flmg_loss)

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            i_feature, _ = resnet_v1.resnet_v1_152(
                flat_i, num_classes=None, is_training=True, scope='i_resnet')
            mv_feature, _ = resnet_v1.resnet_v1_50(
                mv, num_classes=None, is_training=True, scope='mv_resnet')
            r_feature, _ = resnet_v1.resnet_v1_50(
                flat_r, num_classes=None, is_training=True, scope='r_resnet')

        with tf.name_scope('reshape_feature'):
            i_feature = tf.reshape(i_feature, [-1, 2048])
            mv_feature = tf.reshape(mv_feature, [-1, 2048])
            r_feature = tf.reshape(r_feature, [-1, 2048])

        with tf.name_scope('inference_model'):
            i_sc, i_pred = model.inference_feature(
                i_feature, i_weights, i_biases, FLAGS.num_segments, N_CLASS,
                name='i_inf')
            mv_sc, mv_pred = model.inference_feature(
                mv_feature, mv_weights, mv_biases, FLAGS.num_segments,
                N_CLASS, name='mv_inf')
            r_sc, r_pred = model.inference_feature(
                r_feature, r_weights, r_biases, FLAGS.num_segments, N_CLASS,
                name='r_inf')
            combine_sc, pred_class = model.inference_fusion(
                i_sc, mv_sc, r_sc, fusion)
    print('Finish Model.')

    with tf.name_scope('classiciation_loss'):
        one_hot_labels = tf.one_hot(label_placeholder, N_CLASS)

        def _summed_xent(logits):
            """Return the batch-summed softmax cross-entropy of `logits`."""
            return tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits, labels=one_hot_labels, dim=1))

        mv_class_loss = _summed_xent(mv_sc)
        i_class_loss = _summed_xent(i_sc)
        r_class_loss = _summed_xent(r_sc)
        tf.summary.scalar('mv_cls_loss', mv_class_loss)
        tf.summary.scalar('i_cls_loss', i_class_loss)
        tf.summary.scalar('r_cls_loss', r_class_loss)
        combine_loss = _summed_xent(combine_sc)
        tf.summary.scalar('fuse_cls_loss', combine_loss)
        total_loss = (combine_loss + i_class_loss + mv_class_loss +
                      r_class_loss + flmg_loss)
        tf.summary.scalar('tot_cls_loss', total_loss)

    with tf.name_scope('weigh_decay'):
        # NOTE(review): weight_loss is only written to the summary; it is
        # not part of total_loss, so the optimizer below does not apply
        # it.  Confirm whether that is intended.
        weight_loss = sum(tf.get_collection('losses'))
        tf.summary.scalar('eight_decay_loss', weight_loss)

    with tf.name_scope('optimizer'):
        train_opt = tf.train.AdamOptimizer(FLAGS.tot_lr).minimize(
            total_loss, var_list=tf.trainable_variables())
    print('Finish Optimizer.')

    with tf.name_scope('init_function'):
        init_var = tf.global_variables_initializer()

    with tf.name_scope('video_dataset'):
        train_data = dataset.buildTrainDataset_v2(
            FLAGS.train_list, FLAGS.data_path, FLAGS.num_segments,
            batch_size=FLAGS.batch_size, augment=False, shuffle=True,
            num_threads=2, buffer=100)
        test_data = dataset.buildTestDataset(
            FLAGS.valid_list, FLAGS.data_path, FLAGS.num_segments,
            batch_size=FLAGS.batch_size, num_threads=2, buffer=30)

    with tf.name_scope('dataset_iterator'):
        it = tf.contrib.data.Iterator.from_structure(
            train_data.output_types, train_data.output_shapes)
        next_data = it.get_next()
        init_data = it.make_initializer(train_data)
        it_test = tf.contrib.data.Iterator.from_structure(
            test_data.output_types, test_data.output_shapes)
        next_test_data = it_test.get_next()
        init_test_data = it_test.make_initializer(test_data)
    print('Finish Dataset.')

    # my_saver covers every trainable variable; first_saver skips the FLMG
    # layers so a checkpoint written without them can still be loaded.
    restore_var = [v for v in tf.trainable_variables()
                   if ('Adam' not in v.name)]
    first_restore_var = [v for v in tf.trainable_variables()
                         if ('Adam' not in v.name and 'FLMG' not in v.name)]
    first_saver = tf.train.Saver(var_list=first_restore_var)
    my_saver = tf.train.Saver(var_list=restore_var, max_to_keep=5)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    sess = tf.Session(config=config)

    with tf.name_scope('writer'):
        merged = tf.summary.merge_all()
        if not tf.gfile.Exists(FLAGS.log_path):
            tf.gfile.MakeDirs(FLAGS.log_path)
        # Runs are numbered by how many entries the log folder already has.
        run_number = len(os.listdir(FLAGS.log_path)) + 1
        logdir = 'run_%02d' % run_number
        tf.gfile.MakeDirs(os.path.join(FLAGS.log_path, logdir))
        writer = tf.summary.FileWriter(
            os.path.join(FLAGS.log_path, logdir), sess.graph)

    # Always release the event file and the session, even when training
    # is interrupted or fails.
    try:
        with tf.name_scope('saver'):
            if not tf.gfile.Exists(FLAGS.save_path):
                tf.gfile.MakeDirs(FLAGS.save_path)

        with tf.name_scope('intialization'):
            sess.run(init_var)
            sess.run(init_data)
            sess.run(init_test_data)

        try:
            my_saver.restore(sess, FLAGS.continue_training)
        except Exception as err:  # best-effort fallback, see below
            # First train: the checkpoint has no FLMG variables yet.
            # `Exception` (not a bare except) keeps the fallback but lets
            # KeyboardInterrupt / SystemExit propagate.
            print('Full restore failed (%s); restoring without FLMG.' % err)
            first_saver.restore(sess, FLAGS.continue_training)
        print('Finish Loading Pretrained Model.')

        # Main training loop
        combine_acc = 0
        i_acc = 0
        mv_acc = 0
        r_acc = 0
        start_time = time.time()
        for step in range(FLAGS.max_steps):
            # Validation
            if step % 1000 == 0 and step > 0:
                combine_batches = []
                mv_batches = []
                i_batches = []
                r_batches = []
                label_batches = []
                # Rewind the test iterator so every validation pass sees
                # the same batches and the iterator cannot run dry after
                # several passes.
                sess.run(init_test_data)
                for _ in range(100):
                    ti_arr, tmv_arr, tr_arr, tlabel = sess.run(
                        next_test_data)
                    i_class, mv_class, r_class, com_class = sess.run(
                        [i_pred, mv_pred, r_pred, pred_class],
                        feed_dict={mv_placeholder: tmv_arr,
                                   i_placeholder: ti_arr,
                                   r_placeholder: tr_arr,
                                   label_placeholder: tlabel})
                    combine_batches.append(np.ravel(com_class))
                    mv_batches.append(np.ravel(mv_class))
                    i_batches.append(np.ravel(i_class))
                    r_batches.append(np.ravel(r_class))
                    label_batches.append(np.ravel(tlabel))
                gt_label = np.concatenate(label_batches)
                # np.mean of the boolean match mask is the accuracy and
                # does not depend on the interpreter's division semantics.
                combine_acc = np.mean(
                    np.concatenate(combine_batches) == gt_label)
                i_acc = np.mean(np.concatenate(i_batches) == gt_label)
                mv_acc = np.mean(np.concatenate(mv_batches) == gt_label)
                r_acc = np.mean(np.concatenate(r_batches) == gt_label)
                print('Step %d finished with accuracy: %f , %f , %f, %f' %
                      (step, i_acc, mv_acc, r_acc, combine_acc))

            # Training procedure
            i_arr, mv_arr, r_arr, flow_arr, label = sess.run(next_data)
            summary, _, pred, loss1, loss2, loss3, loss4, loss5 = sess.run(
                [merged, train_opt, pred_class, mv_class_loss, i_class_loss,
                 r_class_loss, combine_loss, flmg_loss],
                feed_dict={mv_placeholder: mv_arr,
                           i_placeholder: i_arr,
                           flow_placeholder: flow_arr,
                           r_placeholder: r_arr,
                           label_placeholder: label,
                           combine_value_: combine_acc,
                           i_value_: i_acc,
                           mv_value_: mv_acc,
                           r_value_: r_acc})

            if step % 10 == 0:
                duration = time.time() - start_time
                print('Step %d: %.3f sec' % (step, duration),
                      'mv_loss:', loss1, 'i_loss:', loss2, 'r_loss:', loss3,
                      'fusion_loss:', loss4, 'flmg_loss:', loss5)
                print('GT:', label)
                print('Pred:', pred)
                writer.add_summary(summary, step)
                start_time = time.time()

            # Model Saving
            if step % 1000 == 0 and not step == 0:
                my_saver.save(sess,
                              os.path.join(FLAGS.save_path, 'all_net.chkp'),
                              global_step=step)
    finally:
        writer.close()
        sess.close()
def run_training():
    """Fine-tune a VGG-16 or ResNet-152 regressor on log2 population density.

    Reads the annotation CSV (``ANNOS_CSV``), builds training and
    validation input batches with ``dataset.input_batches``, constructs
    the network selected by ``MODEL`` on a placeholder input, restores
    pre-trained weights from ``PRETRAIN_WEIGHTS`` (excluding the layers
    listed below), and trains with Adam on the mean squared error between
    the labels and the network output.

    Training loss is printed and summarised every 10 steps, validation
    loss every 50 steps.  A checkpoint is written to ``LOG_DIR`` whenever
    the validation R2 at the per-epoch check exceeds the best seen so far.

    Raises:
        ValueError: if ``DATA`` or ``MODEL`` has an unsupported value.
    """
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)

    # Create input paths and label arrays from the CSV annotations.
    df_annos = pd.read_csv(ANNOS_CSV, index_col=0)
    # Shuffle the whole dataset.
    df_annos = df_annos.sample(frac=1).reset_index(drop=True)

    if DATA == 'l8':
        path_col = ['l8_vis_jpg']
    elif DATA == 's1':
        path_col = ['s1_vis_jpg']
    elif DATA == 'l8s1':
        path_col = ['l8_vis_jpg', 's1_vis_jpg']
    else:
        # Fail here with a clear message instead of a NameError below.
        raise ValueError(
            "DATA must be 'l8', 's1' or 'l8s1', got %r" % (DATA,))

    is_train = df_annos.partition == 'train'
    is_val = df_annos.partition == 'val'
    input_files_train = JPG_DIR + df_annos.loc[is_train, path_col].values
    input_labels_train = df_annos.loc[is_train, 'pop_density_log2'].values
    input_files_val = JPG_DIR + df_annos.loc[is_val, path_col].values
    input_labels_val = df_annos.loc[is_val, 'pop_density_log2'].values
    input_id_train = df_annos.loc[is_train, 'village_id'].values
    input_id_val = df_annos.loc[is_val, 'village_id'].values
    print('input_files_train shape:', input_files_train.shape)
    train_set_size = len(input_labels_train)

    # Data input
    with tf.device('/cpu:0'):
        train_images_batch, train_labels_batch, _ = \
            dataset.input_batches(FLAGS.batch_size, FLAGS.output_size,
                                  input_files_train, input_labels_train,
                                  input_id_train, IMAGE_HEIGHT, IMAGE_WIDTH,
                                  IMAGE_CHANNEL, regression=True,
                                  augmentation=True, normalization=True)
        val_images_batch, val_labels_batch, _ = \
            dataset.input_batches(FLAGS.batch_size, FLAGS.output_size,
                                  input_files_val, input_labels_val,
                                  input_id_val, IMAGE_HEIGHT, IMAGE_WIDTH,
                                  IMAGE_CHANNEL, regression=True,
                                  augmentation=False, normalization=True)

    images_placeholder = tf.placeholder(
        tf.float32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL])
    labels_placeholder = tf.placeholder(tf.float32, shape=[None, ])
    print('finish data input')

    # Number of training batches/steps in each epoch.
    TRAIN_BATCHES_PER_EPOCH = train_set_size // FLAGS.batch_size
    # Total number of training batches/steps.
    MAX_STEPS = TRAIN_BATCHES_PER_EPOCH * FLAGS.max_epoch
    # Checkpoint check interval: the largest multiple of 50 that fits in
    # an epoch, so it always coincides with a validation step.  Fall back
    # to 50 when an epoch has fewer than 50 batches, which would
    # otherwise make the modulus below zero.
    checkpoint_interval = (
        TRAIN_BATCHES_PER_EPOCH - TRAIN_BATCHES_PER_EPOCH % 50) or 50

    # CNN forward inference
    if MODEL == 'vgg':
        with slim.arg_scope(
                vgg.vgg_arg_scope(weight_decay=FLAGS.weight_decay)):
            outputs, _ = vgg.vgg_16(images_placeholder,
                                    num_classes=FLAGS.output_size,
                                    dropout_keep_prob=FLAGS.dropout_keep,
                                    is_training=True)
    elif MODEL == 'resnet':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            outputs, _ = resnet_v1.resnet_v1_152(
                images_placeholder, num_classes=FLAGS.output_size,
                is_training=True)
    else:
        raise ValueError("MODEL must be 'vgg' or 'resnet', got %r" % (MODEL,))
    # Change shape from (B,1) to (B,), same as the label input.
    outputs = tf.squeeze(outputs)

    # Loss
    labels_real = tf.pow(2.0, labels_placeholder)
    outputs_real = tf.pow(2.0, outputs)
    # Only loss_log2_mse is used for gradient calculation; the model
    # minimises this value.  The others are reported only.
    loss_log2_mse = tf.reduce_mean(
        tf.squared_difference(labels_placeholder, outputs),
        name='loss_log2_mse')
    loss_real_rmse = tf.sqrt(
        tf.reduce_mean(tf.squared_difference(labels_real, outputs_real)),
        name='loss_real_rmse')
    loss_real_mae = tf.losses.absolute_difference(labels_real, outputs_real)
    tf.summary.scalar('loss_log2_mse', loss_log2_mse)
    tf.summary.scalar('loss_real_rmse', loss_real_rmse)
    tf.summary.scalar('loss_real_mae', loss_real_mae)

    # Accuracy (R2)
    def r_sqaured(labels, outputs):
        """Return 1 - SSE / SST for the given labels and predictions."""
        sst = tf.reduce_sum(
            tf.squared_difference(labels, tf.reduce_mean(labels)))
        sse = tf.reduce_sum(tf.squared_difference(labels, outputs))
        return (1.0 - tf.div(sse, sst))

    r2_log2 = r_sqaured(labels_placeholder, outputs)
    r2_real = r_sqaured(labels_real, outputs_real)
    tf.summary.scalar('r2_log2', r2_log2)
    tf.summary.scalar('r2_real', r2_real)

    # Determine the model variables to restore from the pre-trained
    # checkpoint (MODEL was validated above).
    if MODEL == 'vgg':
        if DATA == 'l8s1':
            model_variables = slim.get_variables_to_restore(
                exclude=['vgg_16/fc8', 'vgg_16/conv1'])
        else:
            model_variables = slim.get_variables_to_restore(
                exclude=['vgg_16/fc8'])
    else:
        model_variables = slim.get_variables_to_restore(
            exclude=['resnet_v1_152/logits', 'resnet_v1_152/conv1'])

    # Training step and learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(
        FLAGS.learning_rate,  # initial learning rate
        global_step=global_step,  # current step
        decay_steps=MAX_STEPS,  # total number of steps to decay over
        decay_rate=FLAGS.lr_decay_rate
    )  # final learning rate = FLAGS.learning_rate * decay_rate
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_log2_mse, global_step=global_step)

    # Summary output in TensorBoard
    summary = tf.summary.merge_all()
    summary_writer_train = tf.summary.FileWriter(
        os.path.join(LOG_DIR, 'log_train'), sess.graph)
    summary_writer_val = tf.summary.FileWriter(
        os.path.join(LOG_DIR, 'log_val'), sess.graph)

    # Close the event files and the session even if training fails.
    try:
        # Variable initialisation
        init = tf.global_variables_initializer()
        sess.run(init)

        # Restore the model from the pre-trained checkpoint.
        restorer = tf.train.Saver(model_variables)
        restorer.restore(sess, PRETRAIN_WEIGHTS)
        print('loaded pre-trained weights: ', PRETRAIN_WEIGHTS)

        # Saver object to save checkpoints during training.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

        print('start training...')
        epoch = 0
        best_r2 = -float('inf')
        for step in range(MAX_STEPS):
            if step % TRAIN_BATCHES_PER_EPOCH == 0:
                epoch += 1

            start_time = time.time()  # record the time used for each batch
            # Inputs of this batch, in numpy array format.
            images_out, labels_out = sess.run(
                [train_images_batch, train_labels_batch])
            duration_batch = time.time() - start_time
            if step == 0:
                print("finished reading batch data")
                print("images_out shape:", images_out.shape)

            feed_dict = {
                images_placeholder: images_out,
                labels_placeholder: labels_out
            }
            _, train_loss, train_accuracy, train_outputs, lr = \
                sess.run([train_op, loss_log2_mse, r2_log2, outputs,
                          learning_rate], feed_dict=feed_dict)
            duration = time.time() - start_time

            # Print training loss every 10 batches.
            if step % 10 == 0 or (step + 1) == MAX_STEPS:
                print('Step %d epoch %d lr %.3e: log2 MSE loss = %.4f log2 R2 = %.4f (%.3f sec, %.3f sec(each batch))' \
                      % (step, epoch, lr, train_loss, train_accuracy, duration*10, duration_batch))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer_train.add_summary(summary_str, step)
                summary_writer_train.flush()

            # Calculate and print validation loss every 50 batches.
            if step % 50 == 0 or (step + 1) == MAX_STEPS:
                images_out, labels_out = sess.run(
                    [val_images_batch, val_labels_batch])
                feed_dict = {
                    images_placeholder: images_out,
                    labels_placeholder: labels_out
                }
                val_loss, val_accuracy = sess.run([loss_log2_mse, r2_log2],
                                                  feed_dict=feed_dict)
                print('Step %d epoch %d: val log2 MSE = %.4f val log2 R2 = %.4f '
                      % (step, epoch, val_loss, val_accuracy))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer_val.add_summary(summary_str, step)
                summary_writer_val.flush()

                # In each epoch, if the validation R2 is higher than the
                # best R2, save the checkpoint.  checkpoint_interval is a
                # multiple of 50, so val_accuracy was just refreshed.
                if step % checkpoint_interval == 0:
                    if val_accuracy > best_r2:
                        best_r2 = val_accuracy
                        checkpoint_file = os.path.join(LOG_DIR, 'model.ckpt')
                        saver.save(sess, checkpoint_file, global_step=step,
                                   write_state=True)
    finally:
        summary_writer_train.close()
        summary_writer_val.close()
        sess.close()
def main(_):
    """Build the I-frame / motion-vector / residual graph and train it.

    Three ResNets (ResNet-152 on I-frames, ResNet-50 on motion vectors and
    on residuals) feed per-stream classifiers (``model.inference_feature``)
    and a fusion head (``model.inference_fusion``).  Each stream has one
    Adam optimizer for its fully-connected variables and one for its
    ResNet variables; the fusion weights use plain gradient descent.

    ResNet weights are loaded from ``FLAGS.pretrained_path``.  Every 1000
    steps (including step 0) 100 test batches are evaluated; the
    fully-connected variables are checkpointed every 5000 steps and the
    ResNets every 10000 steps.

    Args:
        _: Unused positional argument.
    """
    segment_shape = (None, FLAGS.num_segments, 224, 224, 3)

    with tf.name_scope('input_placeholder'):
        mv_placeholder = tf.placeholder(tf.float32, shape=segment_shape,
                                        name='mv_frame')
        i_placeholder = tf.placeholder(tf.float32, shape=segment_shape,
                                       name='i_frame')
        r_placeholder = tf.placeholder(tf.float32, shape=segment_shape,
                                       name='r_frame')

    with tf.name_scope('label_placeholder'):
        label_placeholder = tf.placeholder(tf.int32, shape=(None),
                                           name='labels')

    # Scalar placeholders that only exist so the latest validation
    # accuracies can be written into the merged summary.
    with tf.name_scope('accuracy'):
        combine_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        i_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        mv_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        r_value_ = tf.placeholder(tf.float32, shape=(), name='accuracy')
        tf.summary.scalar('combine_acc', combine_value_)
        tf.summary.scalar('i_acc', i_value_)
        tf.summary.scalar('mv_acc', mv_value_)
        tf.summary.scalar('r_acc', r_value_)

    with tf.name_scope('flatten_input'):
        # Since we have multiple segments in a single video, fold the
        # segment axis into the batch axis.
        b_size = tf.shape(mv_placeholder)[0]
        flat_shape = [b_size * FLAGS.num_segments, 224, 224, 3]
        flat_mv = tf.reshape(mv_placeholder, flat_shape)
        flat_i = tf.reshape(i_placeholder, flat_shape)
        flat_r = tf.reshape(r_placeholder, flat_shape)

    with tf.variable_scope('fc_var'):
        mv_weights = {
            'w1': _variable_with_weight_decay('wmv1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wmv2', [512, N_CLASS], 0.0005),
        }
        mv_biases = {
            'b1': _variable_with_weight_decay('bmv1', [512], 0.00),
            'b2': _variable_with_weight_decay('bmv2', [N_CLASS], 0.00),
        }
        i_weights = {
            'w1': _variable_with_weight_decay('wi1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wi2', [512, N_CLASS], 0.0005),
        }
        i_biases = {
            'b1': _variable_with_weight_decay('bi1', [512], 0.00),
            'b2': _variable_with_weight_decay('bi2', [N_CLASS], 0.00),
        }
        r_weights = {
            'w1': _variable_with_weight_decay('wr1', [2048, 512], 0.0005),
            'w2': _variable_with_weight_decay('wr2', [512, N_CLASS], 0.0005),
        }
        r_biases = {
            'b1': _variable_with_weight_decay('br1', [512], 0.00),
            'b2': _variable_with_weight_decay('br2', [N_CLASS], 0.00),
        }

    with tf.variable_scope('fusion_var'):
        fusion = tf.get_variable(
            'fusion', [3],
            initializer=tf.contrib.layers.xavier_initializer())

    with tf.device('/gpu:' + str(gpunumber)):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            # Each stream gets its own input: I-frames go to 'i_resnet'
            # and motion vectors to 'mv_resnet'.
            i_feature, _ = resnet_v1.resnet_v1_152(
                flat_i, num_classes=None, is_training=True, scope='i_resnet')
            mv_feature, _ = resnet_v1.resnet_v1_50(
                flat_mv, num_classes=None, is_training=True,
                scope='mv_resnet')
            r_feature, _ = resnet_v1.resnet_v1_50(
                flat_r, num_classes=None, is_training=True, scope='r_resnet')

        with tf.name_scope('reshape_feature'):
            i_feature = tf.reshape(i_feature, [-1, 2048])
            mv_feature = tf.reshape(mv_feature, [-1, 2048])
            r_feature = tf.reshape(r_feature, [-1, 2048])

        with tf.name_scope('inference_model'):
            i_sc, i_pred = model.inference_feature(
                i_feature, i_weights, i_biases, FLAGS.num_segments, N_CLASS,
                name='i_inf')
            mv_sc, mv_pred = model.inference_feature(
                mv_feature, mv_weights, mv_biases, FLAGS.num_segments,
                N_CLASS, name='mv_inf')
            r_sc, r_pred = model.inference_feature(
                r_feature, r_weights, r_biases, FLAGS.num_segments, N_CLASS,
                name='r_inf')
            combine_sc, pred_class = model.inference_fusion(
                i_sc, mv_sc, r_sc, fusion)

    with tf.name_scope('classiciation_loss'):
        one_hot_labels = tf.one_hot(label_placeholder, N_CLASS)

        def _summed_xent(logits):
            """Return the batch-summed softmax cross-entropy of `logits`."""
            return tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=logits, labels=one_hot_labels, dim=1))

        mv_class_loss = _summed_xent(mv_sc)
        i_class_loss = _summed_xent(i_sc)
        r_class_loss = _summed_xent(r_sc)
        tf.summary.scalar('mv_class_loss', mv_class_loss)
        tf.summary.scalar('i_class_loss', i_class_loss)
        tf.summary.scalar('r_class_loss', r_class_loss)
        combine_loss = _summed_xent(combine_sc)
        tf.summary.scalar('combine_class_loss', combine_loss)

    with tf.name_scope('weigh_decay'):
        weight_loss = sum(tf.get_collection('losses'))
        tf.summary.scalar('eight_decay_loss', weight_loss)

    with tf.name_scope('training_var_list'):
        mv_variable_list = list(
            set(mv_weights.values()) | set(mv_biases.values()))
        mv_resnet_variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='mv_resnet')
        i_variable_list = list(
            set(i_weights.values()) | set(i_biases.values()))
        i_resnet_variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='i_resnet')
        r_variable_list = list(
            set(r_weights.values()) | set(r_biases.values()))
        r_resnet_variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='r_resnet')

    with tf.name_scope('summary_var'):
        _variable_summaries(mv_weights['w1'])
        _variable_summaries(i_weights['w2'])
        _variable_summaries(r_weights['w2'])
        _variable_summaries(mv_resnet_variables[0])
        _variable_summaries(i_resnet_variables[0])
        _variable_summaries(r_resnet_variables[0])
        _variable_summaries(fusion)

    with tf.name_scope('optimizer'):
        mv_fc_opt = tf.train.AdamOptimizer(FLAGS.mv_lr).minimize(
            mv_class_loss + weight_loss, var_list=mv_variable_list)
        mv_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(
            mv_class_loss, var_list=mv_resnet_variables)
        i_fc_opt = tf.train.AdamOptimizer(FLAGS.i_lr).minimize(
            i_class_loss + weight_loss, var_list=i_variable_list)
        i_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(
            i_class_loss, var_list=i_resnet_variables)
        r_fc_opt = tf.train.AdamOptimizer(FLAGS.r_lr).minimize(
            r_class_loss + weight_loss, var_list=r_variable_list)
        r_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(
            r_class_loss, var_list=r_resnet_variables)
        # var_list is documented as a list/tuple of variables, so wrap
        # the single fusion variable.
        fusion_opt = tf.train.GradientDescentOptimizer(10e-6).minimize(
            combine_loss, var_list=[fusion])

    with tf.name_scope('init_function'):
        init_var = tf.global_variables_initializer()
        init_i_resent = slim.assign_from_checkpoint_fn(
            os.path.join(FLAGS.pretrained_path, 'i_resnet.chkp'),
            slim.get_model_variables('i_resnet'))
        init_mv_resent = slim.assign_from_checkpoint_fn(
            os.path.join(FLAGS.pretrained_path, 'mv_resnet.chkp'),
            slim.get_model_variables('mv_resnet'))
        init_r_resent = slim.assign_from_checkpoint_fn(
            os.path.join(FLAGS.pretrained_path, 'r_resnet.chkp'),
            slim.get_model_variables('r_resnet'))

    with tf.name_scope('video_dataset'):
        train_data = dataset.buildTrainDataset_v2(
            FLAGS.train_list, FLAGS.data_path, FLAGS.num_segments,
            batch_size=FLAGS.batch_size, augment=False, shuffle=True,
            num_threads=1, buffer=100)
        test_data = dataset.buildTestDataset(
            FLAGS.valid_list, FLAGS.data_path, FLAGS.num_segments,
            batch_size=FLAGS.batch_size, num_threads=1, buffer=30)

    with tf.name_scope('dataset_iterator'):
        it = tf.data.Iterator.from_structure(train_data.output_types,
                                             train_data.output_shapes)
        next_data = it.get_next()
        init_data = it.make_initializer(train_data)
        it_test = tf.data.Iterator.from_structure(test_data.output_types,
                                                  test_data.output_shapes)
        next_test_data = it_test.get_next()
        # The test iterator must be bound to the test dataset; binding it
        # to train_data would validate on the training set.
        init_test_data = it_test.make_initializer(test_data)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    sess = tf.Session(config=config)

    with tf.name_scope('writer'):
        merged = tf.summary.merge_all()
        if not tf.gfile.Exists(FLAGS.log_path):
            tf.gfile.MakeDirs(FLAGS.log_path)
        # Runs are numbered by how many entries the log folder already has.
        run_number = len(os.listdir(FLAGS.log_path)) + 1
        logdir = 'run_%02d' % run_number
        tf.gfile.MakeDirs(os.path.join(FLAGS.log_path, logdir))
        writer = tf.summary.FileWriter(
            os.path.join(FLAGS.log_path, logdir), sess.graph)

    # Always release the event file and the session, even when training
    # is interrupted or fails.
    try:
        with tf.name_scope('saver'):
            if not tf.gfile.Exists(FLAGS.save_path):
                tf.gfile.MakeDirs(FLAGS.save_path)
            i_saver = tf.train.Saver(i_variable_list)
            mv_saver = tf.train.Saver(mv_variable_list)
            r_saver = tf.train.Saver(r_variable_list)
            i_resnet_saver = tf.train.Saver(i_resnet_variables)
            mv_resnet_saver = tf.train.Saver(mv_resnet_variables)
            r_resnet_saver = tf.train.Saver(r_resnet_variables)

        with tf.name_scope('intialization'):
            sess.run(init_var)
            sess.run(init_data)
            init_i_resent(sess)
            init_mv_resent(sess)
            init_r_resent(sess)

        # Main training loop
        combine_acc = 0
        i_acc = 0
        mv_acc = 0
        r_acc = 0
        start_time = time.time()
        for step in range(FLAGS.max_steps):
            # Validation
            if step % 1000 == 0:
                combine_batches = []
                mv_batches = []
                i_batches = []
                r_batches = []
                label_batches = []
                # Rewind the test iterator for every validation pass.
                sess.run(init_test_data)
                for i in range(100):
                    ti_arr, tmv_arr, tr_arr, tlabel = sess.run(
                        next_test_data)
                    print(i)
                    i_class, mv_class, r_class, com_class = sess.run(
                        [i_pred, mv_pred, r_pred, pred_class],
                        feed_dict={
                            mv_placeholder: tmv_arr,
                            i_placeholder: ti_arr,
                            r_placeholder: tr_arr,
                            label_placeholder: tlabel
                        })
                    combine_batches.append(np.ravel(com_class))
                    mv_batches.append(np.ravel(mv_class))
                    i_batches.append(np.ravel(i_class))
                    r_batches.append(np.ravel(r_class))
                    label_batches.append(np.ravel(tlabel))
                gt_label = np.concatenate(label_batches)
                # np.mean of the boolean match mask is the accuracy and
                # does not depend on the interpreter's division semantics.
                combine_acc = np.mean(
                    np.concatenate(combine_batches) == gt_label)
                i_acc = np.mean(np.concatenate(i_batches) == gt_label)
                mv_acc = np.mean(np.concatenate(mv_batches) == gt_label)
                r_acc = np.mean(np.concatenate(r_batches) == gt_label)
                print('Step %d finished with accuracy: %f , %f , %f, %f' %
                      (step, i_acc, mv_acc, r_acc, combine_acc))

            # Training procedure
            i_arr, mv_arr, r_arr, label = sess.run(next_data)
            summary, _, _, _, _, _, _, _, pred = sess.run(
                [
                    merged, mv_fc_opt, mv_res_opt, i_fc_opt, i_res_opt,
                    r_fc_opt, r_res_opt, fusion_opt, pred_class
                ],
                feed_dict={
                    mv_placeholder: mv_arr,
                    i_placeholder: i_arr,
                    r_placeholder: r_arr,
                    label_placeholder: label,
                    combine_value_: combine_acc,
                    i_value_: i_acc,
                    mv_value_: mv_acc,
                    r_value_: r_acc
                })
            print(r_arr.shape)
            print(label)
            print(pred)

            if step % 10 == 0:
                duration = time.time() - start_time
                print('Step %d: %.3f sec' % (step, duration))
                writer.add_summary(summary, step)
                start_time = time.time()

            # Model Saving
            if step % 5000 == 0 and not step == 0:
                i_saver.save(sess,
                             os.path.join(FLAGS.save_path, 'i_model.chkp'),
                             global_step=step)
                mv_saver.save(sess,
                              os.path.join(FLAGS.save_path, 'mv_model.chkp'),
                              global_step=step)
                r_saver.save(sess,
                             os.path.join(FLAGS.save_path, 'r_model.chkp'),
                             global_step=step)
            if step % 10000 == 0 and not step == 0:
                i_resnet_saver.save(
                    sess, os.path.join(FLAGS.save_path, 'i_resnet.chkp'),
                    global_step=step)
                mv_resnet_saver.save(
                    sess, os.path.join(FLAGS.save_path, 'mv_resnet.chkp'),
                    global_step=step)
                r_resnet_saver.save(
                    sess, os.path.join(FLAGS.save_path, 'r_resnet.chkp'),
                    global_step=step)
    finally:
        writer.close()
        sess.close()