def __init__(self, num_in_frames=64, in_features=2048 * 2, nb_classes=1,
             nb_layers=2, dropout_prob=0, params_file=None):
    """Build the I3D backbone, the ResNet branch and the linear head.

    Args:
        num_in_frames: forwarded to ``InceptionI3d``.
        in_features: input width of the first linear layer.
        nb_classes: output width of the last linear layer.
        nb_layers: number of linear layers in the head.
        dropout_prob: probability passed to ``nn.Dropout``.
        params_file: path of the I3D state dict to load.  Defaults to the
            location that used to be hard-coded here, so existing callers
            are unaffected.
    """
    super(SyncI3dResNet, self).__init__()
    if params_file is None:
        params_file = "/home/adrien/Code/human_interaction_SyncI3d/params/rgb_imagenet.pt"
    self.params_file = params_file

    self.i3d_net = InceptionI3d(num_in_frames=num_in_frames)
    # map_location keeps the load working on CPU-only hosts even when the
    # checkpoint was saved from a GPU; load_state_dict copies the values
    # into the module's own parameters afterwards.
    self.i3d_net.load_state_dict(
        torch.load(self.params_file, map_location="cpu"))
    self.init_resnet()

    self.dropout = nn.Dropout(p=dropout_prob)
    self.in_features = in_features
    self.nb_classes = nb_classes
    self.nb_layers = nb_layers

    # Each layer halves the width of the previous one; the last layer maps
    # to nb_classes.
    feature_sizes = [
        self.in_features // 2**i for i in range(self.nb_layers)
    ] + [self.nb_classes]
    self.layers = nn.ModuleList([
        nn.Linear(n_in, n_out)
        for n_in, n_out in zip(feature_sizes[:-1], feature_sizes[1:])
    ])
def build_model(
        frames,
        class_num,
        dropout_keep_prob,
        reuse,
        training,
):
    """Build the I3D classifier together with its video reconstructor.

    The reconstructor rebuilds a video from the high-dimensional features
    extracted by I3D (the ``Conv3d_2c_3x3`` endpoint).  Its output is trained
    to be close to the original video, but in reversed order.

    Args:
        frames: input video tensor fed to the I3D encoder.
        class_num: number of classes for the I3D logits.
        dropout_keep_prob: keep probability forwarded to the encoder.
        reuse: ``reuse`` flag for both variable scopes.
        training: training-mode flag for encoder and reconstructor.

    Returns:
        Tuple ``(logits, predictions, reconstructed_video)`` where
        ``predictions`` is the softmax of ``logits``.
    """
    with tf.variable_scope('RGB', reuse=reuse):
        encoder_model = InceptionI3d(class_num,
                                     spatial_squeeze=True,
                                     final_endpoint='Logits')
        logits, endpoints = encoder_model(
            frames,
            is_training=training,
            dropout_keep_prob=dropout_keep_prob)
        predictions = tf.nn.softmax(logits)

    with tf.variable_scope('Reconstructor', reuse=reuse):
        reconstructor = Reconstructor(training=training)
        # The second argument is the slice at index -1 along axis 1 of the
        # input (presumably the last frame -- confirm against Reconstructor).
        reconstructed_video = reconstructor.reconstruct(
            endpoints['Conv3d_2c_3x3'], frames[:, -1])

    return logits, predictions, reconstructed_video
def _build_i3d(self, inputs, reuse=False, is_training=True,
               dropout_keep_prob=0.8):
    """Instantiate InceptionI3d under the ``RGB`` scope and apply it.

    Args:
        inputs: tensor passed to the network.
        reuse: ``reuse`` flag of the ``RGB`` variable scope.
        is_training: forwarded to the network call.
        dropout_keep_prob: forwarded to the network call.

    Returns:
        The ``(logits, endpoints)`` pair produced by the network.
    """
    # NOTE(review): final_endpoint is 'Predictions' -- confirm whether the
    # first returned tensor holds raw logits or softmax outputs.
    with tf.variable_scope('RGB', reuse=reuse):
        i3d = InceptionI3d(self.class_num,
                           spatial_squeeze=True,
                           final_endpoint='Predictions')
        logits, endpoints = i3d(inputs,
                                is_training=is_training,
                                dropout_keep_prob=dropout_keep_prob)
    return logits, endpoints
test_transforms = transforms.Compose([ video_transforms.Resize(256), video_transforms.CenterCrop(224), ]) dataset = Dataset(segment_filepaths=data_split["test"], segment_length=CONFIG["SEGMENT_LENGTH"], frameskip=CONFIG["FRAMESKIP"], transform=test_transforms) dataloader = DataLoader(dataset, batch_size=CONFIG["BATCH_SIZE"], pin_memory=True) # Setup I3D # TODO(seungjaeryanlee): Allow choosing both if CONFIG["RGB_I3D_LOAD_MODEL_PATH"]: rgb_i3d = InceptionI3d(400, in_channels=3) rgb_i3d.replace_logits(dataset.NUM_LABELS) rgb_i3d.load_state_dict(torch.load(CONFIG["RGB_I3D_LOAD_MODEL_PATH"])) rgb_i3d = rgb_i3d.cuda() # TODO(seungjaeryanlee): Not needed? rgb_i3d = nn.DataParallel(rgb_i3d) accuracy, predictions, labels = evaluate_i3d(i3d=rgb_i3d, dataset=dataset, dataloader=dataloader) with open(CONFIG["RGB_I3D_LOAD_MODEL_PATH"].replace(".pt", ".json"), "w+") as fp: json.dump( { "accuracy": accuracy,
def run_training():
    """Train the RGB I3D stream on the HMDB split-1 lists.

    Builds the graph (placeholders, ``InceptionI3d`` under the ``RGB`` scope,
    loss, accuracy, Adam with a staircase-decayed learning rate) and runs
    ``FLAGS.max_steps`` optimisation steps.  Every 10 steps the current
    training batch and a freshly read validation batch are evaluated and
    written to the summary logs; every 3000 steps, and at the final step, a
    checkpoint is saved under ``model_save_dir``.

    Reads the module-level ``FLAGS``, ``gpu_num`` and ``model_save_dir``.
    The session and both summary writers are closed when the loop ends or
    fails.
    """
    train_list = '../../list/hmdb_list/trainlist1_tra.list'
    val_list = '../../list/hmdb_list/testlist1_tra.list'
    batch_size = FLAGS.batch_size * gpu_num

    # Create model directory
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    rgb_pre_model_save_dir = "/home/project/I3D/I3D/checkpoints/rgb_imagenet"

    with tf.Graph().as_default():
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        (rgb_images_placeholder, _, labels_placeholder,
         is_training) = placeholder_inputs(
             batch_size, FLAGS.num_frame_per_clib, FLAGS.crop_size,
             FLAGS.rgb_channels, FLAGS.flow_channels)

        learning_rate = tf.train.exponential_decay(
            FLAGS.learning_rate,
            global_step,
            decay_steps=5000,
            decay_rate=0.1,
            staircase=True)
        opt_rgb = tf.train.AdamOptimizer(learning_rate)

        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits')(rgb_images_placeholder, is_training)
        rgb_loss = tower_loss(rgb_logit, labels_placeholder)
        accuracy = tower_acc(rgb_logit, labels_placeholder)

        # Run everything registered in UPDATE_OPS together with each
        # optimisation step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            rgb_grads = opt_rgb.compute_gradients(rgb_loss)
            apply_gradient_rgb = opt_rgb.apply_gradients(
                rgb_grads, global_step=global_step)
            train_op = tf.group(apply_gradient_rgb)

        # Saver for pre-trained weights: every RGB variable except the Adam
        # slots and the 'Logits' layer.
        rgb_variable_map = {}
        for variable in tf.global_variables():
            parts = variable.name.split('/')
            if (parts[0] == 'RGB' and 'Adam' not in parts[-1]
                    and parts[2] != 'Logits'):
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('rgb_loss', rgb_loss)
        tf.summary.scalar('learning_rate', learning_rate)
        merged = tf.summary.merge_all()

        # Create a session for running Ops on the Graph.
        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            sess.run(init)

            # NOTE: restoring the pre-trained weights is disabled in this
            # script, so training starts from the initialiser.  To re-enable:
            # ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
            # if ckpt and ckpt.model_checkpoint_path:
            #     rgb_saver.restore(sess, ckpt.model_checkpoint_path)

            train_writer = tf.summary.FileWriter(
                './visual_logs/trainabu_tra_scratch_20000_6_64_0.0001_decay_split1',
                sess.graph)
            test_writer = tf.summary.FileWriter(
                './visual_logs/testabu_tra_scratch_20000_6_64_0.0001_decay_split1',
                sess.graph)
            try:
                for step in xrange(FLAGS.max_steps):
                    start_time = time.time()
                    rgb_train_images, _, train_labels, _, _, _ = \
                        input_data.read_clip_and_label(
                            filename=train_list,
                            batch_size=batch_size,
                            num_frames_per_clip=FLAGS.num_frame_per_clib,
                            crop_size=FLAGS.crop_size,
                            shuffle=True)
                    sess.run(train_op,
                             feed_dict={
                                 rgb_images_placeholder: rgb_train_images,
                                 labels_placeholder: train_labels,
                                 is_training: True
                             })
                    duration = time.time() - start_time
                    print('Step %d: %.3f sec' % (step, duration))

                    last_step = (step + 1) == FLAGS.max_steps

                    # Evaluate the model periodically.
                    if step % 10 == 0 or last_step:
                        print('Training Data Eval:')
                        summary, acc, loss_rgb = sess.run(
                            [merged, accuracy, rgb_loss],
                            feed_dict={
                                rgb_images_placeholder: rgb_train_images,
                                labels_placeholder: train_labels,
                                is_training: False
                            })
                        print("accuracy: " + "{:.5f}".format(acc))
                        print("rgb_loss: " + "{:.5f}".format(loss_rgb))
                        train_writer.add_summary(summary, step)

                        print('Validation Data Eval:')
                        rgb_val_images, _, val_labels, _, _, _ = \
                            input_data.read_clip_and_label(
                                filename=val_list,
                                batch_size=batch_size,
                                num_frames_per_clip=FLAGS.num_frame_per_clib,
                                crop_size=FLAGS.crop_size,
                                shuffle=True)
                        summary, acc = sess.run(
                            [merged, accuracy],
                            feed_dict={
                                rgb_images_placeholder: rgb_val_images,
                                labels_placeholder: val_labels,
                                is_training: False
                            })
                        print("accuracy: " + "{:.5f}".format(acc))
                        test_writer.add_summary(summary, step)

                    # Save a checkpoint periodically.
                    if (step + 1) % 3000 == 0 or last_step:
                        saver.save(sess,
                                   os.path.join(model_save_dir,
                                                'i3d_hmdb_model'),
                                   global_step=step)
            finally:
                # Flush pending events even if training is interrupted.
                train_writer.close()
                test_writer.close()
    print("done")
def run_training():
    """Evaluate the RGB I3D model video by video and report top-1 accuracy.

    Each line of the test list is one video.  The video is read clip by clip
    through ``input_test.read_clip_and_label`` until it signals the end; the
    softmax scores of all clips are averaged and the arg-max is compared with
    the label.  Per-video results and the overall mean are printed.

    Reads the module-level ``FLAGS`` and ``gpu_num``.
    """
    pre_model_save_dir = "./models/rgb_imagenet_10000_6_64_0.0001_decay"
    test_list_file = '../../list/hmdb_list/test_flow.list'
    with open(test_list_file, 'r') as list_fp:
        test_lines = list(list_fp)
    num_test_videos = len(test_lines)
    print("Number of test videos={}".format(num_test_videos))

    with tf.Graph().as_default():
        rgb_images_placeholder, _, labels_placeholder, is_training = \
            placeholder_inputs(FLAGS.batch_size * gpu_num,
                               FLAGS.num_frame_per_clib, FLAGS.crop_size,
                               FLAGS.rgb_channels)

        with tf.variable_scope('RGB'):
            logit, _ = InceptionI3d(num_classes=FLAGS.classics,
                                    spatial_squeeze=True,
                                    final_endpoint='Logits',
                                    name='inception_i3d')(
                                        rgb_images_placeholder, is_training)
        norm_score = tf.nn.softmax(logit)

        # Saver used to restore the trained checkpoint.
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            sess.run(init)

            ckpt = tf.train.get_checkpoint_state(pre_model_save_dir)
            if ckpt and ckpt.model_checkpoint_path:
                print("loading checkpoint %s,waiting......" %
                      ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
                print("load complete!")
            else:
                # Make it visible that the numbers below come from
                # freshly initialised weights.
                print("WARNING: no checkpoint found in %s, evaluating "
                      "initial weights" % pre_model_save_dir)

            top1_list = []
            for step in xrange(num_test_videos):
                start_time = time.time()
                s_index = 0
                predicts = []
                top1 = False
                while True:
                    val_images, _, val_labels, s_index, is_end = \
                        input_test.read_clip_and_label(
                            filename=test_lines[step],
                            batch_size=FLAGS.batch_size * gpu_num,
                            s_index=s_index,
                            num_frames_per_clip=FLAGS.num_frame_per_clib,
                            crop_size=FLAGS.crop_size,
                        )
                    predict = sess.run(norm_score,
                                       feed_dict={
                                           rgb_images_placeholder: val_images,
                                           labels_placeholder: val_labels,
                                           is_training: False
                                       })
                    # The reshape only succeeds when the run yields exactly
                    # FLAGS.classics scores, i.e. a single clip per run.
                    predicts.append(
                        np.array(predict).astype(np.float32).reshape(
                            FLAGS.classics))
                    if is_end:
                        avg_pre = np.mean(predicts, axis=0).tolist()
                        top1 = (avg_pre.index(max(avg_pre)) == val_labels)
                        top1_list.append(top1)
                        break
                duration = time.time() - start_time
                print('TOP_1_ACC in test: %f , time use: %.3f' %
                      (top1, duration))
            print(len(top1_list))
            print('TOP_1_ACC in test: %f' % np.mean(top1_list))
    print("done")
def build_i3d_model(video_tensor):
    """Compute guided-backprop saliency masks for the predicted class.

    Builds the RGB I3D graph, restores the trained checkpoint, predicts the
    class of the first sample in ``video_tensor`` and explains that logit
    with ``saliency.GuidedBackprop`` (plain and SmoothGrad variants).

    Args:
        video_tensor: batch fed to the image placeholder, whose shape is
            ``[FLAGS.batch_size, FLAGS.n_frames, FLAGS.crop_size,
            FLAGS.crop_size, FLAGS.rgb_channels]``.  Only
            ``video_tensor[0]`` is explained.

    Returns:
        Tuple ``(vanilla_mask_grayscale, smoothgrad_mask_grayscale)`` as
        produced by ``saliency.VisualizeImageGrayscale``.
    """
    # Note: I3D trained model
    model_name = "./models/rgb_imagenet_10000_6_64_0.0001_decay/i3d_ucf_model-9999"

    graph = tf.Graph()
    with graph.as_default():
        images_placeholder = tf.placeholder(tf.float32, [
            FLAGS.batch_size, FLAGS.n_frames, FLAGS.crop_size,
            FLAGS.crop_size, FLAGS.rgb_channels
        ])
        with tf.variable_scope('RGB'):
            logits, _ = InceptionI3d(num_classes=FLAGS.classics,
                                     spatial_squeeze=True,
                                     final_endpoint='Logits',
                                     name='inception_i3d')(
                                         images_placeholder,
                                         is_training=False)

        # Saver used to restore the trained model.
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Scalar logit of the first sample selected at run time; this is
        # the quantity whose gradient the saliency method explains.
        neuron_selector = tf.placeholder(tf.int32)
        y = logits[0][neuron_selector]
        prediction = tf.argmax(logits, 1)

        # Create a session for running Ops on the Graph.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        try:
            sess.run(init)
            # Restore trained model; report success only once it is done.
            saver.restore(sess, model_name)
            print("load model succeed")

            prediction_class = sess.run(
                prediction,
                feed_dict={images_placeholder: video_tensor})[0]

            guided_backprop = saliency.GuidedBackprop(
                graph, sess, y, images_placeholder)
            # Compute the vanilla mask and the smoothed mask.
            vanilla_guided_backprop_mask_3d = guided_backprop.GetMask(
                video_tensor[0],
                feed_dict={neuron_selector: prediction_class})
            smoothgrad_guided_backprop_mask_3d = \
                guided_backprop.GetSmoothedMask(
                    video_tensor[0],
                    feed_dict={neuron_selector: prediction_class})
        finally:
            sess.close()

    vanilla_mask_grayscale = saliency.VisualizeImageGrayscale(
        vanilla_guided_backprop_mask_3d)
    smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(
        smoothgrad_guided_backprop_mask_3d)
    return vanilla_mask_grayscale, smoothgrad_mask_grayscale
def model_fn(features, labels, mode, params, config):
    """Estimator ``model_fn`` for the ECO and I3D video classifiers.

    Args:
        features: dict holding the input clip under ``'images'``.
        labels: dict holding the ground truth under ``'labels'`` (not read
            in PREDICT mode).
        mode: a ``tf.estimator.ModeKeys`` value.
        params: dict with ``'net'`` (``'eco'`` or ``'i3d'``),
            ``'batch_size'``, ``'class_num'``, ``'weight_decay'``,
            ``'lr_boundaries'``, ``'lr_values'`` plus the network-specific
            keys read below.
        config: unused; part of the Estimator ``model_fn`` signature.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.

    Raises:
        ValueError: if ``params['net']`` names an unsupported network.
    """
    # the base network
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    batch_size = params['batch_size']
    if params['net'] == 'eco':
        net = ECONet(batch_size, params['time_step'], is_training=is_training)
        _, logits = net(features['images'],
                        class_num=params['class_num'],
                        is_lite=False)
        predictions = net.get_predictions(logits)
    elif params['net'] == 'i3d':
        net = InceptionI3d(params['class_num'],
                           spatial_squeeze=True,
                           final_endpoint='Mixed_5c')
        # Bound to `logits` so the shared loss code below works for both
        # networks (it previously raised NameError on this branch).
        logits, predictions = net.get_finetunning(
            features['images'],
            params['pretrain_ckpt_path'],
            is_training=is_training,
            dropout_keep_prob=params['dropout_keep_prob'])
    else:
        raise ValueError(
            "unsupported params['net']: %r (expected 'eco' or 'i3d')" %
            (params['net'],))

    if mode == tf.estimator.ModeKeys.PREDICT:
        # this is required for exporting a savedmodel
        export_outputs = tf.estimator.export.PredictOutput({
            name: tf.identity(tensor, name)
            for name, tensor in predictions.items()
        })
        return tf.estimator.EstimatorSpec(
            mode,
            predictions=predictions,
            export_outputs={'outputs': export_outputs})

    # add L2 regularization
    with tf.name_scope('weight_decay'):
        add_weight_decay(params['weight_decay'])
        regularization_loss = tf.losses.get_regularization_loss()

    # create the classification loss
    losses = net.loss(logits, labels['labels'])
    tf.losses.add_loss(losses)
    tf.summary.scalar('regularization_loss', regularization_loss)
    tf.summary.scalar('classification_loss', losses)
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('evaluator'):
            eval_metric_ops = {
                'acc':
                tf.metrics.accuracy(labels['labels'],
                                    predictions['pred_labels'])
            }
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=eval_metric_ops)

    assert mode == tf.estimator.ModeKeys.TRAIN
    with tf.variable_scope('learning_rate'):
        global_step = tf.train.get_global_step()
        learning_rate = tf.train.piecewise_constant(global_step,
                                                    params['lr_boundaries'],
                                                    params['lr_values'])
        tf.summary.scalar('learning_rate', learning_rate)

    # Run everything registered in UPDATE_OPS with each optimisation step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops), tf.variable_scope('optimizer'):
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum=0.9,
                                               use_nesterov=True)
        grads_and_vars = optimizer.compute_gradients(total_loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)

    for g, v in grads_and_vars:
        if g is not None:
            # v.name[:-2] drops the trailing ':0' of the variable name.
            tf.summary.histogram(v.name[:-2] + '_hist', v)
            tf.summary.histogram(v.name[:-2] + '_grad_hist', g)
        else:
            # Variables without a gradient are only reported.
            print(v)
    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
def run_training():
    """Fine-tune the RGB I3D stream from the checkpoint in ``../pretrained``.

    Builds the training graph, restores every RGB variable except the Adam
    slots and the 'Logits' layer from ``../pretrained/model.ckpt``, then runs
    ``FLAGS.max_steps`` optimisation steps.  Every 10 steps accuracy and loss
    are printed for the current training batch and for a freshly read
    validation batch; every 2000 steps, and at the final step, a checkpoint
    is written to ``model_save_dir``.

    Reads the module-level ``FLAGS``, ``gpu_num`` and ``model_save_dir``.
    """
    train_list = '../traintestlist/train_clean_model.txt'
    val_list = "../traintestlist/test_clean_model.txt"
    batch_size = FLAGS.batch_size * gpu_num

    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    rgb_pre_model_save_dir = "../pretrained"
    # The checkpoint prefix is fixed.  Previously it was patched onto the
    # result of tf.train.get_checkpoint_state(), which is None when the
    # directory has no 'checkpoint' index file and then raised
    # AttributeError before the None check could run.
    pretrained_ckpt = os.path.join(rgb_pre_model_save_dir, "model.ckpt")

    with tf.Graph().as_default():
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        (rgb_images_placeholder, _, labels_placeholder,
         is_training) = placeholder_inputs(
             batch_size, FLAGS.num_frame_per_clib, FLAGS.crop_size,
             FLAGS.rgb_channels, FLAGS.flow_channels)

        learning_rate = tf.train.exponential_decay(
            FLAGS.learning_rate,
            global_step,
            decay_steps=3000,
            decay_rate=0.1,
            staircase=True)
        opt_rgb = tf.train.AdamOptimizer(learning_rate)

        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits')(rgb_images_placeholder, is_training)
        rgb_loss = tower_loss(rgb_logit, labels_placeholder)
        accuracy = tower_acc(rgb_logit, labels_placeholder)

        # Run everything registered in UPDATE_OPS with each training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            rgb_grads = opt_rgb.compute_gradients(rgb_loss)
            apply_gradient_rgb = opt_rgb.apply_gradients(
                rgb_grads, global_step=global_step)
            train_op = tf.group(apply_gradient_rgb)

        # Restore map: RGB variables minus Adam slots and the 'Logits'
        # layer, which therefore keeps its fresh initialisation.
        rgb_variable_map = {}
        for variable in tf.global_variables():
            parts = variable.name.split('/')
            if (parts[0] == 'RGB' and 'Adam' not in parts[-1]
                    and parts[2] != 'Logits'):
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            sess.run(init)

            print("loading checkpoint %s,waiting......" % pretrained_ckpt)
            rgb_saver.restore(sess, pretrained_ckpt)
            print("load complete!")

            for step in xrange(FLAGS.max_steps):
                start_time = time.time()
                rgb_train_images, _, train_labels, _, _, _ = \
                    input_data.read_clip_and_label(
                        filename=train_list,
                        batch_size=batch_size,
                        num_frames_per_clip=FLAGS.num_frame_per_clib,
                        crop_size=FLAGS.crop_size,
                        shuffle=True)
                sess.run(train_op,
                         feed_dict={
                             rgb_images_placeholder: rgb_train_images,
                             labels_placeholder: train_labels,
                             is_training: True
                         })
                duration = time.time() - start_time
                print('Step %d: %.3f sec' % (step, duration))

                last_step = (step + 1) == FLAGS.max_steps

                if step % 10 == 0 or last_step:
                    # Summaries were previously computed here and thrown
                    # away (no writer exists), so only the printed metrics
                    # are fetched.
                    print('Training Data Eval:')
                    acc, loss_rgb = sess.run(
                        [accuracy, rgb_loss],
                        feed_dict={
                            rgb_images_placeholder: rgb_train_images,
                            labels_placeholder: train_labels,
                            is_training: False
                        })
                    print("accuracy: " + "{:.5f}".format(acc))
                    print("rgb_loss: " + "{:.5f}".format(loss_rgb))

                    print('Validation Data Eval:')
                    rgb_val_images, _, val_labels, _, _, _ = \
                        input_data.read_clip_and_label(
                            filename=val_list,
                            batch_size=batch_size,
                            num_frames_per_clip=FLAGS.num_frame_per_clib,
                            crop_size=FLAGS.crop_size,
                            shuffle=True)
                    acc, loss_rgb = sess.run(
                        [accuracy, rgb_loss],
                        feed_dict={
                            rgb_images_placeholder: rgb_val_images,
                            labels_placeholder: val_labels,
                            is_training: False
                        })
                    print("accuracy: " + "{:.5f}".format(acc))
                    print("rgb_loss: " + "{:.5f}".format(loss_rgb))

                if (step + 1) % 2000 == 0 or last_step:
                    saver.save(sess,
                               os.path.join(model_save_dir, 'i3d_ucf_model'),
                               global_step=step)
    print("done")
def run_training():
    """Evaluate a trained RGB I3D model batch by batch on ``testfile_``.

    The checkpoint directory is derived from the module-level ``epsilon_``,
    ``portion_`` and ``trigSize``.  When the test-list name contains
    ``"target"``, the saved trigger patch is written over the last
    ``trigSize`` positions along the two axes that follow the frame axis
    (presumably the bottom-right image corner) of every frame before the
    batch is evaluated.  The running and final mean batch accuracy are
    printed.

    Reads the module-level ``FLAGS``, ``gpu_num``, ``testfile_``,
    ``epsilon_``, ``portion_`` and ``trigSize``.
    """
    pre_model_save_dir = "./models/rgb_" + str(epsilon_) + "_" + str(
        int(portion_ * 100)) + "_imagenet_10000_6_64_0.0001_decay_trig" + str(
            trigSize)
    test_list_file = testfile_
    with open(test_list_file, 'r') as list_fp:
        num_test_videos = len(list(list_fp))
    print("Number of test videos={}".format(num_test_videos))

    with tf.Graph().as_default():
        # Floor division keeps the frame dimension an integer on Python 3
        # as well; it matches `/` on Python 2 integers.
        rgb_images_placeholder, _, labels_placeholder, is_training = \
            placeholder_inputs(FLAGS.batch_size * gpu_num,
                               FLAGS.num_frame_per_clib // FLAGS.sample_rate,
                               FLAGS.crop_size, FLAGS.rgb_channels)

        with tf.variable_scope('RGB'):
            logit, _ = InceptionI3d(num_classes=FLAGS.classics,
                                    spatial_squeeze=True,
                                    final_endpoint='Logits',
                                    name='inception_i3d')(
                                        rgb_images_placeholder, is_training)
        norm_score = tf.nn.softmax(logit)
        accuracy = tower_acc(norm_score, labels_placeholder)

        # Restore map: every RGB variable except the Adam slots.
        rgb_variable_map = {}
        for variable in tf.global_variables():
            parts = variable.name.split("/")
            if parts[0] == "RGB" and "Adam" not in parts[-1]:
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
        init = tf.global_variables_initializer()

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            sess.run(init)
            ckpt = tf.train.get_checkpoint_state(pre_model_save_dir)
            if ckpt and ckpt.model_checkpoint_path:
                print("loading checkpoint %s,waiting......" %
                      ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
                print("load complete!")
            else:
                print("WARNING: no checkpoint found in %s, evaluating "
                      "initial weights" % pre_model_save_dir)

            batch_size = FLAGS.batch_size
            num_batches = num_test_videos // batch_size

            # The trigger file does not change between batches: load once.
            stamp_trigger = "target" in testfile_
            trig = None
            if stamp_trigger:
                trig = np.load("trigger" + str(trigSize) + ".npy")

            cnt = 0
            acc_all = 0
            for i in range(num_batches):
                start = i * batch_size
                rgb_val_images, _, val_labels, _, _, _ = \
                    input_data.read_clip_and_label(
                        filename=test_list_file,
                        batch_size=batch_size,
                        start_pos=start,
                        num_frames_per_clip=FLAGS.num_frame_per_clib,
                        crop_size=FLAGS.crop_size,
                        shuffle=False)

                if stamp_trigger:
                    # Copy the trigger patch of sample 0 of the saved array
                    # into every sample, frame by frame.
                    for sample in range(FLAGS.batch_size):
                        for frame in range(FLAGS.num_frame_per_clib):
                            for row in range(1, trigSize + 1):
                                for col in range(1, trigSize + 1):
                                    rgb_val_images[sample][frame][-row][
                                        -col] = trig[0][frame][-row][-col]

                acc, nc = sess.run(
                    [accuracy, norm_score],
                    feed_dict={
                        rgb_images_placeholder: rgb_val_images,
                        labels_placeholder: val_labels,
                        is_training: False
                    })
                cnt += 1
                acc_all += acc
                print(start, acc_all / cnt, acc, np.argmax(nc, axis=1))

            if cnt:
                print(acc_all / cnt)
            else:
                # Fewer test videos than one batch: there is nothing to
                # average (this used to raise ZeroDivisionError).
                print("no complete batch to evaluate")
def run_training():
    """Optimise a trigger patch against a trained RGB I3D model.

    The network weights are never updated.  At every step a batch is read,
    the current patch is pasted over the last ``trigger_size`` positions
    along the two axes that follow the frame axis of sample 0 (presumably
    the bottom-right image corner), and the gradient of

        loss(logits, target class 0) - loss(logits, last predicted class)

    with respect to the input is computed.  The patch values move one unit
    against the sign of that gradient and are clipped to ``[0, 255]``.  The
    patch array is saved to ``trigger<trigger_size>.npy`` every 100 steps
    and at the final step.

    The optimizer, summaries and checkpoint saver copied from the training
    script were never run here and have been dropped.

    Reads the module-level ``FLAGS``, ``gpu_num`` and ``trigger_size``.
    """
    rgb_pre_model_save_dir = "./models/rgb_imagenet_10000_6_64_0.0001_decay"

    with tf.Graph().as_default():
        (rgb_images_placeholder, _, labels_placeholder,
         is_training) = placeholder_inputs(
             FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
             FLAGS.crop_size, FLAGS.rgb_channels, FLAGS.flow_channels)

        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits')(rgb_images_placeholder, is_training)

        # Loss towards the target label minus loss towards the label fed
        # through the second placeholder (the class predicted last step).
        rgb_loss = tower_loss(rgb_logit, labels_placeholder)
        labels_placeholder2 = tf.placeholder(tf.int64,
                                             shape=(FLAGS.batch_size,))
        rgb_loss2 = -tower_loss(rgb_logit, labels_placeholder2)
        rgb_loss3 = rgb_loss + rgb_loss2
        grad = tf.gradients(rgb_loss3, rgb_images_placeholder)[0]

        # Restore map: every RGB variable except Adam slots.
        rgb_variable_map = {}
        for variable in tf.global_variables():
            parts = variable.name.split("/")
            if parts[0] == "RGB" and "Adam" not in parts[-1]:
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
        init = tf.global_variables_initializer()

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            sess.run(init)

            ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
            if ckpt and ckpt.model_checkpoint_path:
                print("loading checkpoint %s,waiting......" %
                      ckpt.model_checkpoint_path)
                rgb_saver.restore(sess, ckpt.model_checkpoint_path)
                print("load complete!")
            else:
                print("WARNING: no checkpoint found in %s, optimising "
                      "against initial weights" % rgb_pre_model_save_dir)

            # Patch values start at mid-grey over the whole clip volume.
            mask_val = np.zeros(
                (FLAGS.batch_size, FLAGS.num_frame_per_clib, FLAGS.crop_size,
                 FLAGS.crop_size, FLAGS.rgb_channels)) + 255.0 / 2
            # Label fed to the second loss term on the first step.
            index_ = np.array([100])

            for step in xrange(FLAGS.max_steps):
                start_time = time.time()
                rgb_train_images, _, train_labels, _, _, _ = \
                    input_data.read_clip_and_label(
                        filename="../traintestlist/generate_trigger.txt",
                        batch_size=FLAGS.batch_size * gpu_num,
                        num_frames_per_clip=FLAGS.num_frame_per_clib,
                        crop_size=FLAGS.crop_size,
                        shuffle=True)

                # Paste the current patch into sample 0 (in place).
                for frame in range(FLAGS.num_frame_per_clib):
                    for row in range(1, trigger_size + 1):
                        for col in range(1, trigger_size + 1):
                            rgb_train_images[0][frame][-row][-col] = \
                                mask_val[0][frame][-row][-col]

                # target class
                target_labels = np.array([0])
                grad_, logit_ = sess.run(
                    [grad, rgb_logit],
                    feed_dict={
                        rgb_images_placeholder: rgb_train_images,
                        labels_placeholder: target_labels,
                        is_training: False,
                        labels_placeholder2: np.array(index_)
                    })

                # Signed-gradient step of size 1, kept in pixel range.
                mask_val = np.add(mask_val,
                                  -1 * np.sign(grad_),
                                  casting='unsafe')
                mask_val = np.clip(mask_val, 0, 255)
                index_ = np.argmax(logit_, axis=1)

                print(index_, logit_[0][index_], train_labels)
                print([0], logit_[0][0])
                duration = time.time() - start_time
                print('Step %d: %.3f sec' % (step, duration))

                if (step + 1) % 100 == 0 or (step + 1) == FLAGS.max_steps:
                    np.save("trigger" + str(trigger_size), mask_val)
                    print("save......")
def run_training():
    """Evaluate the fused RGB + Flow I3D model and report top-1 accuracy.

    Two ``InceptionI3d`` networks are built under the ``RGB`` and ``Flow``
    scopes and restored from their own checkpoint directories.  Their logits
    are summed before the softmax.  Each line of the test list is one video:
    it is read clip by clip until ``input_test.read_clip_and_label`` signals
    the end, the clip scores are averaged and the arg-max is compared with
    the label.

    Reads the module-level ``FLAGS`` and ``gpu_num``.
    """
    rgb_pre_model_save_dir = "/media/senilab/DATA/Master/I3D-Tensorflow/experiments/ucf-101/models/rgb_imagenet_30000_101_5_64_0.0001_decay"
    flow_pre_model_save_dir = "/media/senilab/DATA/Master/I3D-Tensorflow/experiments/ucf-101/models/flow_imagenet_101_20000_5_64_0.0001_decay"
    test_list_file = '/media/senilab/DATA/Master/I3D-Tensorflow/list/ucf_list/test.list'
    with open(test_list_file, 'r') as list_fp:
        test_lines = list(list_fp)
    num_test_videos = len(test_lines)
    print("Number of test videos={}".format(num_test_videos))

    with tf.Graph().as_default():
        (rgb_images_placeholder, flow_images_placeholder, labels_placeholder,
         is_training) = placeholder_inputs(
             FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
             FLAGS.crop_size, FLAGS.rgb_channels)

        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits',
                name='inception_i3d')(rgb_images_placeholder, is_training)
        with tf.variable_scope('Flow'):
            flow_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits',
                name='inception_i3d')(flow_images_placeholder, is_training)
        norm_score = tf.nn.softmax(tf.add(rgb_logit, flow_logit))

        # One restore map per stream, skipping the Adam slots.
        rgb_variable_map = {}
        flow_variable_map = {}
        for variable in tf.global_variables():
            parts = variable.name.split('/')
            if 'Adam' in parts[-1]:
                continue
            key = variable.name.replace(':0', '')
            if parts[0] == 'RGB':
                rgb_variable_map[key] = variable
            elif parts[0] == 'Flow':
                flow_variable_map[key] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            sess.run(init)

            # load pre_train models
            for stream_saver, stream_dir in (
                    (rgb_saver, rgb_pre_model_save_dir),
                    (flow_saver, flow_pre_model_save_dir)):
                ckpt = tf.train.get_checkpoint_state(stream_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    print("loading checkpoint %s,waiting......" %
                          ckpt.model_checkpoint_path)
                    stream_saver.restore(sess, ckpt.model_checkpoint_path)
                    print("load complete!")
                else:
                    # Make it visible that this stream keeps its
                    # freshly initialised weights.
                    print("WARNING: no checkpoint found in %s, using "
                          "initial weights" % stream_dir)

            top1_list = []
            start_time_all = time.time()
            for step in xrange(num_test_videos):
                start_time = time.time()
                s_index = 0
                predicts = []
                top1 = False
                while True:
                    rgb_images, flow_images, val_labels, s_index, is_end = \
                        input_test.read_clip_and_label(
                            filename=test_lines[step],
                            batch_size=FLAGS.batch_size * gpu_num,
                            s_index=s_index,
                            num_frames_per_clip=FLAGS.num_frame_per_clib,
                            crop_size=FLAGS.crop_size,
                        )
                    predict = sess.run(
                        norm_score,
                        feed_dict={
                            rgb_images_placeholder: rgb_images,
                            flow_images_placeholder: flow_images,
                            labels_placeholder: val_labels,
                            is_training: False
                        })
                    # The reshape only succeeds when the run yields exactly
                    # FLAGS.classics scores, i.e. a single clip per run.
                    predicts.append(
                        np.array(predict).astype(np.float32).reshape(
                            FLAGS.classics))
                    if is_end:
                        avg_pre = np.mean(predicts, axis=0).tolist()
                        print(avg_pre.index(max(avg_pre)))
                        print('val_label', val_labels)
                        top1 = (avg_pre.index(max(avg_pre)) == val_labels)
                        top1_list.append(top1)
                        break
                duration = time.time() - start_time
                print('TOP_1_ACC in test: %f , time use: %.3f' %
                      (top1, duration))
            print(len(top1_list))
            dur_time_all = time.time() - start_time_all
            print('TOP_1_ACC in test_all: %f, time use: %.3f' %
                  (np.mean(top1_list), dur_time_all))
    print("done")