Example No. 1
# torch / torch.nn imports assumed by this snippet; InceptionI3d comes from
# the repo's own I3D implementation (its import path is not shown here).
import torch
import torch.nn as nn


class SyncI3dResNet(nn.Module):
    def __init__(self,
                 num_in_frames=64,
                 in_features=2048 * 2,
                 nb_classes=1,
                 nb_layers=2,
                 dropout_prob=0):
        super(SyncI3dResNet, self).__init__()
        self.params_file = "/home/adrien/Code/human_interaction_SyncI3d/params/rgb_imagenet.pt"

        self.i3d_net = InceptionI3d(num_in_frames=num_in_frames)
        self.i3d_net.load_state_dict(torch.load(self.params_file))

        self.init_resnet()

        self.dropout = nn.Dropout(p=dropout_prob)

        self.in_features = in_features
        self.nb_classes = nb_classes
        self.nb_layers = nb_layers
        feature_sizes = [
            self.in_features // 2**i for i in range(self.nb_layers)
        ] + [self.nb_classes]
        self.layers = nn.ModuleList([
            nn.Linear(feature_sizes[i], feature_sizes[i + 1])
            for i in range(self.nb_layers)
        ])
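
The original snippet stops before any forward pass. A minimal sketch of what the head's forward might look like, assuming (hypothetically) that dropout sits between the linear layers and ReLU is the hidden activation:

    def forward(self, x):
        # x: fused I3D features of shape (batch, in_features)
        for i, layer in enumerate(self.layers):
            x = layer(self.dropout(x))
            if i < self.nb_layers - 1:
                x = torch.relu(x)  # assumed hidden activation
        return x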
Example No. 2
def build_model(
    frames,
    class_num,
    dropout_keep_prob,
    reuse,
    training,
):
    with tf.variable_scope('RGB', reuse=reuse):
        encoder_model = InceptionI3d(class_num,
                                     spatial_squeeze=True,
                                     final_endpoint='Logits')
        logits, endpoints = encoder_model(frames,
                                          is_training=training,
                                          dropout_keep_prob=dropout_keep_prob)

        predictions = tf.nn.softmax(logits)

    with tf.variable_scope('Reconstructor', reuse=reuse):
        """Reconstructor
                This reconstructor is used to reconstruct video from 
                high-dimensional features extracted by I3D. It output
                a video that trained to be close to the original video,
                but in a reversed order.
        """
        reconstructor = Reconstructor(training=training)
        reconstructed_video = reconstructor.reconstruct(
            endpoints['Conv3d_2c_3x3'], frames[:, -1])

    return logits, predictions, reconstructed_video
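
A minimal TF1 usage sketch for build_model; the clip shape and class count below are illustrative assumptions, not values from the original:

frames = tf.placeholder(tf.float32, [None, 64, 224, 224, 3])  # assumed clip shape
logits, predictions, reconstructed = build_model(frames,
                                                 class_num=101,  # assumed
                                                 dropout_keep_prob=0.8,
                                                 reuse=False,
                                                 training=True)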
Example No. 3
    def _build_i3d(self,
                   inputs,
                   reuse=False,
                   is_training=True,
                   dropout_keep_prob=0.8):
        with tf.variable_scope('RGB', reuse=reuse):
            encoder_model = InceptionI3d(self.class_num,
                                         spatial_squeeze=True,
                                         final_endpoint='Predictions')
            logits, endpoints = encoder_model(
                inputs,
                is_training=is_training,
                dropout_keep_prob=dropout_keep_prob)
            return logits, endpoints
Example No. 4
    test_transforms = transforms.Compose([
        video_transforms.Resize(256),
        video_transforms.CenterCrop(224),
    ])
    dataset = Dataset(segment_filepaths=data_split["test"],
                      segment_length=CONFIG["SEGMENT_LENGTH"],
                      frameskip=CONFIG["FRAMESKIP"],
                      transform=test_transforms)
    dataloader = DataLoader(dataset,
                            batch_size=CONFIG["BATCH_SIZE"],
                            pin_memory=True)

    # Setup I3D
    # TODO(seungjaeryanlee): Allow choosing both
    if CONFIG["RGB_I3D_LOAD_MODEL_PATH"]:
        rgb_i3d = InceptionI3d(400, in_channels=3)
        rgb_i3d.replace_logits(dataset.NUM_LABELS)
        rgb_i3d.load_state_dict(torch.load(CONFIG["RGB_I3D_LOAD_MODEL_PATH"]))
        rgb_i3d = rgb_i3d.cuda()
        # TODO(seungjaeryanlee): Not needed?
        rgb_i3d = nn.DataParallel(rgb_i3d)

    accuracy, predictions, labels = evaluate_i3d(i3d=rgb_i3d,
                                                 dataset=dataset,
                                                 dataloader=dataloader)

    with open(CONFIG["RGB_I3D_LOAD_MODEL_PATH"].replace(".pt", ".json"),
              "w+") as fp:
        json.dump(
            {
                "accuracy": accuracy,
Example No. 5
def run_training():
    # Get the sets of images and labels for training and validation, and
    # tell TensorFlow that the model will be built into the default Graph.

    # Create model directory
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    rgb_pre_model_save_dir = "/home/project/I3D/I3D/checkpoints/rgb_imagenet"

    with tf.Graph().as_default():
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        rgb_images_placeholder, flow_images_placeholder, labels_placeholder, is_training = placeholder_inputs(
            FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
            FLAGS.crop_size, FLAGS.rgb_channels, FLAGS.flow_channels)

        learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                   global_step,
                                                   decay_steps=5000,
                                                   decay_rate=0.1,
                                                   staircase=True)
        opt_rgb = tf.train.AdamOptimizer(learning_rate)
        #opt_stable = tf.train.MomentumOptimizer(learning_rate, 0.9)
        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits')(rgb_images_placeholder, is_training)
        rgb_loss = tower_loss(rgb_logit, labels_placeholder)
        accuracy = tower_acc(rgb_logit, labels_placeholder)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            rgb_grads = opt_rgb.compute_gradients(rgb_loss)
            apply_gradient_rgb = opt_rgb.apply_gradients(
                rgb_grads, global_step=global_step)
            train_op = tf.group(apply_gradient_rgb)
            null_op = tf.no_op()

        # Create a saver for loading trained checkpoints.
        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split(
                    '/')[0] == 'RGB' and 'Adam' not in variable.name.split(
                        '/')[-1] and variable.name.split('/')[2] != 'Logits':
                #rgb_variable_map[variable.name.replace(':0', '')[len('RGB/inception_i3d/'):]] = variable
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)
        # Create summary writer
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('rgb_loss', rgb_loss)
        tf.summary.scalar('learning_rate', learning_rate)
        merged = tf.summary.merge_all()
    # load pre_train models
    #ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
    #if ckpt and ckpt.model_checkpoint_path:
    #    print("loading checkpoint %s,waiting......" % ckpt.model_checkpoint_path)
    #    rgb_saver.restore(sess, ckpt.model_checkpoint_path)
    #    print("load complete!")

    train_writer = tf.summary.FileWriter(
        './visual_logs/trainabu_tra_scratch_20000_6_64_0.0001_decay_split1',
        sess.graph)
    test_writer = tf.summary.FileWriter(
        './visual_logs/testabu_tra_scratch_20000_6_64_0.0001_decay_split1',
        sess.graph)
    for step in xrange(FLAGS.max_steps):
        start_time = time.time()
        rgb_train_images, flow_train_images, train_labels, _, _, _ = input_data.read_clip_and_label(
            filename='../../list/hmdb_list/trainlist1_tra.list',
            batch_size=FLAGS.batch_size * gpu_num,
            num_frames_per_clip=FLAGS.num_frame_per_clib,
            crop_size=FLAGS.crop_size,
            shuffle=True)
        sess.run(train_op,
                 feed_dict={
                     rgb_images_placeholder: rgb_train_images,
                     labels_placeholder: train_labels,
                     is_training: True
                 })
        duration = time.time() - start_time
        print('Step %d: %.3f sec' % (step, duration))

        # Save a checkpoint and evaluate the model periodically.
        if step % 10 == 0 or (step + 1) == FLAGS.max_steps:
            print('Training Data Eval:')
            summary, acc, loss_rgb = sess.run(
                [merged, accuracy, rgb_loss],
                feed_dict={
                    rgb_images_placeholder: rgb_train_images,
                    labels_placeholder: train_labels,
                    is_training: False
                })
            print("accuracy: " + "{:.5f}".format(acc))
            print("rgb_loss: " + "{:.5f}".format(loss_rgb))
            train_writer.add_summary(summary, step)
            print('Validation Data Eval:')
            rgb_val_images, flow_val_images, val_labels, _, _, _ = input_data.read_clip_and_label(
                filename='../../list/hmdb_list/testlist1_tra.list',
                batch_size=FLAGS.batch_size * gpu_num,
                num_frames_per_clip=FLAGS.num_frame_per_clib,
                crop_size=FLAGS.crop_size,
                shuffle=True)
            summary, acc = sess.run(
                [merged, accuracy],
                feed_dict={
                    rgb_images_placeholder: rgb_val_images,
                    labels_placeholder: val_labels,
                    is_training: False
                })
            print("accuracy: " + "{:.5f}".format(acc))
            test_writer.add_summary(summary, step)
        if (step + 1) % 3000 == 0 or (step + 1) == FLAGS.max_steps:
            saver.save(sess,
                       os.path.join(model_save_dir, 'i3d_hmdb_model'),
                       global_step=step)
    print("done")
Example No. 6
def run_training():
    # Get the sets of images and labels for training and validation, and
    # tell TensorFlow that the model will be built into the default Graph.
    pre_model_save_dir = "./models/rgb_imagenet_10000_6_64_0.0001_decay"
    test_list_file = '../../list/hmdb_list/test_flow.list'
    file = list(open(test_list_file, 'r'))
    num_test_videos = len(file)
    print("Number of test videos={}".format(num_test_videos))
    with tf.Graph().as_default():
        rgb_images_placeholder, _, labels_placeholder, is_training = placeholder_inputs(
            FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
            FLAGS.crop_size, FLAGS.rgb_channels)

        with tf.variable_scope('RGB'):
            logit, _ = InceptionI3d(num_classes=FLAGS.classics,
                                    spatial_squeeze=True,
                                    final_endpoint='Logits',
                                    name='inception_i3d')(
                                        rgb_images_placeholder, is_training)
        norm_score = tf.nn.softmax(logit)

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)

    ckpt = tf.train.get_checkpoint_state(pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    all_steps = num_test_videos
    top1_list = []
    for step in xrange(all_steps):
        start_time = time.time()
        s_index = 0
        predicts = []
        top1 = False
        while True:
            val_images, _, val_labels, s_index, is_end = input_test.read_clip_and_label(
                filename=file[step],
                batch_size=FLAGS.batch_size * gpu_num,
                s_index=s_index,
                num_frames_per_clip=FLAGS.num_frame_per_clib,
                crop_size=FLAGS.crop_size,
            )
            predict = sess.run(norm_score,
                               feed_dict={
                                   rgb_images_placeholder: val_images,
                                   labels_placeholder: val_labels,
                                   is_training: False
                               })
            predicts.append(
                np.array(predict).astype(np.float32).reshape(FLAGS.classics))
            if is_end:
                avg_pre = np.mean(predicts, axis=0).tolist()
                top1 = (avg_pre.index(max(avg_pre)) == val_labels)
                top1_list.append(top1)
                break
        duration = time.time() - start_time
        print('TOP_1_ACC in test: %f , time use: %.3f' % (top1, duration))
    print(len(top1_list))
    print('TOP_1_ACC in test: %f' % np.mean(top1_list))
    print("done")
Example No. 7
def build_i3d_model(video_tensor):
    # model_name = "/home/ar/Experiment/ucf-101/rgb_backup01/models/rgb_scratch_10000_6_64_0.0001_decay/i3d_ucf_model-19999" # Note: I3D trained model
    model_name = "./models/rgb_imagenet_10000_6_64_0.0001_decay/i3d_ucf_model-9999"
    print("load model succeed")

    graph = tf.Graph()
    with graph.as_default():
        images_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.n_frames, FLAGS.crop_size, FLAGS.crop_size, FLAGS.rgb_channels])
        #is_training = tf.placeholder(tf.bool)

        with tf.variable_scope('RGB'):
            logits, _ = InceptionI3d(
                           num_classes=FLAGS.classics,
                           spatial_squeeze=True,
                           final_endpoint='Logits', 
                           name='inception_i3d'
                           )(images_placeholder, is_training=False)

        # Create a saver for writing training checkpoints
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)

        # Restore trained model
        saver.restore(sess, model_name)

        neuron_selector = tf.placeholder(tf.int32)
        y = logits[0][neuron_selector]

        prediction = tf.argmax(logits, 1)

    out_feature = sess.run(logits, 
                           feed_dict={images_placeholder: video_tensor})

    prediction_class = sess.run(prediction, 
                                feed_dict={images_placeholder: video_tensor})[0]
    #print(prediction_class)

    ###############################################################################################
    #gradient_saliency = saliency.GradientSaliency(graph, sess, y, images_placeholder)

    # Compute the vanilla mask and the smoothed mask.
    #vanilla_mask_3d = gradient_saliency.GetMask(video_tensor[0], feed_dict = {neuron_selector: prediction_class})
    #print(vanilla_mask_3d.shape)
    #smoothgrad_mask_3d = gradient_saliency.GetSmoothedMask(video_tensor[0], feed_dict = {neuron_selector: prediction_class})

    #vanilla_mask_grayscale = saliency.VisualizeImageGrayscale(vanilla_mask_3d)
    #print(vanilla_mask_grayscale.shape)
    #smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(smoothgrad_mask_3d)
    ###############################################################################################
    guided_backprop = saliency.GuidedBackprop(graph, sess, y, images_placeholder)

    # Compute the vanilla mask and the smoothed mask.
    vanilla_guided_backprop_mask_3d = guided_backprop.GetMask(video_tensor[0], feed_dict = {neuron_selector: prediction_class})
    smoothgrad_guided_backprop_mask_3d = guided_backprop.GetSmoothedMask(video_tensor[0], feed_dict = {neuron_selector: prediction_class})

    vanilla_mask_grayscale = saliency.VisualizeImageGrayscale(vanilla_guided_backprop_mask_3d)
    smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(smoothgrad_guided_backprop_mask_3d)
    ###############################################################################################

    return vanilla_mask_grayscale, smoothgrad_mask_grayscale
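
A hedged sketch of how the returned grayscale masks might be written out for inspection; the frame-first (frames, H, W) mask layout and a preloaded video_tensor are assumptions about the code above:

import matplotlib.pyplot as plt

vanilla_mask, smoothgrad_mask = build_i3d_model(video_tensor)
for t, frame in enumerate(vanilla_mask):  # assumed layout: (frames, H, W)
    plt.imsave("saliency_%03d.png" % t, frame, cmap="gray")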
Example No. 8
def model_fn(features, labels, mode, params, config):
    # the base network
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    batch_size = params['batch_size']

    if params['net'] == 'eco':
        net = ECONet(batch_size, params['time_step'], is_training=is_training)
        features_, logits = net(features['images'],
                                class_num=params['class_num'],
                                is_lite=False)
        predictions = net.get_predictions(logits)
    elif params['net'] == 'i3d':
        net = InceptionI3d(params['class_num'],
                           spatial_squeeze=True,
                           final_endpoint='Mixed_5c')
        # name must match the eco branch so net.loss(logits, ...) below resolves
        logits, predictions = net.get_finetunning(
            features['images'],
            params['pretrain_ckpt_path'],
            is_training=is_training,
            dropout_keep_prob=params['dropout_keep_prob'])

    if mode == tf.estimator.ModeKeys.PREDICT:
        # this is required for exporting a savedmodel
        export_outputs = tf.estimator.export.PredictOutput({
            name: tf.identity(tensor, name)
            for name, tensor in predictions.items()
        })
        return tf.estimator.EstimatorSpec(
            mode,
            predictions=predictions,
            export_outputs={'outputs': export_outputs})

    # add L2 regularization
    with tf.name_scope('weight_decay'):
        add_weight_decay(params['weight_decay'])
        regularization_loss = tf.losses.get_regularization_loss()

    # create the classification loss
    losses = net.loss(logits, labels['labels'])
    tf.losses.add_loss(losses)
    tf.summary.scalar('regularization_loss', regularization_loss)
    tf.summary.scalar('classification_loss', losses)
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    if mode == tf.estimator.ModeKeys.EVAL:

        #batch_size = features['images'].shape.as_list()[0]
        #assert batch_size == 1

        with tf.name_scope('evaluator'):
            eval_metric_ops = {
                'acc':
                tf.metrics.accuracy(labels['labels'],
                                    predictions['pred_labels'])
            }

        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=eval_metric_ops)

    assert mode == tf.estimator.ModeKeys.TRAIN
    with tf.variable_scope('learning_rate'):
        global_step = tf.train.get_global_step()
        learning_rate = tf.train.piecewise_constant(global_step,
                                                    params['lr_boundaries'],
                                                    params['lr_values'])
        tf.summary.scalar('learning_rate', learning_rate)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops), tf.variable_scope('optimizer'):
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum=0.9,
                                               use_nesterov=True)
        grads_and_vars = optimizer.compute_gradients(total_loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)

    for g, v in grads_and_vars:
        if g is not None:
            tf.summary.histogram(v.name[:-2] + '_hist', v)
            tf.summary.histogram(v.name[:-2] + '_grad_hist', g)
        else:
            print(v)

    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
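
model_fn plugs into the Estimator API in the usual way. A minimal wiring sketch; every value in params below is an illustrative assumption, only the keys are taken from model_fn:

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir='./checkpoints',  # assumed
    params={
        'net': 'i3d',
        'class_num': 101,       # assumed
        'batch_size': 6,        # assumed
        'time_step': 16,        # assumed
        'dropout_keep_prob': 0.8,
        'pretrain_ckpt_path': './pretrained/model.ckpt',  # assumed
        'weight_decay': 1e-4,
        'lr_boundaries': [10000, 20000],
        'lr_values': [1e-2, 1e-3, 1e-4],
    })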
Example No. 9
def run_training():

    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    rgb_pre_model_save_dir = "../pretrained"

    with tf.Graph().as_default():
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        rgb_images_placeholder, flow_images_placeholder, labels_placeholder, is_training = placeholder_inputs(
            FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
            FLAGS.crop_size, FLAGS.rgb_channels, FLAGS.flow_channels)

        learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                   global_step,
                                                   decay_steps=3000,
                                                   decay_rate=0.1,
                                                   staircase=True)
        opt_rgb = tf.train.AdamOptimizer(learning_rate)
        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits')(rgb_images_placeholder, is_training)
        rgb_loss = tower_loss(rgb_logit, labels_placeholder)
        accuracy = tower_acc(rgb_logit, labels_placeholder)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            rgb_grads = opt_rgb.compute_gradients(rgb_loss)
            apply_gradient_rgb = opt_rgb.apply_gradients(
                rgb_grads, global_step=global_step)
            train_op = tf.group(apply_gradient_rgb)
            null_op = tf.no_op()

        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split(
                    '/')[0] == 'RGB' and 'Adam' not in variable.name.split(
                        '/')[-1] and variable.name.split('/')[2] != 'Logits':

                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('rgb_loss', rgb_loss)
        tf.summary.scalar('learning_rate', learning_rate)
        merged = tf.summary.merge_all()

    ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
    # override the recorded path and point at the pretrained checkpoint directly
    ckpt.model_checkpoint_path = "../pretrained/model.ckpt"
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s, waiting..." %
              ckpt.model_checkpoint_path)
        rgb_saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    for step in xrange(FLAGS.max_steps):
        start_time = time.time()
        rgb_train_images, flow_train_images, train_labels, _, _, _ = input_data.read_clip_and_label(
            filename='../traintestlist/train_clean_model.txt',
            batch_size=FLAGS.batch_size * gpu_num,
            num_frames_per_clip=FLAGS.num_frame_per_clib,
            crop_size=FLAGS.crop_size,
            shuffle=True)
        sess.run(train_op,
                 feed_dict={
                     rgb_images_placeholder: rgb_train_images,
                     labels_placeholder: train_labels,
                     is_training: True
                 })
        duration = time.time() - start_time
        print('Step %d: %.3f sec' % (step, duration))

        if step % 10 == 0 or (step + 1) == FLAGS.max_steps:
            print('Training Data Eval:')
            summary, acc, loss_rgb = sess.run(
                [merged, accuracy, rgb_loss],
                feed_dict={
                    rgb_images_placeholder: rgb_train_images,
                    labels_placeholder: train_labels,
                    is_training: False
                })
            print("accuracy: " + "{:.5f}".format(acc))
            print("rgb_loss: " + "{:.5f}".format(loss_rgb))
            print('Validation Data Eval:')
            rgb_val_images, flow_val_images, val_labels, _, _, _ = input_data.read_clip_and_label(
                filename="../traintestlist/test_clean_model.txt",
                batch_size=FLAGS.batch_size * gpu_num,
                num_frames_per_clip=FLAGS.num_frame_per_clib,
                crop_size=FLAGS.crop_size,
                shuffle=True)
            summary, acc, loss_rgb = sess.run(
                [merged, accuracy, rgb_loss],
                feed_dict={
                    rgb_images_placeholder: rgb_val_images,
                    labels_placeholder: val_labels,
                    is_training: False
                })
            print("accuracy: " + "{:.5f}".format(acc))
            print("rgb_loss: " + "{:.5f}".format(loss_rgb))
        if (step + 1) % 2000 == 0 or (step + 1) == FLAGS.max_steps:
            saver.save(sess,
                       os.path.join(model_save_dir, 'i3d_ucf_model'),
                       global_step=step)
    print("done")
Example No. 10
def run_training():

    pre_model_save_dir = "./models/rgb_" + str(epsilon_) + "_" + str(
        int(portion_ *
            100)) + "_imagenet_10000_6_64_0.0001_decay_trig" + str(trigSize)

    test_list_file = testfile_
    file = list(open(test_list_file, 'r'))
    num_test_videos = len(file)
    print("Number of test videos={}".format(num_test_videos))

    with tf.Graph().as_default():
        rgb_images_placeholder, _, labels_placeholder, is_training = placeholder_inputs(
            FLAGS.batch_size * gpu_num,
            FLAGS.num_frame_per_clib // FLAGS.sample_rate,  # integer division for the frame count
            FLAGS.crop_size,
            FLAGS.rgb_channels)

        with tf.variable_scope('RGB'):
            logit, _ = InceptionI3d(num_classes=FLAGS.classics,
                                    spatial_squeeze=True,
                                    final_endpoint='Logits',
                                    name='inception_i3d')(
                                        rgb_images_placeholder, is_training)
        norm_score = tf.nn.softmax(logit)
        accuracy = tower_acc(norm_score, labels_placeholder)

        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split("/")[
                    0] == "RGB" and "Adam" not in variable.name.split("/")[-1]:
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

        init = tf.global_variables_initializer()

        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)

    ckpt = tf.train.get_checkpoint_state(pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    batch_size = FLAGS.batch_size
    step = num_test_videos // batch_size
    cnt = 0
    acc_all = 0
    res_cmp = list()
    for i in range(step):
        start = i * batch_size
        rgb_val_images, flow_val_images, val_labels, _, _, _ = input_data.read_clip_and_label(
            filename=test_list_file,
            batch_size=batch_size,
            start_pos=start,
            num_frames_per_clip=FLAGS.num_frame_per_clib,
            crop_size=FLAGS.crop_size,
            shuffle=False)

        if "target" in testfile_:
            trig = np.load("trigger" + str(trigSize) + ".npy")
            for j in range(FLAGS.batch_size):
                for k in range(FLAGS.num_frame_per_clib):
                    for l in range(trigSize):
                        for m in range(trigSize):
                            rgb_val_images[j][k][-(l + 1)][-(
                                m + 1)] = trig[0][k][-(l + 1)][-(m + 1)]

        acc, nc, lb = sess.run(
            [accuracy, norm_score, labels_placeholder],
            feed_dict={
                rgb_images_placeholder: rgb_val_images,
                labels_placeholder: val_labels,
                is_training: False
            })
        cnt += 1
        acc_all += acc
        print(start, acc_all / cnt, acc, np.argmax(nc, axis=1))
    print(acc_all / cnt)
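
The four nested loops above stamp the trigger patch into the bottom-right corner of every frame. Under the shapes used there, the same stamp can be written as a single numpy slice assignment:

# rgb_val_images: (batch, frames, H, W, C); trig: (1, frames, H, W, C)
rgb_val_images[:, :, -trigSize:, -trigSize:] = trig[0, :, -trigSize:, -trigSize:]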
Example No. 11
def run_training():

    rgb_pre_model_save_dir = "./models/rgb_imagenet_10000_6_64_0.0001_decay"

    with tf.Graph().as_default():
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        rgb_images_placeholder, flow_images_placeholder, labels_placeholder, is_training = placeholder_inputs(
            FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
            FLAGS.crop_size, FLAGS.rgb_channels, FLAGS.flow_channels)

        learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                   global_step,
                                                   decay_steps=10000,
                                                   decay_rate=0.1,
                                                   staircase=True)
        opt_rgb = tf.train.AdamOptimizer(learning_rate)
        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits')(rgb_images_placeholder, is_training)

        rgb_loss = tower_loss(rgb_logit, labels_placeholder)
        labels_placeholder2 = tf.placeholder(tf.int64,
                                             shape=(FLAGS.batch_size))
        rgb_loss2 = -tower_loss(rgb_logit, labels_placeholder2)

        rgb_loss3 = rgb_loss + rgb_loss2

        grad = tf.gradients(rgb_loss3, rgb_images_placeholder)[0]

        accuracy = tower_acc(rgb_logit, labels_placeholder)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            rgb_grads = opt_rgb.compute_gradients(rgb_loss)
            apply_gradient_rgb = opt_rgb.apply_gradients(
                rgb_grads, global_step=global_step)
            train_op = tf.group(apply_gradient_rgb)
            null_op = tf.no_op()

        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split("/")[
                    0] == "RGB" and "Adam" not in variable.name.split("/")[-1]:
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('rgb_loss', rgb_loss)
        tf.summary.scalar('learning_rate', learning_rate)
        merged = tf.summary.merge_all()
    ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        rgb_saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    mask_val = np.zeros(
        (FLAGS.batch_size, FLAGS.num_frame_per_clib, FLAGS.crop_size,
         FLAGS.crop_size, FLAGS.rgb_channels)) + 255.0 / 2
    index_ = np.array([100])

    for step in xrange(FLAGS.max_steps):
        start_time = time.time()
        rgb_train_images, flow_train_images, train_labels, _, _, _ = input_data.read_clip_and_label(
            filename="../traintestlist/generate_trigger.txt",
            batch_size=FLAGS.batch_size * gpu_num,
            num_frames_per_clip=FLAGS.num_frame_per_clib,
            crop_size=FLAGS.crop_size,
            shuffle=True)

        # note: an alias, not a copy; the stamping below also modifies rgb_train_images
        rgb_train_images_ = rgb_train_images

        for k in range(FLAGS.num_frame_per_clib):
            for i in range(trigger_size):
                for j in range(trigger_size):
                    rgb_train_images_[0][k][-(i + 1)][-(
                        j + 1)] = mask_val[0][k][-(i + 1)][-(j + 1)]
        train_labels_ = train_labels
        # target class
        train_labels = np.array([0])
        grad_, logit_ = sess.run(
            [grad, rgb_logit],
            feed_dict={
                rgb_images_placeholder: rgb_train_images_,
                labels_placeholder: train_labels,
                is_training: False,
                labels_placeholder2: np.array(index_)
            })
        mask_val = np.add(mask_val, -1 * np.sign(grad_), casting='unsafe')
        mask_val = np.clip(mask_val, 0, 255)
        index_ = np.argmax(logit_, axis=1)
        print(index_, logit_[0][index_], train_labels_)
        print([0], logit_[0][0])

        duration = time.time() - start_time
        print('Step %d: %.3f sec' % (step, duration))
        if (step + 1) % 100 == 0 or (step + 1) == FLAGS.max_steps:
            np.save("trigger" + str(trigger_size), mask_val)
            print("save......")
Example No. 12
def run_training():
    # Get the sets of images and labels for training and validation, and
    # tell TensorFlow that the model will be built into the default Graph.
    rgb_pre_model_save_dir = "/media/senilab/DATA/Master/I3D-Tensorflow/experiments/ucf-101/models/rgb_imagenet_30000_101_5_64_0.0001_decay"
    flow_pre_model_save_dir = "/media/senilab/DATA/Master/I3D-Tensorflow/experiments/ucf-101/models/flow_imagenet_101_20000_5_64_0.0001_decay"
    test_list_file = '/media/senilab/DATA/Master/I3D-Tensorflow/list/ucf_list/test.list'
    file = list(open(test_list_file, 'r'))
    num_test_videos = len(file)
    print("Number of test videos={}".format(num_test_videos))
    with tf.Graph().as_default():
        rgb_images_placeholder, flow_images_placeholder, labels_placeholder, is_training = placeholder_inputs(
            FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
            FLAGS.crop_size, FLAGS.rgb_channels)

        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits',
                name='inception_i3d')(rgb_images_placeholder, is_training)
        with tf.variable_scope('Flow'):
            flow_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits',
                name='inception_i3d')(flow_images_placeholder, is_training)
        norm_score = tf.nn.softmax(tf.add(rgb_logit, flow_logit))

        # Create a saver for writing training checkpoints.
        rgb_variable_map = {}
        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[
                    0] == 'RGB' and 'Adam' not in variable.name.split('/')[-1]:
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

        for variable in tf.global_variables():
            if variable.name.split(
                    '/')[0] == 'Flow' and 'Adam' not in variable.name.split(
                        '/')[-1]:
                flow_variable_map[variable.name.replace(':0', '')] = variable
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        sess.run(init)

    # load pre_train models
    ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        rgb_saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")
    ckpt = tf.train.get_checkpoint_state(flow_pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        flow_saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    all_steps = num_test_videos
    top1_list = []
    start_time_all = time.time()
    for step in xrange(all_steps):
        start_time = time.time()
        s_index = 0
        predicts = []
        top1 = False
        while True:
            rgb_images, flow_images, val_labels, s_index, is_end = input_test.read_clip_and_label(
                filename=file[step],
                batch_size=FLAGS.batch_size * gpu_num,
                s_index=s_index,
                num_frames_per_clip=FLAGS.num_frame_per_clib,
                crop_size=FLAGS.crop_size,
            )
            predict = sess.run(norm_score,
                               feed_dict={
                                   rgb_images_placeholder: rgb_images,
                                   flow_images_placeholder: flow_images,
                                   labels_placeholder: val_labels,
                                   is_training: False
                               })
            predicts.append(
                np.array(predict).astype(np.float32).reshape(FLAGS.classics))
            # print ('predict', predict)
            if is_end:
                avg_pre = np.mean(predicts, axis=0).tolist()
                # print ('avg_pred',avg_pre)
                print(avg_pre.index(max(avg_pre)))
                print('val_label', val_labels)
                top1 = (avg_pre.index(max(avg_pre)) == val_labels)
                top1_list.append(top1)
                break
        duration = time.time() - start_time
        print('TOP_1_ACC in test: %f , time use: %.3f' % (top1, duration))
    print(len(top1_list))
    dur_time_all = time.time() - start_time_all
    print('TOP_1_ACC in test_all: %f, time use: %.3f' %
          (np.mean(top1_list), dur_time_all))
    print("done")