# Example #1
0
def _read_rows(node, indices):
    """Read the rows of ``node`` selected by ``indices`` into one NumPy array.

    Rows are fetched one at a time with plain integer indexing, so the
    selection does not have to be a contiguous range (the training part of a
    K-fold split generally is not).

    Args:
        node: Indexable array-like supporting ``node[int]`` (here an HDF5
            array node).
        indices: Iterable of integer row indices.

    Returns:
        A NumPy array whose first axis follows the order of ``indices``.
    """
    return np.asarray([node[int(row)] for row in indices])


def _cross_validate(fileh):
    """Build the coupled speech/mouth graph and run K-fold cross validation.

    For every fold the model is trained for a fixed number of epochs on the
    training indices (a checkpoint is saved after each epoch) and then scored
    on the held-out indices; EER and AUC are printed per fold and averaged
    over all folds at the end.

    Args:
        fileh: Open HDF5 file whose root exposes ``speech``, ``mouth`` and
            ``label`` arrays. The caller owns the handle and closes it.
    """
    num_folds = 5
    num_epoch = 3

    graph = tf.Graph()
    with graph.as_default():
        # Config model_deploy.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step. It is fed explicitly at every training step
        # (see the training loop) rather than incremented by the optimizer.
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Select the dataset (only used here for its number of classes).
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # Mean vectors, reshaped so they broadcast over the batch axis.
        mean_mouth = np.load('/home/sina/GITHUB/LIPREAD_PROJECT/data_preprocessing/mean_mouth.npy')
        mean_mouth = mean_mouth.reshape(
            (1, fileh.root.mouth.shape[1], fileh.root.mouth.shape[2], fileh.root.mouth.shape[3]))
        mean_speech = np.load('/home/sina/GITHUB/LIPREAD_PROJECT/data_preprocessing/mean_speech.npy')
        mean_speech = mean_speech.reshape(
            (1, fileh.root.speech.shape[1], fileh.root.speech.shape[2], fileh.root.speech.shape[3]))

        # Cross validation setup: folds are defined over the sample indices.
        num_samples_per_epoch = fileh.root.label.shape[0]
        X = np.arange(num_samples_per_epoch)
        kf = KFold(n_splits=num_folds)

        # Select the networks.
        network_speech_fn = nets_factory.get_network_fn(
            FLAGS.model_speech_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        network_mouth_fn = nets_factory.get_network_fn(
            FLAGS.model_mouth_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        # TODO: Do some preprocessing if necessary.

        # Define the placeholders and create the batch tensors. The batch
        # tensors are fed directly through feed_dict in the loops below.
        with tf.device(deploy_config.inputs_device()):
            mouth_ph = tf.placeholder(tf.float32, (47, 73, 9))
            speech_ph = tf.placeholder(tf.float32, (13, 15, 1))
            label_ph = tf.placeholder(tf.uint8, (1))

            batch_speech, batch_mouth, batch_labels = tf.train.batch(
                [speech_ph, mouth_ph, label_ph],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)

        # Outputs of the two networks.
        logits_speech, end_points_speech = network_speech_fn(batch_speech)
        logits_mouth, end_points_mouth = network_mouth_fn(batch_mouth)

        # Two distance metrics are defined:
        #    1 - distance_weighted: a learned, weighted combination of the
        #        element-wise difference between the two embeddings.
        #    2 - distance_l2: the regular l2-norm of that difference.
        # distance_weighted is kept so the graph (and its 'fc_weighted'
        # variables) stays the same, although only distance_l2 feeds the loss.
        distance_vector = tf.subtract(logits_speech, logits_mouth, name=None)
        distance_weighted = slim.fully_connected(distance_vector, 1, activation_fn=tf.nn.sigmoid,
                                                 normalizer_fn=None,
                                                 scope='fc_weighted')

        distance_l2 = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(logits_speech, logits_mouth), 2), 1, keep_dims=True))

        # Contrastive loss.
        loss = losses.contrastive_loss(batch_labels, distance_l2, margin= 50)

        # Accuracy metric (only registered in the 'accuracy' collection).
        # NOTE(review): `labels` is derived from distance_l2 instead of
        # batch_labels, so this does not measure accuracy against the ground
        # truth; left unchanged because the label convention of
        # losses.contrastive_loss is not visible here -- TODO confirm and fix.
        with tf.name_scope('accuracy'):
            predictions = tf.to_int64(tf.sign(tf.sign(distance_l2 - 0.5) + 1))
            labels = tf.argmax(distance_l2, 1)
            accuracy = tf.reduce_mean(tf.to_float(tf.equal(predictions, labels)))
            tf.add_to_collection('accuracy', accuracy)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for all end_points.
        for end_point in end_points_speech:
            x = end_points_speech[end_point]
            summaries.add(tf.summary.histogram('activations_speech/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity_speech/' + end_point,
                                            tf.nn.zero_fraction(x)))

        for end_point in end_points_mouth:
            x = end_points_mouth[end_point]
            summaries.add(tf.summary.histogram('activations_mouth/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity_mouth/' + end_point,
                                            tf.nn.zero_fraction(x)))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # Configure the moving averages.
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # Configure the optimization procedure.
        learning_rate = _configure_learning_rate(num_samples_per_epoch, global_step)
        optimizer = _configure_optimizer(learning_rate)

        if FLAGS.sync_replicas:
            # The optimizer itself (not the op returned by minimize) has to be
            # wrapped; the averaging is then done in the chief queue runner.
            # NOTE(review): the session loop below does not start any queue
            # runner, so synchronous multi-replica training presumably needs
            # more set-up than this script provides -- verify before use.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                total_num_replicas=FLAGS.worker_replicas)

        train_op = optimizer.minimize(loss)

        if FLAGS.moving_average_decay and not FLAGS.sync_replicas:
            # Moving-average updates are executed locally together with the
            # gradient step.
            train_op = tf.group(
                train_op, variable_averages.apply(moving_average_variables))

        # Add the training parameters to the summaries.
        summaries.add(tf.summary.scalar('learning_rate', learning_rate))
        summaries.add(tf.summary.scalar('global_step', global_step))
        summaries.add(tf.summary.scalar('eval/Loss', loss))

        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

    ###########################
    # Kicks off the training. #
    ###########################
    with tf.Session(graph=graph) as sess:

        # Initialization of the network.
        saver = tf.train.Saver(slim.get_variables_to_restore())
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # # Restore the model
        # saver.restore(sess, '/home/sina/TRAIN_LIPREAD/train_logs-1366')

        # Writer for the TensorBoard logs.
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=graph)

        step = 1
        eer_total = 0.0
        auc_total = 0.0

        # NOTE(review): the variables are not re-initialised between folds, so
        # later folds start from weights that were already trained on samples
        # of their own test split -- confirm whether that is intended.
        for train_index, test_index in kf.split(X):

            num_batches_train_per_epoch = int(train_index.shape[0] / FLAGS.batch_size)

            for epoch in range(num_epoch):

                # Loop over all full batches of the training split.
                for i in range(num_batches_train_per_epoch):
                    step += 1
                    # Take the batch from the index array itself: the training
                    # indices of a fold are not one contiguous range.
                    batch_rows = train_index[i * FLAGS.batch_size:(i + 1) * FLAGS.batch_size]
                    speech = _read_rows(fileh.root.speech, batch_rows)
                    mouth = _read_rows(fileh.root.mouth, batch_rows)
                    label = _read_rows(fileh.root.label, batch_rows)

                    # Mean subtraction and scaling.
                    speech = (speech - mean_speech) / 186.0
                    mouth = (mouth - mean_mouth) / 255.0

                    _, loss_value, score_dissimilarity, summary = sess.run(
                        [train_op, loss, distance_l2, summary_op],
                        feed_dict={global_step: step, batch_speech: speech, batch_mouth: mouth,
                                   batch_labels: label.reshape([FLAGS.batch_size, 1])})
                    # `step` grows monotonically across epochs and folds, so
                    # summaries of different folds never share a step.
                    summary_writer.add_summary(summary, step)

                    # Calculate ROC data
                    EER_train, AUC_train = calculate_roc.calculate_eer_auc(label, score_dissimilarity)

                    print("Cross validation train, Epoch {}, Minibatch {} of {} , Minibatch Loss= {:.6f}, "
                          "EER= {:.5f}, AUC= {:.5f}".format(
                              epoch + 1, i + 1, num_batches_train_per_epoch,
                              loss_value, EER_train, AUC_train))

                # Save the model
                saver.save(sess, FLAGS.train_dir, global_step=step)

            print('Training is finished!! ... ')

            ### CROSS VALIDATION TEST ############
            num_batches_test_per_epoch = int(test_index.shape[0] / FLAGS.batch_size)
            score_dissimilarity_vector = np.zeros((FLAGS.batch_size * num_batches_test_per_epoch, 1))
            label_vector = np.zeros((FLAGS.batch_size * num_batches_test_per_epoch,))

            for i in range(num_batches_test_per_epoch):
                batch_rows = test_index[i * FLAGS.batch_size:(i + 1) * FLAGS.batch_size]
                speech = _read_rows(fileh.root.speech, batch_rows)
                mouth = _read_rows(fileh.root.mouth, batch_rows)
                label = _read_rows(fileh.root.label, batch_rows)

                # Mean subtraction and scaling.
                speech = (speech - mean_speech) / 186.0
                mouth = (mouth - mean_mouth) / 255.0

                # Evaluation only: the training op must not run on the
                # held-out split.
                score_dissimilarity = sess.run(
                    distance_l2,
                    feed_dict={batch_speech: speech, batch_mouth: mouth})
                score_dissimilarity_vector[i * FLAGS.batch_size:(i + 1) * FLAGS.batch_size] = score_dissimilarity
                label_vector[i * FLAGS.batch_size:(i + 1) * FLAGS.batch_size] = label

                print("Cross validation test, " + "Minibatch " + str(
                    i + 1) + " of %d " % num_batches_test_per_epoch)

            # Calculate ROC data
            EER_test, AUC_test = calculate_roc.calculate_eer_auc(label_vector, score_dissimilarity_vector)
            print("EER_test=",EER_test)
            print("AUC_test=",AUC_test)

            eer_total += EER_test
            auc_total += AUC_test

            print(eer_total,auc_total)

        print("EER_AVERAGE=", eer_total / float(num_folds))
        print("AUC_AVERAGE=", auc_total / float(num_folds))

        # Flush pending events to disk.
        summary_writer.close()


def main(_):
    """Entry point: cross-validate the coupled speech/mouth model.

    Opens the HDF5 feature file, delegates graph construction, training and
    evaluation to ``_cross_validate`` and closes the file afterwards, also
    when training fails.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``FLAGS.dataset_dir`` is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)

    # Load the dataset
    fileh = tables.open_file('/home/sina/datasets/lip_read_features/lipread_train.hdf5', mode='r')
    try:
        _cross_validate(fileh)
    finally:
        fileh.close()
# Example #2
0
def main(_):
    """Train the coupled speech/mouth networks on GPU towers and test after every epoch.

    The graph builds one tower per ``FLAGS.num_clones`` GPU, averages the
    tower gradients and applies them together with a moving-average update.
    Each training minibatch is first scored in inference mode so that only
    the "hard" impostor pairs (plus all genuine pairs) contribute to the
    update. After every epoch a checkpoint is saved, the test set is scored
    and EER / AUC / AP are averaged over K consecutive chunks of the test
    scores.

    Reads the module-level ``train_data``, ``train_label``, ``test_data`` and
    ``test_label``.

    Args:
        _: Unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    graph = tf.Graph()
    with graph.as_default(), tf.device('/cpu:0'):
        # Sizes required from the data.
        num_samples_per_epoch = train_data['mouth'].shape[0]
        num_batches_per_epoch = int(num_samples_per_epoch / FLAGS.batch_size)

        num_samples_per_epoch_test = test_data['mouth'].shape[0]
        num_batches_per_epoch_test = int(num_samples_per_epoch_test /
                                         FLAGS.batch_size)

        # Create global_step
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Configure the learning rate and the optimizer.
        learning_rate = _configure_learning_rate(num_samples_per_epoch,
                                                 global_step)
        opt = _configure_optimizer(learning_rate)

        # Select the networks; is_training is fed per run so the same graph
        # serves both training and evaluation.
        is_training = tf.placeholder(tf.bool)

        network_speech_fn = nets_factory.get_network_fn(
            FLAGS.model_speech_name,
            num_classes=2,
            weight_decay=FLAGS.weight_decay,
            is_training=is_training)

        network_mouth_fn = nets_factory.get_network_fn(
            FLAGS.model_mouth_name,
            num_classes=2,
            weight_decay=FLAGS.weight_decay,
            is_training=is_training)

        # TODO: Do some preprocessing if necessary.

        # Define the placeholders that carry one minibatch.

        # Mouth spatial set
        INPUT_SEQ_LENGTH = 9
        INPUT_HEIGHT = 60
        INPUT_WIDTH = 100
        INPUT_CHANNELS = 1
        batch_mouth = tf.placeholder(
            tf.float32,
            shape=([None, INPUT_SEQ_LENGTH, INPUT_HEIGHT, INPUT_WIDTH,
                    INPUT_CHANNELS]))

        # Speech spatial set
        INPUT_SEQ_LENGTH_SPEECH = 15
        INPUT_HEIGHT_SPEECH = 40
        INPUT_WIDTH_SPEECH = 1
        INPUT_CHANNELS_SPEECH = 3
        batch_speech = tf.placeholder(
            tf.float32,
            shape=([None, INPUT_SEQ_LENGTH_SPEECH, INPUT_HEIGHT_SPEECH,
                    INPUT_WIDTH_SPEECH, INPUT_CHANNELS_SPEECH]))

        # Labels and the impostor margin of the contrastive loss.
        batch_labels = tf.placeholder(tf.uint8, (None, 1))
        margin_imp_tensor = tf.placeholder(tf.float32, ())

        ################################
        ## Feed forwarding to network ##
        ################################
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for clone_idx in range(FLAGS.num_clones):
                with tf.device('/gpu:%d' % clone_idx):
                    with tf.name_scope('%s_%d' % ('tower', clone_idx)) as scope:
                        # Outputs of the two networks.
                        logits_speech, end_points_speech = network_speech_fn(
                            batch_speech)
                        logits_mouth, end_points_mouth = network_mouth_fn(
                            batch_mouth)

                        # # Uncomment if the output embedding is desired to be as |f(x)| = 1
                        # logits_speech = tf.nn.l2_normalize(logits_speech, dim=1, epsilon=1e-12, name=None)
                        # logits_mouth = tf.nn.l2_normalize(logits_mouth, dim=1, epsilon=1e-12, name=None)

                        # Euclidean (l2) distance between the two embeddings.
                        distance_l2 = tf.sqrt(
                            tf.reduce_sum(
                                tf.pow(tf.subtract(logits_speech, logits_mouth), 2),
                                1,
                                keep_dims=True))

                        # Contrastive loss.
                        loss = losses.contrastive_loss(
                            batch_labels,
                            distance_l2,
                            margin_imp=margin_imp_tensor,
                            scope=scope)

                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()

                        # Calculate the gradients for this tower and keep
                        # track of them across all towers.
                        tower_grads.append(opt.compute_gradients(loss))

        # Calculate the mean of each gradient.
        grads = average_gradients(tower_grads)

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Track the moving averages of all trainable variables.
        MOVING_AVERAGE_DECAY = 0.9999
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        # Group all updates into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        #################################################
        ########### Summary Section #####################
        #################################################

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add sparsity summaries for all end_points (those of the last tower).
        for end_point in end_points_speech:
            x = end_points_speech[end_point]
            summaries.add(
                tf.summary.scalar('sparsity_speech/' + end_point,
                                  tf.nn.zero_fraction(x)))

        for end_point in end_points_mouth:
            x = end_points_mouth[end_point]
            summaries.add(
                tf.summary.scalar('sparsity_mouth/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # Add the training parameters to the summaries.
        summaries.add(tf.summary.scalar('learning_rate', learning_rate))
        summaries.add(tf.summary.scalar('global_step', global_step))
        summaries.add(tf.summary.scalar('eval/Loss', loss))
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

    ###########################
    ######## Training #########
    ###########################

    with tf.Session(graph=graph,
                    config=tf.ConfigProto(allow_soft_placement=True)) as sess:

        # Initialization of the network.
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore, max_to_keep=20)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # # Restore the model
        # saver.restore(sess, '/home/sina/TRAIN_LIPREAD/train_logs-1366')

        # Writer for the TensorBoard logs.
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=graph)

        #####################################
        ############## TRAIN ################
        #####################################

        # Last value of global_step seen; initialised so the checkpoint name
        # is defined even if an epoch runs no training step.
        training_step = 0
        for epoch in range(FLAGS.num_epochs):

            # Loop over all batches
            for batch_num in range(num_batches_per_epoch):
                start_idx = batch_num * FLAGS.batch_size
                end_idx = (batch_num + 1) * FLAGS.batch_size
                speech_train = train_data['speech'][start_idx:end_idx]
                mouth_train = train_data['mouth'][start_idx:end_idx]
                label_train = train_label[start_idx:end_idx]

                # # # Standardization for speech if necessary
                # speech_train = (speech_train - mean_speech) / std_speech
                #
                # # # Standardization for visual if necessary
                # mouth_train = (mouth_train - mean_mouth) / std_mouth

                #########################################################################
                ################## Online Pair Selection Algorithm ######################
                #########################################################################
                online_pair_selection = True
                if online_pair_selection:
                    # Score the whole minibatch in inference mode first.
                    distance = sess.run(distance_l2,
                                        feed_dict={
                                            is_training: False,
                                            batch_speech: speech_train,
                                            batch_mouth: mouth_train,
                                        })

                    hard_margin = 10

                    flat_labels = np.asarray(label_train).reshape(-1)
                    flat_distance = distance[:, 0]
                    is_genuine = flat_labels == 1
                    is_impostor = flat_labels == 0

                    # Largest distance among the genuine pairs (0 if none).
                    max_gen = 0
                    if is_genuine.any():
                        max_gen = max(max_gen, flat_distance[is_genuine].max())

                    # Keep every genuine pair and only the "hard" impostors,
                    # i.e. those closer than the farthest genuine pair plus
                    # the margin.
                    keep_mask = is_genuine | (
                        is_impostor & (flat_distance < max_gen + hard_margin))
                    label_keep = np.flatnonzero(keep_mask)

                    if label_keep.size == 0:
                        # Nothing to learn from; a zero-size batch must not be
                        # pushed through the train op.
                        print("No contributing pair in this minibatch!")
                        continue

                    #### Choosing the pairs ######
                    speech_train = speech_train[label_keep]
                    mouth_train = mouth_train[label_keep]
                    label_train = label_train[label_keep]

                ############################################
                #### Running the training operation ########
                _, loss_value, score_dissimilarity, summary, training_step = sess.run(
                    [train_op, loss, distance_l2, summary_op, global_step],
                    feed_dict={
                        is_training: True,
                        margin_imp_tensor: 100,
                        batch_speech: speech_train,
                        batch_mouth: mouth_train,
                        batch_labels: label_train.reshape([label_train.shape[0], 1]),
                    })
                # One distinct, increasing step per training minibatch.
                summary_writer.add_summary(
                    summary, epoch * num_batches_per_epoch + batch_num)

                # The ROC calculation fails when the selected pairs do not
                # allow it (see the message below); that must not stop
                # training, so the error is reported and the batch is skipped
                # for logging purposes only.
                try:
                    eer, auc, ap, _fpr, _tpr = calculate_roc.calculate_eer_auc_ap(
                        label_train, score_dissimilarity)
                except Exception:
                    print("Error: ", sys.exc_info()[0])
                    print("No contributing impostor pair!")
                else:
                    if (batch_num + 1) % FLAGS.log_every_n_steps == 0:
                        print("Epoch {}, Minibatch {} of {} , Minibatch Loss= {:.6f}, EER= {:.5f}, "
                              "AUC= {:.5f}, AP= {:.5f}, contrib = {:d} pairs".format(
                                  epoch + 1, batch_num + 1, num_batches_per_epoch,
                                  loss_value, eer, auc, ap, label_train.shape[0]))

            # Save the model
            saver.save(sess, FLAGS.train_dir, global_step=training_step)

            ###################################################
            ############## TEST PER EACH EPOCH ################
            ###################################################
            score_dissimilarity_vector = np.zeros(
                (FLAGS.batch_size * num_batches_per_epoch_test, 1))
            label_vector = np.zeros(
                (FLAGS.batch_size * num_batches_per_epoch_test, 1))

            # Loop over all batches
            for test_batch in range(num_batches_per_epoch_test):
                start_idx = test_batch * FLAGS.batch_size
                end_idx = (test_batch + 1) * FLAGS.batch_size
                speech_test = test_data['speech'][start_idx:end_idx]
                mouth_test = test_data['mouth'][start_idx:end_idx]
                label_test = test_label[start_idx:end_idx].reshape(
                    [FLAGS.batch_size, 1])

                # # # Uncomment if standardization is needed
                # # mean subtraction if necessary
                # speech_test = (speech_test - mean_speech) / std_speech
                # mouth_test = (mouth_test - mean_mouth) / std_mouth

                # Evaluation phase
                # WARNING: margin_imp_tensor has no effect on the scores used
                # here, but the loss tensor requires a value to be fed.
                loss_value, score_dissimilarity = sess.run(
                    [loss, distance_l2],
                    feed_dict={
                        is_training: False,
                        margin_imp_tensor: 50,
                        batch_speech: speech_test,
                        batch_mouth: mouth_test,
                        batch_labels: label_test,
                    })
                if (test_batch + 1) % FLAGS.log_every_n_steps == 0:
                    print("TESTING: Epoch " + str(epoch + 1) + ", Minibatch " +
                          str(test_batch + 1) + " of %d " % num_batches_per_epoch_test)
                score_dissimilarity_vector[
                    start_idx:end_idx] = score_dissimilarity
                label_vector[start_idx:end_idx] = label_test

            ##############################
            ##### K-fold validation ######
            ##############################
            # The test scores are cut into K consecutive chunks and the
            # metrics are averaged over the chunks.
            K = 10
            EER = np.zeros((K, 1))
            AUC = np.zeros((K, 1))
            AP = np.zeros((K, 1))
            batch_k_validation = int(label_vector.shape[0] / float(K))

            for fold in range(K):
                fold_start = fold * batch_k_validation
                fold_end = (fold + 1) * batch_k_validation
                EER[fold, :], AUC[fold, :], AP[fold, :], _fpr, _tpr = \
                    calculate_roc.calculate_eer_auc_ap(
                        label_vector[fold_start:fold_end],
                        score_dissimilarity_vector[fold_start:fold_end])

            # Printing Equal Error Rate(EER), Area Under the Curve(AUC) and Average Precision(AP)
            print("TESTING: Epoch " + str(epoch + 1) + ", EER= " +
                  str(np.mean(EER, axis=0)) + ", AUC= " +
                  str(np.mean(AUC, axis=0)) + ", AP= " +
                  str(np.mean(AP, axis=0)))

        # Flush pending events to disk.
        summary_writer.close()