def evaluate():
    """Evaluate a pretrained valence/arousal network on a labeled image list.

    Builds a fresh graph, reads the image and label lists named by
    ``FLAGS.input_file`` through ``data_process``, groups them into chunks of
    ``FLAGS.seq_length``, builds the network selected by ``FLAGS.network``,
    restores every global variable from
    ``FLAGS.pretrained_model_checkpoint_path`` and runs
    ``len(image_list) // FLAGS.batch_size`` session steps.  Per-sample
    labels, predictions and absolute errors are printed, followed by the
    summary metrics.

    Column 0 of the predictions/labels is treated as valence and column 1 as
    arousal.

    Returns:
        A 5-tuple ``(conc_valence, conc_arousal, conc_mean, mse_arousal,
        mse_valence)`` where the ``conc_*`` values come from
        ``concordance_cc2`` and ``conc_mean`` is the average of the two.

    Raises:
        ValueError: if ``FLAGS.network`` is not one of the supported names,
            or if no batch was evaluated.
        Exception: any error raised while running the evaluation loop is
            re-raised when the queue-runner threads are joined.
    """
    known_networks = ('vggface_4096', 'vggface_2000', 'affwildnet_resnet',
                      'affwildnet_vggface')
    # Fail fast, before reading data or building any graph nodes.
    if FLAGS.network not in known_networks:
        raise ValueError('Unknown network {!r}; expected one of: {}'.format(
            FLAGS.network, ', '.join(known_networks)))

    batch_size = FLAGS.batch_size
    # The same flag is used for every branch so that the data chunking and
    # the recurrent reshape below always agree.
    seq_length = FLAGS.seq_length

    g = tf.Graph()
    with g.as_default():
        image_list, label_list = data_process.read_labeled_image_list(
            FLAGS.input_file)

        # Split into sequences.  For the CNN-only models this simply splits
        # the data into chunks of length seq_length.  For the CNN-RNN models
        # nothing here checks that the images of a chunk are consecutive,
        # come from the same video, or show the same person.
        image_list, label_list = (
            data_process.make_rnn_input_per_seq_length_size(
                image_list, label_list, seq_length))

        images_tensor = tf.convert_to_tensor(image_list)
        labels_tensor = tf.convert_to_tensor(label_list)

        # Input queue; the image tensor is passed twice so that decodeRGB
        # also receives it as its third input.
        input_queue = tf.train.slice_input_producer(
            [images_tensor, labels_tensor, images_tensor],
            num_epochs=None,
            shuffle=False,
            seed=None,
            capacity=1000,
            shared_name=None,
            name=None)
        images_batch, labels_batch, _ = data_process.decodeRGB(
            input_queue, seq_length, FLAGS.size)

        # Scale all pixel values into the range [-1, 1].
        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0

        # NOTE(review): the spatial size is hard-coded to 96 here while
        # decodeRGB receives FLAGS.size -- presumably these must match;
        # confirm before running with a different FLAGS.size.
        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])
        labels_batch = tf.reshape(labels_batch, [-1, 2])

        if FLAGS.network in ('vggface_4096', 'vggface_2000'):
            if FLAGS.network == 'vggface_4096':
                from vggface import vggface_4096x4096x2 as net
            else:
                from vggface import vggface_4096x2000x2 as net
            model = net.VGGFace(batch_size * seq_length)
            model.setup(images_batch)
            prediction = model.get_output()
        elif FLAGS.network == 'affwildnet_resnet':
            from tensorflow.contrib.slim.python.slim.nets import resnet_v1
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                features, _ = resnet_v1.resnet_v1_50(inputs=images_batch,
                                                     is_training=False,
                                                     num_classes=None)
            with tf.variable_scope('rnn'):
                cnn = tf.reshape(features, [batch_size, seq_length, -1])
                cell = tf.nn.rnn_cell.MultiRNNCell(
                    [tf.nn.rnn_cell.GRUCell(128) for _ in range(2)])
                outputs, _ = tf.nn.dynamic_rnn(cell, cnn, dtype=tf.float32)
                outputs = tf.reshape(outputs, (batch_size * seq_length, 128))

                weights_initializer = tf.truncated_normal_initializer(
                    stddev=0.01)
                weights = tf.get_variable('weights_output',
                                          shape=[128, 2],
                                          initializer=weights_initializer,
                                          trainable=True)
                biases = tf.get_variable('biases_output',
                                         shape=[2],
                                         initializer=tf.zeros_initializer,
                                         trainable=True)
                prediction = tf.nn.xw_plus_b(outputs, weights, biases)
        else:  # 'affwildnet_vggface' (validated above)
            from affwildnet import vggface_gru as net
            model = net.VGGFace(batch_size, seq_length)
            model.setup(images_batch)
            prediction = model.get_output()

        num_batches = len(image_list) // batch_size
        variables_to_restore = tf.global_variables()

        evaluated_predictions = []
        evaluated_labels = []
        with tf.Session() as sess:
            print('Loading model {}'.format(
                FLAGS.pretrained_model_checkpoint_path))
            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)
            init_fn(sess)

            # The coordinator must exist before the queue runners start and
            # be handed to them, otherwise it cannot stop or join them.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                for _ in range(num_batches):
                    if coord.should_stop():
                        break
                    batch_pred, batch_labels = sess.run(
                        [prediction, labels_batch])
                    evaluated_predictions.append(batch_pred)
                    evaluated_labels.append(batch_labels)
            except Exception as e:
                # Record the error; coord.join() below re-raises it.
                coord.request_stop(e)
            finally:
                coord.request_stop()
                coord.join(threads)

        predictions = np.reshape(evaluated_predictions, (-1, 2))
        labels = np.reshape(evaluated_labels, (-1, 2))
        if len(predictions) == 0:
            raise ValueError(
                'No batches were evaluated; check FLAGS.input_file, '
                'FLAGS.seq_length and FLAGS.batch_size')

        conc_arousal = concordance_cc2(predictions[:, 1], labels[:, 1])
        conc_valence = concordance_cc2(predictions[:, 0], labels[:, 0])
        conc_total = (conc_arousal + conc_valence) / 2

        for label, pred in zip(labels, predictions):
            print("Labels: ", label, "Predictions: ", pred, "Error: ",
                  (abs(label - pred)))
        print(
            "------------------------------------------------------------------------------"
        )

        print('Concordance on valence : {}'.format(conc_valence))
        print('Concordance on arousal : {}'.format(conc_arousal))
        print('Concordance on total : {}'.format(conc_total))

        mse_arousal = np.mean((predictions[:, 1] - labels[:, 1])**2)
        print('MSE Arousal : {}'.format(mse_arousal))
        mse_valence = np.mean((predictions[:, 0] - labels[:, 0])**2)
        print('MSE Valence : {}'.format(mse_valence))

    return conc_valence, conc_arousal, conc_total, mse_arousal, mse_valence
def inference(
    images,
    batch_size,
    sequence_length,
    network,
    pretrained_model_checkpoint_path,
    output,
    size=96,
):
    """Run one forward pass of a pretrained network and record the last row.

    The images are converted to a tensor, resized to ``size`` x ``size``,
    scaled into [-1, 1] and fed to the network selected by ``network``.  All
    global variables are restored from ``pretrained_model_checkpoint_path``.
    The last row of the prediction is printed (column 0 as valence, column 1
    as arousal) and appended to ``output``.

    Args:
        images: array-like of images; passed through ``np.array`` and
            ``tf.convert_to_tensor``.  Presumably RGB frames, since the
            tensor is reshaped to ``[-1, size, size, 3]`` -- confirm with
            the caller.
        batch_size: batch dimension used when constructing the network.
        sequence_length: sequence dimension used when constructing the
            network.
        network: one of ``'vggface_4096'``, ``'vggface_2000'``,
            ``'affwildnet_resnet'`` or ``'affwildnet_vggface'``.
        pretrained_model_checkpoint_path: checkpoint to restore from.
        output: list that receives one ``(valence, arousal)`` tuple of
            strings; it is mutated in place.
        size: target height and width for resizing.

    Returns:
        None.  The result is delivered through ``output``.

    Raises:
        ValueError: if ``network`` is not one of the supported names.
    """
    known_networks = ('vggface_4096', 'vggface_2000', 'affwildnet_resnet',
                      'affwildnet_vggface')
    # Fail fast, before any graph is built, instead of hitting an undefined
    # ``prediction`` further down.
    if network not in known_networks:
        raise ValueError(
            f"Unknown network {network!r}; expected one of: "
            f"{', '.join(known_networks)}")

    g = tf.Graph()
    with g.as_default():
        image_tensor = tf.convert_to_tensor(np.array(images))
        images_batch = tf.image.resize_images(
            image_tensor, tf.convert_to_tensor([size, size]))

        # Scale all pixel values into the range [-1, 1].
        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0

        images_batch = tf.reshape(images_batch, [-1, size, size, 3])

        if network in ('vggface_4096', 'vggface_2000'):
            if network == 'vggface_4096':
                from vggface import vggface_4096x4096x2 as net
            else:
                from vggface import vggface_4096x2000x2 as net
            model = net.VGGFace(batch_size * sequence_length)
            model.setup(images_batch)
            prediction = model.get_output()
        elif network == 'affwildnet_resnet':
            from tensorflow.contrib.slim.python.slim.nets import resnet_v1
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                features, _ = resnet_v1.resnet_v1_50(inputs=images_batch,
                                                     is_training=False,
                                                     num_classes=None)
            with tf.variable_scope('rnn'):
                cnn = tf.reshape(features,
                                 [batch_size, sequence_length, -1])
                cell = tf.nn.rnn_cell.MultiRNNCell(
                    [tf.nn.rnn_cell.GRUCell(128) for _ in range(2)])
                outputs, _ = tf.nn.dynamic_rnn(cell, cnn, dtype=tf.float32)
                outputs = tf.reshape(outputs,
                                     (batch_size * sequence_length, 128))

                weights_initializer = tf.truncated_normal_initializer(
                    stddev=0.01)
                weights = tf.get_variable('weights_output',
                                          shape=[128, 2],
                                          initializer=weights_initializer,
                                          trainable=True)
                biases = tf.get_variable('biases_output',
                                         shape=[2],
                                         initializer=tf.zeros_initializer,
                                         trainable=True)
                prediction = tf.nn.xw_plus_b(outputs, weights, biases)
        else:  # 'affwildnet_vggface' (validated above)
            from affwildnet import vggface_gru as net
            model = net.VGGFace(batch_size, sequence_length)
            model.setup(images_batch)
            prediction = model.get_output()

        variables_to_restore = tf.global_variables()

        with tf.Session() as sess:
            init_fn = slim.assign_from_checkpoint_fn(
                pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)
            init_fn(sess)
            pr = sess.run(prediction)

        # Only the last prediction row is reported.
        valence = pr[-1][0]
        arousal = pr[-1][1]
        print(f"valence: {valence:.3f} arousal: {arousal:.3f}")
        output.append((f"{valence}", f"{arousal}"))