def add_image_preprocessing(dataset, input_nchan, image_size, batch_size,
                            num_compute_devices, input_data_type,
                            resize_method, train):
    """Add the image input ops (real or synthetic) to the TF graph.

    Args:
        dataset: Dataset handed to ``ImagePreprocessor.minibatch``; pass
            ``None`` to generate synthetic images and labels instead.
        input_nchan: Channel count of the synthetic images (only read on
            the synthetic path).
        image_size: Used as both height and width of the images.
        batch_size: Number of examples per batch.
        num_compute_devices: Number of per-device splits to produce.
        input_data_type: dtype of the images.
        resize_method: Forwarded to ``ImagePreprocessor``.
        train: Truthy selects the 'train' subset, otherwise 'validation'.

    Returns:
        A ``(nclass, images_splits, labels_splits)`` tuple. On the real-data
        path the splits are exactly what ``minibatch`` returned; on the
        synthetic path they are one-element lists for a single device, or
        the result of ``tf.split`` along axis 0 otherwise.
    """
    # The class count is fixed regardless of dataset so that runs stay
    # comparable; the original author notes this works because
    # sparse_softmax_cross_entropy is used.
    nclass = 1001

    if dataset is not None:
        preprocessor = preprocessing.ImagePreprocessor(
            image_size, image_size, batch_size,
            num_compute_devices, input_data_type,
            train=train, resize_method=resize_method)
        images_splits, labels_splits = preprocessor.minibatch(
            dataset, subset='train' if train else 'validation')
        return nclass, images_splits, labels_splits

    # Synthetic path: random images plus labels drawn from [1, nclass).
    synthetic_images = tf.truncated_normal(
        [batch_size, image_size, image_size, input_nchan],
        dtype=input_data_type, stddev=1e-1, name='synthetic_images')
    synthetic_labels = tf.random_uniform(
        [batch_size], minval=1, maxval=nclass, dtype=tf.int32,
        name='synthetic_labels')

    # Keeping the random values in local variables avoids recomputing them;
    # per the original author's note this still results in an H2D copy.
    images = tf.contrib.framework.local_variable(synthetic_images,
                                                 name='images')
    labels = tf.contrib.framework.local_variable(synthetic_labels,
                                                 name='labels')

    # Shift to 0-based labels (no background class, unlike Inception).
    labels -= 1

    if num_compute_devices == 1:
        return nclass, [images], [labels]

    images_splits = tf.split(images, num_compute_devices, 0)
    labels_splits = tf.split(labels, num_compute_devices, 0)
    return nclass, images_splits, labels_splits
input_height = 224 if args.input_width: input_width = args.input_width else: input_width = 224 batch_size = args.batch_size input_layer = args.input_layer output_layer = args.output_layer num_inter_threads = args.num_inter_threads num_intra_threads = args.num_intra_threads data_location = args.data_location dataset = datasets.ImagenetData(data_location) preprocessor = preprocessing.ImagePreprocessor( input_height, input_width, batch_size, 1, # device count tf.float32, # data_type for input fed to the graph train=False, # doing inference resize_method='crop') images, labels = preprocessor.minibatch(dataset, subset='validation') graph = load_graph(model_file) input_tensor = graph.get_tensor_by_name(input_layer + ":0") output_tensor = graph.get_tensor_by_name(output_layer + ":0") config = tf.ConfigProto() config.inter_op_parallelism_threads = num_inter_threads config.intra_op_parallelism_threads = num_intra_threads total_accuracy1, total_accuracy5 = (0.0, 0.0) num_processed_images = 0 num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \
def run(self):
    """Run the benchmark against the optimized (transformed) frozen graph.

    Loads the frozen graph named by ``self.args.input_graph``, applies the
    graph transforms, and imports the result into a fresh graph. Then, based
    on ``self.args.accuracy_only``:

    * performance mode: runs up to 40 batches (real data when
      ``self.args.data_location`` is set, synthetic data otherwise), treats
      the first 10 as warm-up, and prints per-iteration time, average time,
      latency (batch size 1 only) and throughput;
    * accuracy mode: walks the validation set batch by batch, optionally
      appending ``filename,actual,prediction`` rows to
      ``self.args.results_file_path``, and prints running top-1 / top-5
      accuracy after every batch.

    Raises:
        ValueError: if accuracy mode is requested without
            ``self.args.data_location`` (no labels are available).
    """
    args = self.args
    batch_size = args.batch_size

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.intra_op_parallelism_threads = args.num_intra_threads
    config.inter_op_parallelism_threads = args.num_inter_threads

    with tf.Graph().as_default() as graph, \
            tf.Session(config=config) as sess:
        # Convert the frozen graph to the optimized graph.
        graph_def = tf.GraphDef()
        with tf.gfile.FastGFile(args.input_graph, 'rb') as input_file:
            graph_def.ParseFromString(input_file.read())

        output_graph = graph_transforms.TransformGraph(
            graph_def, [INPUTS], [OUTPUTS], [OPTIMIZATION])
        tf.import_graph_def(output_graph, name='')

        # Input and output tensors of the imported graph.
        input_tensor = graph.get_tensor_by_name('input:0')
        output_tensor = graph.get_tensor_by_name('predict:0')

        num_processed_images = 0
        labels = None
        filenames = None

        if args.data_location:
            print("Inference with real data.")
            dataset = datasets.ImagenetData(args.data_location)
            preprocessor = preprocessing.ImagePreprocessor(
                RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, batch_size,
                1,  # device count
                tf.float32,  # data_type for input fed to the graph
                train=False,  # doing inference
                resize_method='crop')
            images, labels, filenames = preprocessor.minibatch(
                dataset, subset='validation')
            num_remaining_images = dataset.num_examples_per_epoch(
                subset='validation')
        else:
            print("Inference with dummy data.")
            input_shape = [batch_size, RESNET_IMAGE_SIZE,
                           RESNET_IMAGE_SIZE, 3]
            images = tf.random.uniform(input_shape, 0.0, 255.0,
                                       dtype=tf.float32,
                                       name='synthetic_images')
            num_remaining_images = IMAGENET_VALIDATION_IMAGES

        if not args.accuracy_only:
            # ---- performance check ----
            warm_up_iteration = 10
            total_run = 40
            iteration = 0
            total_time = 0.0

            while (num_remaining_images >= batch_size
                   and iteration < total_run):
                iteration += 1

                # Read and preprocess one batch outside the timed region.
                if args.data_location:
                    image_np = sess.run(images[0])
                else:
                    image_np = sess.run(images)

                num_processed_images += batch_size
                num_remaining_images -= batch_size

                start_time = time.time()
                sess.run([output_tensor], feed_dict={input_tensor: image_np})
                time_consume = time.time() - start_time

                print('Iteration %d: %.3f sec' % (iteration, time_consume))
                if iteration > warm_up_iteration:
                    total_time += time_consume

            # Only iterations after the warm-up are timed. With a large
            # batch size or a small dataset the loop can stop before any
            # timed iteration happens, so there is nothing to average.
            timed_iterations = iteration - warm_up_iteration
            if timed_iterations <= 0:
                print('Not enough iterations to report timing: ran %d, '
                      'need more than %d warm-up iterations'
                      % (iteration, warm_up_iteration))
                return

            time_average = total_time / timed_iterations
            print('Average time: %.3f sec' % (time_average))

            print('Batch size = %d' % batch_size)
            if batch_size == 1:
                print('Latency: %.3f ms' % (time_average * 1000))
            # print throughput for both batch size 1 and 128
            print('Throughput: %.3f images/sec'
                  % (batch_size / time_average))
            return

        # ---- accuracy check ----
        if labels is None:
            raise ValueError(
                'Accuracy check requires real data: data_location is not set')

        total_accuracy1, total_accuracy5 = (0.0, 0.0)

        # Build the top-k counting ops once. Creating them inside the loop
        # would add new nodes to the graph for every batch, growing memory
        # and slowing down each successive step.
        labels_placeholder = tf.placeholder(labels[0].dtype, shape=[None])
        top1_hits = tf.reduce_sum(tf.cast(
            tf.nn.in_top_k(output_tensor, labels_placeholder, 1),
            tf.float32))
        top5_hits = tf.reduce_sum(tf.cast(
            tf.nn.in_top_k(output_tensor, labels_placeholder, 5),
            tf.float32))

        # If a results file path is provided, start the prediction output
        # file with its header row.
        if args.results_file_path:
            with open(args.results_file_path, "w+") as fp:
                fp.write("filename,actual,prediction\n")

        while num_remaining_images >= batch_size:
            # Read and preprocess one batch of data.
            np_images, np_labels, tf_filenames = sess.run(
                [images[0], labels[0], filenames[0]])
            num_processed_images += batch_size
            num_remaining_images -= batch_size

            # Run inference and count top-1 / top-5 hits in the same step.
            predictions, np_accuracy1, np_accuracy5 = sess.run(
                [output_tensor, top1_hits, top5_hits],
                {input_tensor: np_images, labels_placeholder: np_labels})

            # Write out the file name, expected label, and top prediction.
            if args.results_file_path:
                top_predictions = np.argmax(predictions, 1)
                with open(args.results_file_path, "a") as fp:
                    for filename, expected_label, top_prediction in zip(
                            tf_filenames, np_labels, top_predictions):
                        fp.write("{},{},{}\n".format(
                            filename, expected_label, top_prediction))

            total_accuracy1 += np_accuracy1
            total_accuracy5 += np_accuracy5

            print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)"
                  % (num_processed_images,
                     total_accuracy1 / num_processed_images,
                     total_accuracy5 / num_processed_images))