Exemplo n.º 1
0
def _apply_graph_transform_tool_rewrites(g, input_node_names,
                                         output_node_names):
    # type: (gde.Graph, List[str], List[str]) -> tf.GraphDef
    """
    Apply a fixed series of pre-deployment rewrites with the
    [Graph Transform Tool](
    https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/
    graph_transforms/README.md).

    Args:
        g: GDE representation of the core graph.
        input_node_names: Names of the placeholder nodes that feed the graph
            at inference time. Placeholders that are NOT listed here are
            treated as dead code.
        output_node_names: Names of the nodes whose tensors are the inference
            outputs. Nodes that are not needed to produce these tensors are
            treated as dead code.

    Returns: GraphDef representation of the rewritten graph.
    """
    # The transform set recommended in the README under "Optimizing for
    # Deployment".
    deployment_transforms = [
        'strip_unused_nodes(type=float, shape="1,299,299,3")',
        'remove_nodes(op=Identity, op=CheckNumerics)',
        'fold_constants(ignore_errors=true)',
        'fold_batch_norms',
        'fold_old_batch_norms',
    ]
    source_graph_def = g.to_graph_def()

    # The Graph Transform Tool is reached through the undocumented Python APIs
    # under tensorflow.tools.graph_transforms.
    return graph_transforms.TransformGraph(
        source_graph_def,
        inputs=input_node_names,
        outputs=output_node_names,
        transforms=deployment_transforms)
Exemplo n.º 2
0
def _gtt_transforms(graph_def, input_names, output_names, initializer_names,
                    transforms):
    """Run the requested Graph Transform Tool transforms over `graph_def`.

    Args:
      graph_def: A GraphDef proto to be transformed.
      input_names: Names of input nodes.
      output_names: Names of output nodes.
      initializer_names: Dictionary of the "infrastructural" nodes
        (initializers, save and restore ops, etc.) that should be retained
        even if they are not transitively reachable from output nodes. The
        keys of this dictionary indicate the collection each group of nodes
        was obtained from.
      transforms: A list of strings naming the graph transforms to be applied
        in order.
    Returns:
      The transformed GraphDef. When `transforms` is empty this is a copy of
      `graph_def`.
    """
    if transforms:
        # Flatten every collection's node names into a single sorted list so
        # they can be passed to the tool as additional outputs to retain.
        retained_names = []
        for collection_names in initializer_names.values():
            retained_names.extend(collection_names)
        retained_names.sort()

        return _graph_transforms.TransformGraph(
            graph_def, input_names, output_names + retained_names, transforms)

    # Nothing to apply: hand back a separate copy rather than the input proto.
    graph_def_copy = _graph_pb2.GraphDef()
    graph_def_copy.CopyFrom(graph_def)
    return graph_def_copy
def main(_):
    """Convert a serialized model graph into a versioned SavedModel.

    Validates the command line, reads the GraphDef stored at
    ``FLAGS.import_path``, passes it through ``TransformGraph`` using the
    module-level ``INPUTS``, ``OUTPUTS`` and ``OPTIMIZATION`` values, and
    exports the result with a predict signature to
    ``FLAGS.export_dir + '/' + str(FLAGS.model_version)``.

    Calls ``sys.exit(-1)`` after printing a message when the arguments are
    missing or invalid.

    Args:
      _: Unused positional argument.
    """
    if len(sys.argv) < 2 or sys.argv[-1].startswith('-'):
        print(
            'Usage: model_graph_to_saved_model.py [--model_version=y] import_path export_dir'
        )
        sys.exit(-1)
    if FLAGS.import_path == '':
        print(
            'Please specify the path to the model graph you want to convert to SavedModel format.'
        )
        sys.exit(-1)
    if FLAGS.model_version <= 0:
        print('Please specify a positive value for version number.')
        sys.exit(-1)

    # Built once so the logged path and the path handed to the builder cannot
    # drift apart.
    export_path = FLAGS.export_dir + '/' + str(FLAGS.model_version)

    # Import model graph
    with tf.Session() as sess:
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.import_path, 'rb') as input_file:
            graph_def.ParseFromString(input_file.read())

        # Apply transform optimizations
        output_graph = graph_transforms.TransformGraph(graph_def, [INPUTS],
                                                       [OUTPUTS],
                                                       [OPTIMIZATION])

        # Graph.as_default() only takes effect when entered as a context
        # manager; a bare call is a no-op. Enter it so the import explicitly
        # targets the session's graph.
        with sess.graph.as_default():
            tf.import_graph_def(output_graph, name='')

        # Build the signature_def_map.
        in_image = sess.graph.get_tensor_by_name('input:0')
        inputs = {INPUTS: tf.saved_model.utils.build_tensor_info(in_image)}

        out_classes = sess.graph.get_tensor_by_name('predict:0')
        outputs = {
            OUTPUTS: tf.saved_model.utils.build_tensor_info(out_classes)
        }

        signature = tf.saved_model.signature_def_utils.build_signature_def(
            inputs=inputs,
            outputs=outputs,
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

        # Save out the SavedModel
        print('Exporting trained model to', export_path)
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                signature
            })
        builder.save()

    print('Done!')
Exemplo n.º 4
0
def _do_transforms(graph_def,
                   input_names,
                   output_names,
                   initializer_names,
                   transforms,
                   saver_def=None,
                   checkpoint_path=None):
    """Apply the requested transforms, including freezing, to a GraphDef.

    Graph Transform Tool passes are interleaved with graph freezing: the
    transform list is split at the first 'freeze_graph' entry, the part before
    it is applied, the graph is frozen, and the part after it is applied to
    the frozen graph.

    Args:
      graph_def: A GraphDef proto to be transformed.
      input_names: Names of input nodes.
      output_names: Names of output nodes.
      initializer_names: Names of "infrastructural" nodes (initializers, save
        and restore ops, etc.) that should be retained even if they are not
        transitively reachable from output nodes.
      transforms: A list of strings naming the graph transforms to be applied
        in order.  These transform names are exactly those supported by the
        Graph Transform Tool, with the addition of the 'freeze_graph'
        transform.
      saver_def: A SaverDef proto used for restoring a checkpoint during
        freezing, if needed (default None).
      checkpoint_path:  A path to a checkpoint to restore during freezing,
        if needed (default None).
    Returns:
      The transformed GraphDef.
    """
    # Nothing requested: return a separate copy of the input proto.
    if not transforms:
        graph_def_copy = _graph_pb2.GraphDef()
        graph_def_copy.CopyFrom(graph_def)
        return graph_def_copy

    # No freeze_graph requested, so do all transforms in one go.
    if 'freeze_graph' not in transforms:
        return _graph_transforms.TransformGraph(
            graph_def, input_names, output_names + initializer_names,
            transforms)

    # freeze_graph requested, possibly with transforms before and after.
    split_at = transforms.index('freeze_graph')
    before_freeze = transforms[:split_at]
    after_freeze = transforms[split_at + 1:]

    pre_frozen_def = _do_transforms(graph_def, input_names, output_names,
                                    initializer_names, before_freeze,
                                    saver_def, checkpoint_path)
    frozen_def = _freeze_graph_with_def_protos(
        pre_frozen_def, [_op_name(name) for name in output_names], saver_def,
        checkpoint_path)

    # No need for saver or checkpoint anymore, and no initializer nodes are
    # passed for the post-freeze phase.
    return _do_transforms(frozen_def, input_names, output_names, [],
                          after_freeze)
def inference(image_path, input_graph):
    """Classify one JPEG image with a serialized model graph and print the result.

    The image is decoded to 3 channels and, if it is not already 224x224x3,
    passed through ``preprocess``. The GraphDef read from ``input_graph`` is
    run through ``TransformGraph`` with the module-level ``INPUTS``,
    ``OUTPUTS`` and ``OPTIMIZATION`` values before being imported. The
    arg-max of the 'predict:0' tensor and its ``word_label.label`` entry are
    printed to stdout.

    Args:
      image_path: Path of the JPEG image to classify.
      input_graph: Path of the serialized GraphDef (.pb) file.
    """
    # Read the image in JPEG format.
    decode_op = tf.image.decode_jpeg(tf.read_file(image_path), channels=3)

    # The session is opened with `with` so it is closed once the image has
    # been decoded; preprocess() still runs while this session is the default,
    # exactly as before.
    with tf.Session() as sess:
        image = sess.run(decode_op)
        if (image.shape[0] != 224 or image.shape[1] != 224
                or image.shape[2] != 3):
            # If the shape is not 224*224*3, preprocess the image (e.g. resize).
            image = preprocess(image)

        image = image.reshape(1, 224, 224, 3)

    # Configure the inference session.
    infer_config = tf.ConfigProto()
    infer_config.intra_op_parallelism_threads = 26
    infer_config.inter_op_parallelism_threads = 1
    infer_config.use_per_session_threads = 1

    # Read the pb model.
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        graph_def = tf.GraphDef()
        with tf.gfile.FastGFile(input_graph, 'rb') as input_file:
            graph_def.ParseFromString(input_file.read())

        output_graph = graph_transforms.TransformGraph(graph_def, [INPUTS],
                                                       [OUTPUTS],
                                                       [OPTIMIZATION])
        tf.import_graph_def(output_graph, name='')

    # Input and output tensors of the inference graph.
    input_tensor = infer_graph.get_tensor_by_name('input:0')
    output_tensor = infer_graph.get_tensor_by_name('predict:0')

    # Closed automatically once the prediction has been fetched.
    with tf.Session(graph=infer_graph, config=infer_config) as infer_sess:
        predictions = infer_sess.run(output_tensor, {input_tensor: image})

    top_class = np.argmax(predictions)
    print(top_class)
    # The label table is indexed one below the predicted class index.
    print("This image belong to : \"{}\"".format(
        word_label.label[top_class - 1]))
    def run(self):
        """Run the benchmark with the optimized graph.

        Builds a data graph (ImageNet validation images when
        ``self.args.data_location`` is set, otherwise random synthetic images)
        and an inference graph obtained by passing the GraphDef read from
        ``self.args.input_graph`` through ``TransformGraph``. Depending on
        ``self.args.accuracy_only`` it then either times inference or
        accumulates top-1/top-5 accuracy, printing the results to stdout.

        Raises:
            ValueError: if ``accuracy_only`` is set without ``data_location``,
                because labels only exist when real data is read.
        """

        print("Run inference")

        if self.args.accuracy_only and not self.args.data_location:
            # The accuracy loop fetches `labels`, which is only defined on the
            # real-data path; fail clearly instead of with an
            # UnboundLocalError after both graphs have been built.
            raise ValueError(
                "accuracy_only requires data_location: labels are only "
                "available when reading real data.")

        data_config = tf.ConfigProto()
        data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads
        data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads
        data_config.use_per_session_threads = 1

        infer_config = tf.ConfigProto()
        infer_config.intra_op_parallelism_threads = self.args.num_intra_threads
        infer_config.inter_op_parallelism_threads = self.args.num_inter_threads
        infer_config.use_per_session_threads = 1

        data_graph = tf.Graph()
        with data_graph.as_default():
            if (self.args.data_location):
                print("Inference with real data.")
                dataset = datasets.ImagenetData(self.args.data_location)
                preprocessor = dataset.get_image_preprocessor()(
                    RESNET_IMAGE_SIZE,
                    RESNET_IMAGE_SIZE,
                    self.args.batch_size,
                    intra_threads=self.args.num_intra_threads,
                    resize_method='crop')
                images, labels = preprocessor.minibatch(dataset,
                                                        subset='validation')
            else:
                print("Inference with dummy data.")
                input_shape = [
                    self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE,
                    3
                ]
                images = tf.random.uniform(input_shape,
                                           0.0,
                                           255.0,
                                           dtype=tf.float32,
                                           name='synthetic_images')

        infer_graph = tf.Graph()
        with infer_graph.as_default():
            # convert the freezed graph to optimized graph
            graph_def = tf.GraphDef()
            with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file:
                input_graph_content = input_file.read()
                graph_def.ParseFromString(input_graph_content)

            output_graph = graph_transforms.TransformGraph(
                graph_def, [INPUTS], [OUTPUTS], [OPTIMIZATION])
            tf.import_graph_def(output_graph, name='')

        # Input and output tensors of the inference graph
        input_tensor = infer_graph.get_tensor_by_name('input:0')
        #output_tensor = infer_graph.get_tensor_by_name('resnet_v1_101/SpatialSqueeze:0')
        output_tensor = infer_graph.get_tensor_by_name(
            'resnet_v1_101/predictions/Reshape_1:0')

        # Data loading and inference use separate sessions, each with its own
        # thread configuration.
        data_sess = tf.Session(graph=data_graph, config=data_config)
        infer_sess = tf.Session(graph=infer_graph, config=infer_config)

        num_processed_images = 0
        num_remaining_images = IMAGENET_VALIDATION_IMAGES

        if (not self.args.accuracy_only):  # performance check
            iteration = 0
            warm_up_iteration = self.args.warmup_steps
            total_run = self.args.steps
            total_time = 0
            #options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            #run_metadata = tf.RunMetadata()

            while num_remaining_images >= self.args.batch_size and iteration < total_run:
                iteration += 1

                # Reads and preprocess data
                data_load_start = time.time()
                image_np = data_sess.run(images)
                data_load_time = time.time() - data_load_start

                num_processed_images += self.args.batch_size
                num_remaining_images -= self.args.batch_size

                start_time = time.time()
                infer_sess.run([output_tensor],
                               feed_dict={input_tensor: image_np})
                time_consume = time.time() - start_time

                # only add data loading time for real data, not for dummy data
                if self.args.data_location:
                    time_consume += data_load_time

                #trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                #with gfile.Open('resnet101_fp32_int8_master', 'w') as trace_file:
                #    trace_file.write(trace.generate_chrome_trace_format(show_memory=False))

                print('Iteration %d: %.3f sec' % (iteration, time_consume))
                if iteration > warm_up_iteration:
                    total_time += time_consume

            # Only iterations past the warm-up phase were added to total_time.
            # If there are none, the average is undefined; report that rather
            # than dividing by zero.
            timed_iterations = iteration - warm_up_iteration
            if timed_iterations <= 0:
                print('No timed iterations: ran %d iteration(s), all within '
                      'the %d warm-up step(s).' %
                      (iteration, warm_up_iteration))
                return

            time_average = total_time / timed_iterations
            print('Average time: %.3f sec' % (time_average))

            print('Batch size = %d' % self.args.batch_size)
            if (self.args.batch_size == 1):
                print('Latency: %.3f ms' % (time_average * 1000))
            # print throughput for both batch size 1 and 128
            print('Throughput: %.3f images/sec' %
                  (self.args.batch_size / time_average))

        else:  # accuracy check
            total_accuracy1, total_accuracy5 = (0.0, 0.0)

            while num_remaining_images >= self.args.batch_size:
                # Reads and preprocess data
                np_images, np_labels = data_sess.run([images, labels])
                # Shift the labels down by one before comparing them with the
                # model's predictions.
                np_labels -= 1
                num_processed_images += self.args.batch_size
                num_remaining_images -= self.args.batch_size

                start_time = time.time()
                # Compute inference on the preprocessed data
                predictions = infer_sess.run(output_tensor,
                                             {input_tensor: np_images})
                elapsed_time = time.time() - start_time

                # The accuracy ops are built in a throwaway graph per batch so
                # they are not added to the data or inference graphs.
                with tf.Graph().as_default():
                    accuracy1 = tf.reduce_sum(
                        tf.cast(
                            tf.nn.in_top_k(tf.constant(predictions),
                                           tf.constant(np_labels), 1),
                            tf.float32))

                    accuracy5 = tf.reduce_sum(
                        tf.cast(
                            tf.nn.in_top_k(tf.constant(predictions),
                                           tf.constant(np_labels), 5),
                            tf.float32))
                    with tf.Session() as accu_sess:
                        np_accuracy1, np_accuracy5 = accu_sess.run(
                            [accuracy1, accuracy5])
                    total_accuracy1 += np_accuracy1
                    total_accuracy5 += np_accuracy5
                    # elapsed_time is in seconds; convert it so the value
                    # matches the "ms" unit in the message.
                    print("Iteration time: %0.4f ms" % (elapsed_time * 1000))
                    print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \
                        % (num_processed_images, total_accuracy1 / num_processed_images,
                           total_accuracy5 / num_processed_images))
Exemplo n.º 7
0
  def run(self):
    """Run the benchmark with the optimized graph.

    Builds a data graph (ImageNet images from the 'calibration' or
    'validation' subset when ``self.args.data_location`` is set, otherwise
    random synthetic images) and an inference graph obtained by passing the
    GraphDef read from ``self.args.input_graph`` through ``TransformGraph``.
    Depending on ``self.args.accuracy_only`` it then either times inference or
    accumulates top-1/top-5 accuracy, printing the results to stdout. When
    ``self.args.results_file_path`` is set and real data is used, a CSV header
    is written to that file before the run starts.

    Raises:
      ValueError: if ``accuracy_only`` is set without ``data_location``,
        because labels only exist when real data is read.
    """

    print("Run inference")

    if self.args.accuracy_only and not self.args.data_location:
      # The accuracy loop fetches `labels`, which is only defined on the
      # real-data path; fail clearly instead of with an UnboundLocalError
      # after both graphs have been built.
      raise ValueError(
          "accuracy_only requires data_location: labels are only "
          "available when reading real data.")

    data_config = tf.ConfigProto()
    data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads
    data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads
    data_config.use_per_session_threads = 1

    infer_config = tf.ConfigProto()
    infer_config.intra_op_parallelism_threads = self.args.num_intra_threads
    infer_config.inter_op_parallelism_threads = self.args.num_inter_threads
    infer_config.use_per_session_threads = 1

    data_graph = tf.Graph()
    with data_graph.as_default():
      if (self.args.data_location):
        print("Inference with real data.")
        if self.args.calibrate:
          subset = 'calibration'
        else:
          subset = 'validation'
        dataset = datasets.ImagenetData(self.args.data_location)
        preprocessor = dataset.get_image_preprocessor()(
            RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size,
            num_cores=self.args.num_cores,
            resize_method='crop')

        images, labels, filenames = preprocessor.minibatch(dataset, subset=subset)

        # If a results file path is provided, then start the prediction output file
        if self.args.results_file_path:
          with open(self.args.results_file_path, "w+") as fp:
            fp.write("filename,actual,prediction\n")
      else:
        print("Inference with dummy data.")
        input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3]
        images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images')

    infer_graph = tf.Graph()
    with infer_graph.as_default():
      graph_def = tf.GraphDef()
      with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file:
        input_graph_content = input_file.read()
        graph_def.ParseFromString(input_graph_content)

      output_graph = graph_transforms.TransformGraph(graph_def,
                                         [INPUTS], [OUTPUTS], [OPTIMIZATION])
      tf.import_graph_def(output_graph, name='')

    # Input and output tensors of the inference graph
    input_tensor = infer_graph.get_tensor_by_name('input:0')
    output_tensor = infer_graph.get_tensor_by_name('predict:0')

    # Data loading and inference use separate sessions, each with its own
    # thread configuration.
    data_sess = tf.Session(graph=data_graph,  config=data_config)
    infer_sess = tf.Session(graph=infer_graph, config=infer_config)

    num_processed_images = 0
    # Real data: one pass over the chosen subset. Dummy data: exactly enough
    # images for the requested number of steps.
    num_remaining_images = dataset.num_examples_per_epoch(subset=subset) - num_processed_images \
        if self.args.data_location else (self.args.batch_size * self.args.steps)

    if (not self.args.accuracy_only):
      iteration = 0
      warm_up_iteration = self.args.warmup_steps
      total_run = self.args.steps
      total_time = 0

      while num_remaining_images >= self.args.batch_size and iteration < total_run:
        iteration += 1
        tf_filenames = None
        np_labels = None
        data_load_start = time.time()
        if self.args.results_file_path:
          image_np, np_labels, tf_filenames = data_sess.run([images, labels, filenames])
        else:
          image_np = data_sess.run(images)

        data_load_time = time.time() - data_load_start

        num_processed_images += self.args.batch_size
        num_remaining_images -= self.args.batch_size

        start_time = time.time()
        predictions = infer_sess.run(output_tensor, feed_dict={input_tensor: image_np})
        time_consume = time.time() - start_time

        # Write out the file name, expected label, and top prediction
        self.write_results_output(predictions, tf_filenames, np_labels)

        # only add data loading time for real data, not for dummy data
        if self.args.data_location:
          time_consume += data_load_time

        print('Iteration %d: %.6f sec' % (iteration, time_consume))
        if iteration > warm_up_iteration:
          total_time += time_consume

      # Only iterations past the warm-up phase were added to total_time. If
      # there are none, the average is undefined; report that rather than
      # dividing by zero.
      timed_iterations = iteration - warm_up_iteration
      if timed_iterations <= 0:
        print('No timed iterations: ran %d iteration(s), all within '
              'the %d warm-up step(s).' % (iteration, warm_up_iteration))
        return

      time_average = total_time / timed_iterations
      print('Average time: %.6f sec' % (time_average))

      print('Batch size = %d' % self.args.batch_size)
      if (self.args.batch_size == 1):
        print('Latency: %.3f ms' % (time_average * 1000))
      # print throughput for both batch size 1 and 128
      print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average))

    else: # accuracy check
      total_accuracy1, total_accuracy5 = (0.0, 0.0)

      while num_remaining_images >= self.args.batch_size:
        # Reads and preprocess data
        tf_filenames = None
        if self.args.results_file_path:
          np_images, np_labels, tf_filenames = data_sess.run([images, labels, filenames])
        else:
          np_images, np_labels = data_sess.run([images, labels])
        num_processed_images += self.args.batch_size
        num_remaining_images -= self.args.batch_size

        start_time = time.time()
        # Compute inference on the preprocessed data
        predictions = infer_sess.run(output_tensor,
                               {input_tensor: np_images})
        elapsed_time = time.time() - start_time

        # Write out the file name, expected label, and top prediction
        self.write_results_output(predictions, tf_filenames, np_labels)

        # The accuracy ops are built in a throwaway graph per batch so they
        # are not added to the data or inference graphs.
        with tf.Graph().as_default():
          accuracy1 = tf.reduce_sum(
            tf.cast(tf.nn.in_top_k(tf.constant(predictions),
                                   tf.constant(np_labels), 1), tf.float32))

          accuracy5 = tf.reduce_sum(
            tf.cast(tf.nn.in_top_k(tf.constant(predictions),
                                   tf.constant(np_labels), 5), tf.float32))
          with tf.Session() as accu_sess:
            np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5])

          total_accuracy1 += np_accuracy1
          total_accuracy5 += np_accuracy5

        # elapsed_time is in seconds; convert it so the value matches the
        # "ms" unit in the message.
        print("Iteration time: %0.4f ms" % (elapsed_time * 1000))
        print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \
                  % (num_processed_images, total_accuracy1 / num_processed_images,
                     total_accuracy5 / num_processed_images))
Exemplo n.º 8
0
    def tf_run_const_folding(self, file):
        """Fold constants in the graph stored in `file` and save the result.

        Graph inputs are taken to be the output tensors of every Placeholder
        op. Graph outputs are the input tensors of every non-NoOp operation
        whose outputs are not consumed by any other operation. The graph is
        then run through the "fold_constants" and "strip_unused_nodes"
        transforms and written in binary form to `file` with its last three
        characters replaced by ".const_folded.pb". That path is also stored in
        `self.folded_graph`.

        Args:
            file: Path of the graph file handed to `self.import_graph`.
        """
        print("run const folding----------------------------")
        tf.reset_default_graph()
        graph_def, graph = self.import_graph(file)

        print()
        if self.debug:
            print('Placeholders:')
        assert graph is not None
        ops = graph.get_operations()  # type: Iterable[tf.Operation]

        # Every tensor produced by a Placeholder is a graph input.
        input_nodes = []
        for op in ops:
            if op.type != 'Placeholder':
                continue
            for tensor in op.outputs:
                if self.debug:
                    print('- {0:20s} {1}'.format("Tensor", tensor.name))
                input_nodes.append(tensor.name)

        if self.debug:
            print()
            print('Sinks (operations without outputs):')

        # For each operation, record the indices of the operations that
        # consume one of its output tensors.
        index_by_name = {op.name: idx for idx, op in enumerate(ops)}
        consumers = [[] for _ in ops]
        for idx, op in enumerate(ops):
            for tensor in op.inputs:
                producer_name = tensor.name.split(':')[0]
                consumers[index_by_name[producer_name]].append(idx)

        # An operation nobody consumes is a sink; the tensors feeding it are
        # used as the graph outputs. NoOp sinks are skipped (their control
        # inputs are not followed).
        last_outputs = []
        for op, users in zip(ops, consumers):
            if users or op.type == 'NoOp':
                continue
            if self.debug:
                print('- {0:20s} {1}'.format(op.type, op.name))
            for tensor in op.inputs:
                if self.debug:
                    print('<-in-- {0:20s}'.format(tensor.name))
                last_outputs.append(tensor.name)

        print(input_nodes)
        print(last_outputs)
        # Import the untransformed graph into the default graph; the call's
        # return value is not used.
        tf.import_graph_def(graph_def, name="")
        folded_graph_def = graph_transforms.TransformGraph(
            graph_def, input_nodes, last_outputs,
            ["fold_constants", "strip_unused_nodes"])

        print()
        self.folded_graph = file[:-3] + ".const_folded.pb"
        print("Saving Const-folded Graph... as " + self.folded_graph)
        graph_io.write_graph(graph_or_graph_def=folded_graph_def,
                             logdir="./",
                             name=self.folded_graph,
                             as_text=False)
        print("Finished.")
  def run(self):
    """Run the benchmark with the optimized graph.

    Reads the GraphDef at ``self.args.input_graph``, passes it through
    ``TransformGraph`` and imports it into a fresh graph. Input images come
    from the ImageNet validation subset when ``self.args.data_location`` is
    set, otherwise from random synthetic data. Depending on
    ``self.args.accuracy_only`` the method either times up to 40 iterations
    (the first 10 are warm-up and are not averaged) or accumulates
    top-1/top-5 accuracy over the dataset, optionally appending per-image
    predictions to ``self.args.results_file_path``. Results are printed to
    stdout.

    Raises:
      ValueError: if ``accuracy_only`` is set without ``data_location``,
        because labels and file names only exist when real data is read.
    """
    if self.args.accuracy_only and not self.args.data_location:
      # The accuracy loop fetches `labels` and `filenames`, which are only
      # defined on the real-data path; fail clearly instead of with an
      # UnboundLocalError after the graph has been built.
      raise ValueError(
          "accuracy_only requires data_location: labels are only "
          "available when reading real data.")

    with tf.Graph().as_default() as graph:

      config = tf.ConfigProto()
      config.allow_soft_placement = True
      config.intra_op_parallelism_threads = self.args.num_intra_threads
      config.inter_op_parallelism_threads = self.args.num_inter_threads

      with tf.Session(config=config) as sess:

        # convert the freezed graph to optimized graph
        graph_def = tf.GraphDef()
        with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file:
          input_graph_content = input_file.read()
          graph_def.ParseFromString(input_graph_content)

        output_graph = graph_transforms.TransformGraph(graph_def,
                                                       [INPUTS], [OUTPUTS], [OPTIMIZATION])
        # `graph` is already the default graph here, so the import lands in
        # the session's graph without any further as_default() call.
        tf.import_graph_def(output_graph, name='')

        # Input and output tensors of the inference graph
        input_tensor = graph.get_tensor_by_name('input:0')
        output_tensor = graph.get_tensor_by_name('predict:0')

        num_processed_images = 0
        num_remaining_images = IMAGENET_VALIDATION_IMAGES

        if (self.args.data_location):
          print("Inference with real data.")
          dataset = datasets.ImagenetData(self.args.data_location)
          preprocessor = preprocessing.ImagePreprocessor(
            RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size,
            1,  # device count
            tf.float32,  # data_type for input fed to the graph
            train=False,  # doing inference
            resize_method='crop')
          images, labels, filenames = preprocessor.minibatch(dataset, subset='validation')
          num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \
                                 - num_processed_images
        else:
          print("Inference with dummy data.")
          input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3]
          images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images')

        if (not self.args.accuracy_only):  # performance check
          iteration = 0
          warm_up_iteration = 10
          total_run = 40
          total_time = 0

          while num_remaining_images >= self.args.batch_size and iteration < total_run:
            iteration += 1

            # Reads and preprocess data
            if (self.args.data_location):
              preprocessed_images = sess.run([images[0]])
              image_np = preprocessed_images[0]
            else:
              image_np = sess.run(images)

            num_processed_images += self.args.batch_size
            num_remaining_images -= self.args.batch_size

            # Only the inference call is timed; its result is not needed.
            start_time = time.time()
            sess.run([output_tensor], feed_dict={input_tensor: image_np})
            time_consume = time.time() - start_time

            print('Iteration %d: %.3f sec' % (iteration, time_consume))
            if iteration > warm_up_iteration:
              total_time += time_consume

          # Only iterations past the warm-up phase were added to total_time.
          # If there are none, the average is undefined; report that rather
          # than dividing by zero.
          timed_iterations = iteration - warm_up_iteration
          if timed_iterations <= 0:
            print('No timed iterations: ran %d iteration(s), all within '
                  'the %d warm-up step(s).' % (iteration, warm_up_iteration))
            return

          time_average = total_time / timed_iterations
          print('Average time: %.3f sec' % (time_average))

          print('Batch size = %d' % self.args.batch_size)
          if (self.args.batch_size == 1):
            print('Latency: %.3f ms' % (time_average * 1000))
          # print throughput for both batch size 1 and 128
          print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average))

        else:  # accuracy check
          total_accuracy1, total_accuracy5 = (0.0, 0.0)

          # If a results file path is provided, then start the prediction output file
          if self.args.results_file_path:
            with open(self.args.results_file_path, "w+") as fp:
              fp.write("filename,actual,prediction\n")

          while num_remaining_images >= self.args.batch_size:
            # Reads and preprocess data
            np_images, np_labels, tf_filenames = sess.run(
                [images[0], labels[0], filenames[0]])
            num_processed_images += self.args.batch_size
            num_remaining_images -= self.args.batch_size

            # Compute inference on the preprocessed data
            predictions = sess.run(output_tensor,
                                   {input_tensor: np_images})

            # Write out the file name, expected label, and top prediction
            if self.args.results_file_path:
              top_predictions = np.argmax(predictions, 1)
              with open(self.args.results_file_path, "a") as fp:
                for filename, expected_label, top_prediction in \
                        zip(tf_filenames, np_labels, top_predictions):
                  fp.write("{},{},{}\n".format(filename, expected_label, top_prediction))

            # Build the accuracy ops in a throwaway graph for each batch.
            # Creating them in the session's own graph would add new constant
            # and top-k nodes on every iteration, so the graph would keep
            # growing for the whole run.
            with tf.Graph().as_default():
              accuracy1 = tf.reduce_sum(
                tf.cast(tf.nn.in_top_k(tf.constant(predictions),
                                       tf.constant(np_labels), 1), tf.float32))

              accuracy5 = tf.reduce_sum(
                tf.cast(tf.nn.in_top_k(tf.constant(predictions),
                                       tf.constant(np_labels), 5), tf.float32))
              with tf.Session(config=config) as accu_sess:
                np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5])

            total_accuracy1 += np_accuracy1
            total_accuracy5 += np_accuracy5
            print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \
                  % (num_processed_images, total_accuracy1 / num_processed_images,
                     total_accuracy5 / num_processed_images))
Exemplo n.º 10
0
def main(_):
    """Freeze a MobileNet model, rewrite it two ways, and compare the results.

    The steps are, in order:
      1. Clear the output of previous runs (the temp directory itself is kept).
      2. Obtain a frozen graph, from Keras or from Slim depending on
         `_USE_KERAS`, and write it to `_FROZEN_GRAPH_FILE`.
      3. Apply the Graph Transform Tool rewrites and write the result to
         `_TF_REWRITES_GRAPH_FILE`.
      4. Apply the GraphDef editor's batch-norm folding rewrites on top of
         that and write the result to `_GDE_REWRITES_GRAPH_FILE`.
      5. Print the op count of each graph and run all three graphs on the
         same test image.

    Args:
      _: Unused positional argument.
    """
    # Remove any detritus of previous runs of this script, but leave the temp
    # dir in place because the user might have a shell there.
    if not os.path.isdir(_TMP_DIR):
        os.mkdir(_TMP_DIR)
    _clear_dir(_SAVED_MODEL_DIR)
    for f in _AFTER_MODEL_FILES:
        if os.path.isfile(f):
            os.remove(f)

    # Obtain a frozen graph for a MobileNet model
    if _USE_KERAS:
        frozen_graph_def, input_node, output_node = get_keras_frozen_graph()
    else:
        frozen_graph_def, input_node, output_node = get_slim_frozen_graph()

    _protobuf_to_file(frozen_graph_def, _FROZEN_GRAPH_FILE, "Frozen graph")

    # Now run through some of TensorFlow's built-in graph rewrites.
    # For that we use the undocumented Python APIs under
    # tensorflow.tools.graph_transforms
    after_tf_rewrites_graph_def = graph_transforms.TransformGraph(
        frozen_graph_def,
        inputs=[input_node],
        outputs=[output_node],
        # Use the set of transforms recommended in the README under "Optimizing
        # for Deployment"
        transforms=[
            'strip_unused_nodes(type=float, shape="1,299,299,3")',
            'remove_nodes(op=Identity, op=CheckNumerics)',
            'fold_constants(ignore_errors=true)', 'fold_batch_norms',
            'fold_old_batch_norms'
        ])

    _protobuf_to_file(after_tf_rewrites_graph_def, _TF_REWRITES_GRAPH_FILE,
                      "Graph after built-in TensorFlow rewrites")

    # Now run the GraphDef editor's batch-norm folding rewrites, ending with
    # fold_batch_norms_up()
    g = gde.Graph(after_tf_rewrites_graph_def)
    gde.rewrite.fold_batch_norms(g)
    gde.rewrite.fold_old_batch_norms(g)
    gde.rewrite.fold_batch_norms_up(g)
    after_gde_graph_def = g.to_graph_def(add_shapes=True)

    _protobuf_to_file(after_gde_graph_def, _GDE_REWRITES_GRAPH_FILE,
                      "Graph after fold_batch_norms_up() rewrite")

    # Dump some statistics about the number of ops in each graph
    print("            Number of ops in frozen graph: {}".format(
        len(frozen_graph_def.node)))
    print("    Number of ops after built-in rewrites: {}".format(
        len(after_tf_rewrites_graph_def.node)))
    print("Number of ops after GDE rewrites: {}".format(
        len(after_gde_graph_def.node)))

    # Run model before and after rewrite and compare results
    if not os.path.exists(_PANDA_PIC_FILE):
        print("Downloading {} to {}".format(_PANDA_PIC_URL, _PANDA_PIC_FILE))
        urllib.request.urlretrieve(_PANDA_PIC_URL, _PANDA_PIC_FILE)
    # np.float was only an alias for the builtin float (i.e. float64) and has
    # been removed from NumPy, so name the dtype explicitly.
    img = np.array(PIL.Image.open(_PANDA_PIC_FILE).resize(
        (224, 224))).astype(np.float64)
    # Per-channel means are printed for information only; the image is passed
    # to the graphs without any normalization.
    channel_means = np.mean(img, axis=(0, 1))

    print("Channel means are: {}".format(channel_means))
    print("Image shape is {}".format(img.shape))

    print("Frozen graph results:")
    run_graph(frozen_graph_def, img, input_node, output_node)
    print("Results after built-in rewrites:")
    run_graph(after_tf_rewrites_graph_def, img, input_node, output_node)
    print("Results after GDE rewrites:")
    run_graph(after_gde_graph_def, img, input_node, output_node)
Exemplo n.º 11
0
            input_nodes.append(tensor.name)
            
print()
print('Sinks (operations without outputs):')

# Position of every operation in `ops`, keyed by operation name.
num_nodes = len(ops)
name2nodeIdx_map = {op.name: idx for idx, op in enumerate(ops)}

# node_outputs_[k] collects the indices of the operations that consume one of
# the tensors produced by operation k.
node_outputs_ = [[] for _ in range(num_nodes)]
for consumer_idx, consumer in enumerate(ops):
    for tensor in consumer.inputs:
        # Tensor names look like "<op name>:<output index>"; keep the op name.
        producer_name = tensor.name.split(':')[0]
        node_outputs_[name2nodeIdx_map[producer_name]].append(consumer_idx)

# A sink is an operation nobody consumes; NoOp and Assert are not reported.
last_outputs = []
for candidate, consumers in zip(ops, node_outputs_):
    if consumers or candidate.type in ('NoOp', 'Assert'):
        continue
    print('- {0:20s} {1}'.format(candidate.type, candidate.name))
    last_outputs.append(candidate.outputs[0].name)

# The import is performed for its effect on the default graph; its return
# value is not used.
tf.import_graph_def(graph_def, name="")
g_def_const = graph_transforms.TransformGraph(
    graph_def,
    input_nodes,
    last_outputs,
    ["fold_constants", "strip_unused_nodes", "merge_duplicate_nodes",
     "sort_by_execution_order"])

print()
# Drop the last three characters of the input file name before appending the
# new suffix.
folded_graph = args.file[:-3] + ".const_folded.pb"
print("Saving Const-folded Graph... as " + folded_graph)
graph_io.write_graph(
    graph_or_graph_def=g_def_const,
    logdir="./",
    name=folded_graph,
    as_text=False)
print("Finished.")
Exemplo n.º 12
0
def main(_):
    """Freeze (if needed) and optimize a model, then write `optimized_graph.pb`.

    The model comes either from a SavedModel directory
    (`FLAGS.savedmodel_dir`, which takes precedence) or from an already frozen
    graph (`FLAGS.frozen_graph_path`). For a SavedModel the input and output
    node names are read from its default serving signature and the model is
    frozen into `frozen_graph.pb` inside that directory. For a frozen graph
    the names are taken from `FLAGS.input_names` / `FLAGS.output_names`.
    The Graph Transform Tool is then applied and the result is written next to
    the source model.

    Args:
      _: Unused positional argument.

    Raises:
      AttributeError: If neither model source is given, or if a frozen graph
        is given without both input and output names.
    """
    if not (FLAGS.frozen_graph_path or FLAGS.savedmodel_dir):
        raise AttributeError(
            'Either path to the frozen graph or directory of the SavedModel must be provided!'
        )
    if FLAGS.frozen_graph_path and not (FLAGS.input_names
                                        and FLAGS.output_names):
        raise AttributeError(
            'Input and output tensor names must be provided along with frozen graph path!'
        )

    if FLAGS.savedmodel_dir:
        savedmodel_pb_filename = 'saved_model.pb'
        path_to_pb = os.path.join(FLAGS.savedmodel_dir, savedmodel_pb_filename)

        saved_model = graph_def_util.saved_model_pb2.SavedModel()
        graph_def_util.read_def(path_to_pb, saved_model, 'The SavedModel')
        signature_def = saved_model.meta_graphs[0].signature_def[
            tf.saved_model.signature_constants.
            DEFAULT_SERVING_SIGNATURE_DEF_KEY]

        # Signature entries carry tensor names ("node:0"); the tools below
        # want node names. Splitting on ':' also leaves a name without any
        # ':' intact, whereas slicing up to find(':') would cut off its last
        # character because find() returns -1.
        input_names = sorted(
            item.name.split(':')[0]
            for item in signature_def.inputs.values())
        output_names = sorted(
            item.name.split(':')[0]
            for item in signature_def.outputs.values())

        frozen_graph_filename = 'frozen_graph.pb'
        frozen_graph_path = os.path.join(FLAGS.savedmodel_dir,
                                         frozen_graph_filename)

        output_graph_def = freeze_graph.freeze_graph(
            input_graph=None,
            input_saver=None,
            input_checkpoint=None,
            input_binary=True,
            clear_devices=True,
            output_node_names=', '.join(output_names),
            restore_op_name=None,
            filename_tensor_name=None,
            output_graph=frozen_graph_path,
            initializer_nodes=None,
            input_saved_model_dir=FLAGS.savedmodel_dir)
        optimized_graph_path = FLAGS.savedmodel_dir
    else:
        # The validation above guarantees that the frozen graph path and both
        # name flags are set here. The flags are parsed only in this branch so
        # that the SavedModel path does not depend on them.
        input_names = sorted(FLAGS.input_names.replace(' ', '').split(','))
        output_names = sorted(FLAGS.output_names.replace(' ', '').split(','))

        output_graph_def = graph_def_util.graph_pb2.GraphDef()
        graph_def_util.read_def(FLAGS.frozen_graph_path, output_graph_def,
                                'The frozen graph')
        optimized_graph_path = os.path.dirname(FLAGS.frozen_graph_path)

    # If you want to apply only 'optimize_for_inference' uncomment the following, but
    # don't forget to remove 'graph_transforms' optimization since they are not compatible.
    #
    # output_graph_def = optimize_for_inference_lib.optimize_for_inference(
    #     input_graph_def=output_graph_def, placeholder_type_enum=tf.float32.as_datatype_enum,
    #     input_node_names=input_names, output_node_names=output_names)

    transforms = [
        'strip_unused_nodes(type=float, shape="1,299,299,3")',
        'remove_nodes(op=Identity, op=CheckNumerics)',
        'fold_constants(ignore_errors=true)', 'fold_batch_norms',
        'fold_old_batch_norms', 'quantize_weights', 'quantize_nodes'
    ]

    output_graph_def = graph_transforms.TransformGraph(
        input_graph_def=output_graph_def,
        transforms=transforms,
        inputs=input_names,
        outputs=output_names)

    optimized_graph_filename = 'optimized_graph.pb'
    optimized_graph_path = os.path.join(optimized_graph_path,
                                        optimized_graph_filename)
    graph_def_util.write_def(optimized_graph_path, output_graph_def)
Exemplo n.º 13
0
def run_inference(tfConfigParams, images, image_path):
    """Time repeated inference on a frozen graph and return CSV-style rows.

    The frozen graph named by `FLAGS.frozen_graph` is loaded from the models
    directory under `$APP_HOME`, passed through the Graph Transform Tool with
    the `OPTIMIZATION` transform, run `_WARMUP_NUM_LOOPS` times untimed and
    then `FLAGS.iterations` times timed.

    Args:
        tfConfigParams: Mapping of `tf.ConfigProto` option names to values.
            Only "inter_op_parallelism_threads",
            "intra_op_parallelism_threads" and "allow_soft_placement" are
            applied; any other key is ignored.
        images: Value fed to the graph's input tensor on every run.
        image_path: Path of the image being classified. Only its basename is
            used, and only in demo mode.

    Returns:
        A list with one comma-separated string per timed iteration. Outside
        demo mode a row is "<start>,<end>". In demo mode (environment
        variable DEMO equal to "True") the row is followed by the image name
        and then class index and score for the five highest-scoring classes,
        best first (fewer pairs if the model has fewer than five outputs).

    Raises:
        KeyError: If the APP_HOME environment variable is not set.
        ValueError: If `FLAGS.precision` is neither 'int8' nor 'fp32'.
    """
    model_dir = os.path.join(os.environ['APP_HOME'], "Modules",
                             "Deep-Learning", "packages", "models")

    # Input/output node names for each supported precision. Rejecting an
    # unknown precision here avoids failing later on an unbound local name.
    node_names_by_precision = {
        'int8': ('input', 'predict'),
        'fp32': ('input', 'resnet_v1_50/SpatialSqueeze'),
    }
    if FLAGS.precision not in node_names_by_precision:
        raise ValueError(
            "Unsupported precision {!r}; expected one of {}".format(
                FLAGS.precision, sorted(node_names_by_precision)))
    input_name, output_name = node_names_by_precision[FLAGS.precision]

    # An unset DEMO variable simply means "not demo mode". The value does not
    # change between iterations, so it is read once.
    demo_mode = os.environ.get("DEMO") == "True"
    image_name = os.path.basename(image_path) if demo_mode else None

    device = ("/" + FLAGS.aarch + ":0") if len(FLAGS.aarch) > 1 else "/cpu:0"

    # Opening in append mode creates the CSV file when it is missing and
    # fails early when the path cannot be opened. Nothing is written through
    # this handle, so it is released immediately.
    with open(FLAGS.csv_file_path, "a"):
        pass

    timing_csv_buffer_data = []
    with tf.device(device):
        with tf.Graph().as_default() as graph:
            # Prepare the session config from the supported options only.
            config = tf.ConfigProto()
            for key in ("inter_op_parallelism_threads",
                        "intra_op_parallelism_threads",
                        "allow_soft_placement"):
                if key in tfConfigParams:
                    setattr(config, key, tfConfigParams[key])

            # Open a TensorFlow session and load the graph.
            with tf.Session(config=config) as sess:
                graph_def = tf.GraphDef()
                with tf.gfile.GFile(model_dir + "/" + FLAGS.frozen_graph,
                                    'rb') as input_file:
                    graph_def.ParseFromString(input_file.read())
                output_graph = graph_transforms.TransformGraph(
                    graph_def, [input_name], [output_name], [OPTIMIZATION])
                tf.import_graph_def(output_graph, name='')

                # Look up the input and output tensors of the imported graph.
                input_tensor = graph.get_tensor_by_name(input_name + ':0')
                output_tensor = graph.get_tensor_by_name(output_name + ':0')

                tf.logging.info("Starting Warmup cycle")
                for _ in range(_WARMUP_NUM_LOOPS):
                    sess.run([output_tensor],
                             feed_dict={input_tensor: images})

                for _ in range(FLAGS.iterations):
                    tf.logging.info("Starting timing.")
                    tstart = time.time()
                    predicts = sess.run([output_tensor],
                                        feed_dict={input_tensor: images})
                    tend = time.time()

                    row = [str(tstart), str(tend)]
                    if demo_mode:
                        # NOTE(review): assumes a single image per run, so
                        # that the squeezed predictions are one-dimensional.
                        predictions = np.squeeze(predicts)
                        row.append(image_name)
                        # Highest-scoring class first.
                        for node_id in predictions.argsort()[-5:][::-1]:
                            row.append(str(node_id))
                            row.append(str(predictions[node_id]))
                    timing_csv_buffer_data.append(','.join(row))

    return timing_csv_buffer_data
Exemplo n.º 14
0
def _do_transforms(graph_def, input_names, output_names, initializer_names,
                   transforms, saver_def=None, checkpoint_path=None):
  """Run the requested transforms over a GraphDef, freezing it if asked to.

  Graph Transform Tool transforms are applied in order. If the list contains
  the 'freeze_graph' pseudo-transform, the list is split at its first
  occurrence: everything before it is applied, then the graph is frozen, then
  everything after it is applied.

  Args:
    graph_def: A GraphDef proto to be transformed.
    input_names: Names of input nodes.
    output_names: Names of output nodes.
    initializer_names: Dictionary of the "infrastructural" nodes (initializers,
      save and restore ops, etc.) that should be retained even if they are not
      transitively reachable from output nodes. The keys in this dictionary
      indicate the collection where these nodes were obtained from.
    transforms: A list of strings naming the graph transforms to be applied in
      order.  These transform names are exactly those supported by the Graph
      Transform Tool, with the addition of the 'freeze_graph' transform.
    saver_def: A SaverDef proto used for restoring a checkpoint during freezing,
      if needed (default None).
    checkpoint_path:  A path to a checkpoint to restore during freezing,
      if needed (default None).
  Returns:
    The transformed GraphDef.
  """
  # Nothing to apply: return a copy rather than the caller's own proto.
  if not transforms:
    graph_def_copy = _graph_pb2.GraphDef()
    graph_def_copy.CopyFrom(graph_def)
    return graph_def_copy

  # Without a freeze step the whole list goes to the tool in a single call,
  # with every initializer node kept alive as an extra output.
  if _FREEZE_GRAPH_TRANSFORM_NAME not in transforms:
    flat_initializers = sorted(
        name for names in initializer_names.values() for name in names)
    return _graph_transforms.TransformGraph(
        graph_def, input_names, output_names + flat_initializers, transforms)

  # A freeze step was requested, possibly with transforms on either side.
  freeze_at = transforms.index(_FREEZE_GRAPH_TRANSFORM_NAME)
  before_freeze = transforms[:freeze_at]
  after_freeze = transforms[freeze_at + 1:]

  pre_frozen_graph_def = _do_transforms(
      graph_def, input_names, output_names, initializer_names,
      before_freeze, saver_def, checkpoint_path)

  output_node_names = [_op_name(name) for name in output_names]
  table_init_key = _ops.GraphKeys.TABLE_INITIALIZERS
  legacy_init_key = _saved_model_constants.LEGACY_INIT_OP_KEY
  frozen_graph_def = _freeze_graph_with_def_protos(
      pre_frozen_graph_def, output_node_names,
      initializer_names[table_init_key],
      initializer_names[legacy_init_key][0],
      saver_def, checkpoint_path)

  # The saver and checkpoint are no longer needed past this point. Freezing
  # prunes all initializers and shared init nodes unless table initializers
  # are present, so only carry them forward in that case.
  surviving_initializers = {}
  if initializer_names[table_init_key]:
    surviving_initializers[table_init_key] = initializer_names[table_init_key]
    surviving_initializers[legacy_init_key] = (
        initializer_names[legacy_init_key])

  return _do_transforms(frozen_graph_def, input_names, output_names,
                        surviving_initializers, after_freeze)
    def run(self):
        """Run the benchmark against the optimized graph.

        Input batches are produced in one graph/session (real ImageNet
        validation data when `self.args.data_location` is set, random images
        otherwise) and fed to the model, which lives in a second
        graph/session. Depending on `self.args.accuracy_only` the method
        either times inference and prints latency/throughput, or prints the
        running top-1 / top-5 accuracy after every batch.

        Raises:
            ValueError: If accuracy measurement is requested without
                `data_location`; the dummy-data path produces no labels.
        """
        # Function-scope import: only this method needs it.
        from contextlib import closing

        print("Run inference")

        if self.args.accuracy_only and not self.args.data_location:
            raise ValueError(
                "Accuracy measurement needs labelled data: data_location "
                "must be set when accuracy_only is enabled.")

        data_config = tf.ConfigProto()
        data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads
        data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads
        data_config.use_per_session_threads = 1

        infer_config = tf.ConfigProto()
        infer_config.intra_op_parallelism_threads = self.args.num_intra_threads
        infer_config.inter_op_parallelism_threads = self.args.num_inter_threads
        infer_config.use_per_session_threads = 1

        # Graph that produces the input batches.
        data_graph = tf.Graph()
        with data_graph.as_default():
            if self.args.data_location:
                print("Inference with real data.")
                dataset = datasets.ImagenetData(self.args.data_location)
                preprocessor = dataset.get_image_preprocessor()(
                    INCEPTION_V3_IMAGE_SIZE,
                    INCEPTION_V3_IMAGE_SIZE,
                    self.args.batch_size,
                    num_cores=self.args.num_cores,
                    resize_method='bilinear')
                images, labels = preprocessor.minibatch(dataset,
                                                        subset='validation')
            else:
                print("Inference with dummy data.")
                input_shape = [
                    self.args.batch_size, INCEPTION_V3_IMAGE_SIZE,
                    INCEPTION_V3_IMAGE_SIZE, 3
                ]
                images = tf.random.uniform(input_shape,
                                           0.0,
                                           255.0,
                                           dtype=tf.float32,
                                           name='synthetic_images')

        # Graph that holds the model under test.
        infer_graph = tf.Graph()
        with infer_graph.as_default():
            graph_def = tf.GraphDef()
            # GFile replaces the deprecated FastGFile alias.
            with tf.gfile.GFile(self.args.input_graph, 'rb') as input_file:
                graph_def.ParseFromString(input_file.read())

            output_graph = graph_transforms.TransformGraph(
                graph_def, [INPUTS], [OUTPUTS], [OPTIMIZATION])
            tf.import_graph_def(output_graph, name='')

        # Look up the input and output tensors of the imported graph.
        input_tensor = infer_graph.get_tensor_by_name('input:0')
        output_tensor = infer_graph.get_tensor_by_name('predict:0')

        num_processed_images = 0
        num_remaining_images = datasets.IMAGENET_NUM_VAL_IMAGES

        # closing() guarantees both sessions are closed on every exit path
        # without making either of them the default session or graph.
        with closing(tf.Session(graph=data_graph,
                                config=data_config)) as data_sess, \
                closing(tf.Session(graph=infer_graph,
                                   config=infer_config)) as infer_sess:

            if not self.args.accuracy_only:
                iteration = 0
                warm_up_iteration = self.args.warmup_steps
                total_run = self.args.steps
                total_time = 0

                while (num_remaining_images >= self.args.batch_size
                       and iteration < total_run):
                    iteration += 1

                    data_load_start = time.time()
                    image_np = data_sess.run(images)
                    data_load_time = time.time() - data_load_start

                    num_processed_images += self.args.batch_size
                    num_remaining_images -= self.args.batch_size

                    start_time = time.time()
                    infer_sess.run([output_tensor],
                                   feed_dict={input_tensor: image_np})
                    time_consume = time.time() - start_time

                    # only add data loading time for real data, not for dummy data
                    if self.args.data_location:
                        time_consume += data_load_time

                    print('Iteration %d: %.6f sec' % (iteration, time_consume))
                    # Warm-up iterations are excluded from the average.
                    if iteration > warm_up_iteration:
                        total_time += time_consume

                # With no iteration past the warm-up there is nothing to
                # average, and the division below would fail.
                timed_iterations = iteration - warm_up_iteration
                if timed_iterations <= 0:
                    print('No timed iterations were run (%d iterations, '
                          '%d warm-up steps); no timing results available.'
                          % (iteration, warm_up_iteration))
                    return

                time_average = total_time / timed_iterations
                print('Average time: %.6f sec' % (time_average))

                print('Batch size = %d' % self.args.batch_size)
                if self.args.batch_size == 1:
                    print('Latency: %.3f ms' % (time_average * 1000))

                print('Throughput: %.3f images/sec' %
                      (self.args.batch_size / time_average))

            else:  # accuracy check
                total_accuracy1, total_accuracy5 = (0.0, 0.0)

                while num_remaining_images >= self.args.batch_size:
                    # Reads and preprocess data
                    np_images, np_labels = data_sess.run([images, labels])
                    num_processed_images += self.args.batch_size
                    num_remaining_images -= self.args.batch_size

                    # Compute inference on the preprocessed data
                    predictions = infer_sess.run(output_tensor,
                                                 {input_tensor: np_images})

                    # The accuracy ops for this batch are built and run in a
                    # throwaway graph, so the per-batch constants do not pile
                    # up in the data or inference graphs.
                    with tf.Graph().as_default():
                        accuracy1 = tf.reduce_sum(
                            tf.cast(
                                tf.nn.in_top_k(tf.constant(predictions),
                                               tf.constant(np_labels), 1),
                                tf.float32))

                        accuracy5 = tf.reduce_sum(
                            tf.cast(
                                tf.nn.in_top_k(tf.constant(predictions),
                                               tf.constant(np_labels), 5),
                                tf.float32))
                        with tf.Session() as accu_sess:
                            np_accuracy1, np_accuracy5 = accu_sess.run(
                                [accuracy1, accuracy5])

                        total_accuracy1 += np_accuracy1
                        total_accuracy5 += np_accuracy5

                    print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \
                          % (num_processed_images, total_accuracy1 / num_processed_images,
                             total_accuracy5 / num_processed_images))