Example #1
def testAccumulateStatistics(self):
    weight_params_total = ops.OpStats("weight_parameters")
    self.assertEqual(None, weight_params_total.value)
    flops_total = ops.OpStats("flops")
    self.assertEqual(None, flops_total.value)
    first_weight_params = ops.OpStats("weight_parameters", 100)
    weight_params_total += first_weight_params
    self.assertEqual(100, weight_params_total.value)
    second_flops = ops.OpStats("flops", 3)
    flops_total += second_flops
    self.assertEqual(3, flops_total.value)
    second_weight_params = ops.OpStats("weight_parameters", 200)
    weight_params_total += second_weight_params
    self.assertEqual(300, weight_params_total.value)
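The accumulation semantics exercised by this test (a fresh OpStats starts at None, and += treats a missing value as zero) can be sketched with a minimal stand-in class; this is an illustrative assumption, not TensorFlow's actual ops.OpStats implementation.

class OpStatsSketch(object):
    """Minimal stand-in for ops.OpStats (hypothetical, for illustration)."""

    def __init__(self, statistic_type, value=None):
        self.statistic_type = statistic_type
        self.value = value  # None means no statistic recorded yet.

    def __iadd__(self, other):
        if other.statistic_type != self.statistic_type:
            raise ValueError("can't accumulate %s into %s" %
                             (other.statistic_type, self.statistic_type))
        if self.value is None:
            self.value = other.value
        elif other.value is not None:
            self.value += other.value
        return self


total = OpStatsSketch("flops")
total += OpStatsSketch("flops", 3)
assert total.value == 3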
Example #2
def _conv_2d_backprop_input_flops(graph, node):
  """Compute flops for Conv2DBackpropInput operation."""
  # Formula:
  #  batch_size * image_x_dim * image_y_dim * kernel_x_dim * kernel_y_dim
  #  * input_depth * output_depth * 2 / (image_x_stride * image_y_stride)
  #
  # Where:
  # image_x_dim, image_y_dim and input_depth --- size of the input to the
  #   source (non-backprop) convolution; in other words, the sizes of the
  #   backprop output.
  # output_depth --- number of filters in the original convolution, thus
  #   the depth of the backprop input.
  # kernel_x_dim and kernel_y_dim --- sizes of the filter in the spatial
  #   dimensions
  # image_x_stride and image_y_stride --- strides of the convolution
  #
  _verify_conv_data_format(node)
  # out_shape = [batch_size, image_y_dim, image_x_dim, input_depth]
  out_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  out_shape.assert_is_fully_defined()
  # kernel_shape = [kernel_y_dim, kernel_x_dim, input_depth, output_depth]
  kernel_shape = graph_util.tensor_shape_from_node_def_name(graph,
                                                            node.input[1])
  kernel_shape.assert_is_fully_defined()
  # strides
  strides_shape = list(node.attr["strides"].list.i)
  strides_product = strides_shape[1] * strides_shape[2]
  return ops.OpStats("flops",
                     (2 * out_shape.num_elements()
                      * kernel_shape.num_elements()
                      / (out_shape.dims[-1].value * strides_product)))
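As a sanity check of the formula, a hand computation with hypothetical shapes (chosen purely for illustration): batch 32, a 56x56 backprop output with input_depth 64, a 3x3 kernel with output_depth 128, and unit strides.

# Hypothetical shapes, for illustration only.
batch, image_y, image_x, input_depth = 32, 56, 56, 64
kernel_y, kernel_x, output_depth = 3, 3, 128
stride_y, stride_x = 1, 1

out_elements = batch * image_y * image_x * input_depth              # 6,422,528
kernel_elements = kernel_y * kernel_x * input_depth * output_depth  # 73,728
flops = 2 * out_elements * kernel_elements / (input_depth * stride_y * stride_x)
# Same as batch * image_x_dim * image_y_dim * kernel_x_dim * kernel_y_dim
# * input_depth * output_depth * 2 / strides: 14,797,504,512 (~14.8 GFLOPs).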
Example #3
def _calc_bias_add_flops(graph, node):
    """Calculates the computing needed for BiasAdd."""
    input_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    input_shape.assert_is_fully_defined()
    input_count = np.prod(input_shape.as_list())
    return ops.OpStats("flops", input_count)
Example #4
def _l2_loss_flops(graph, node):
  """Compute flops for L2Loss operation."""
  in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  in_shape.assert_is_fully_defined()
  # TensorFlow uses an inefficient implementation with (3*N - 1) flops;
  # an optimal implementation would need only 2*N flops.
  return ops.OpStats("flops", in_shape.num_elements() * 3 - 1)
Example #5
def _calc_bias_add_weight_params(graph, node):
    """Calculates the on-disk weight parameters for BiasAdd."""
    bias_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[1])
    bias_shape.assert_is_fully_defined()
    bias_count = np.prod(bias_shape.as_list())
    return ops.OpStats("weight_parameters", bias_count)
Example #6
def calculate_graph_metrics(graph_def, statistic_types, input_layer,
                            input_shape_override, batch_size):
    """Looks at the performance statistics of all nodes in the graph."""
    _ = tf.import_graph_def(graph_def, name="")
    total_stats = {}
    node_stats = {}
    for statistic_type in statistic_types:
        total_stats[statistic_type] = ops.OpStats(statistic_type)
        node_stats[statistic_type] = {}
    # Make sure we get pretty-printed numbers with separators.
    locale.setlocale(locale.LC_ALL, "")
    with tf.Session() as sess:
        input_tensor = sess.graph.get_tensor_by_name(input_layer)
        input_shape_tensor = input_tensor.get_shape()
        if input_shape_tensor:
            input_shape = input_shape_tensor.as_list()
        else:
            input_shape = None
        if input_shape_override:
            input_shape = input_shape_override
        if input_shape is None:
            raise ValueError("No input shape was provided or inferred, so the "
                             "batch size can't be applied.")
        input_shape[0] = batch_size
        input_tensor.set_shape(input_shape)
        for node in graph_def.node:
            # Ensure that the updated input shape has been fully-propagated before we
            # ask for the statistics, since they may depend on the output size.
            op = sess.graph.get_operation_by_name(node.name)
            ops.set_shapes_for_outputs(op)
            for statistic_type in statistic_types:
                current_stats = ops.get_stats_for_node_def(
                    sess.graph, node, statistic_type)
                node_stats[statistic_type][node.name] = current_stats
                total_stats[statistic_type] += current_stats
    return total_stats, node_stats
Example #7
def _max_pool_grad_flops(graph, node):
  """Compute flops for MaxPoolGrad operation."""
  _verify_conv_data_format(node)
  #
  # MaxPoolGrad declaration:
  #   Inputs:
  #     - orig_input  -- original input tensor (of max_pool)
  #     - orig_output  -- original output tensor (of max_pool)
  #     - grad --  gradient with respect to output of max_pool
  #   Outputs:
  #     - output -- gradient with respect to input of max_pool
  #   Attributes:
  #     - ksize
  #     - strides
  #     - padding
  #     - data_format
  # It computes MaxPool first, then one flop per element of the original output.
  #
  kernel_shape = list(node.attr["ksize"].list.i)
  kernel_area = _list_product(kernel_shape)
  orig_out_shape = graph_util.tensor_shape_from_node_def_name(graph,
                                                              node.input[1])
  orig_out_shape.assert_is_fully_defined()
  max_pool_ops = kernel_area * orig_out_shape.num_elements()
  return ops.OpStats("flops", max_pool_ops + orig_out_shape.num_elements())
Example #8
def _add_n_flops(graph, node):
  """Compute flops for AddN operation."""
  if not node.input:
    return _zero_flops(graph, node)
  in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  in_shape.assert_is_fully_defined()
  return ops.OpStats("flops", in_shape.num_elements() * (len(node.input) - 1))
Example #9
def _reduction_op_flops(graph, node, reduce_flops=1, finalize_flops=0):
  """Common code which compute flops for reduction operations."""
  in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  in_shape.assert_is_fully_defined()
  out_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  out_shape.assert_is_fully_defined()
  num_flops = (in_shape.num_elements() * reduce_flops
               + out_shape.num_elements() * (finalize_flops - reduce_flops))
  return ops.OpStats("flops", num_flops)
Example #10
def _calc_mat_mul_weight_parameters(graph, node):
  """Calculates the on-disk size of the weights for MatMul."""
  # We assume here that the weights are always in the second input to the op,
  # which is generally true by convention for fully-connected layers, but not
  # enforced or checked.
  weights_shape = graph_util.tensor_shape_from_node_def_name(graph,
                                                             node.input[1])
  weights_shape.assert_is_fully_defined()
  return ops.OpStats("weight_parameters",
                     (int(weights_shape[1]) * int(weights_shape[0])))
Example #11
def _add_n_flops(graph, node):
    """Compute flops for AddN operation."""
    if not node.input:
        return flops_registry._zero_flops(graph, node)
    in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
    in_shape.assert_is_fully_defined()
    if node.attr['T'].type == tf.complex64:
        flops_per_element = 2
    else:
        flops_per_element = 1
    return ops.OpStats("flops", in_shape.num_elements() * flops_per_element * (len(node.input) - 1))
Example #12
def _calc_mat_mul_flops(graph, node):
  """Calculates the compute resources needed for MatMul."""
  transpose_a = node.attr["transpose_a"].b
  a_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  a_shape.assert_is_fully_defined()
  if transpose_a:
    k = int(a_shape[0])
  else:
    k = int(a_shape[1])
  output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  output_shape.assert_is_fully_defined()
  output_count = np.prod(output_shape.as_list())
  return ops.OpStats("flops", (k * output_count * 2))
Example #13
def _ifft_2d_flops(graph, node):
    """Compute flops for ifft2d operation.
    
    Using same value as in fft2d"""
    if not node.input:
        return flops_registry._zero_flops(graph, node)
    in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
    in_shape.assert_is_fully_defined()
    n = in_shape.num_elements()
    if n == 0:
        return flops_registry._zero_flops(graph, node)
    num_ops = np.int_(np.ceil(5 * n * np.log2(n)))
    return ops.OpStats("flops", num_ops)
Example #14
def _avg_pool_grad_flops(graph, node):
  """Compute flops for AvgPoolGrad operation."""
  _verify_conv_data_format(node)
  # Pooling gradient implementation:
  out_backprop_shape = graph_util.tensor_shape_from_node_def_name(graph,
                                                                  node.input[1])
  out_backprop_shape.assert_is_fully_defined()
  kernel_shape = list(node.attr["ksize"].list.i)
  kernel_area = _list_product(kernel_shape)
  # TensorFlow multiplies each element of the pooling window by a coefficient,
  # then sums all of them, so we have 2 flops per element; a more optimal
  # implementation would do the division once, after the summation.
  return ops.OpStats("flops",
                     kernel_area * out_backprop_shape.num_elements() * 2)
Example #15
def _calc_depthwise_conv_flops(graph, node):
    """Calculates the compute resources needed for DepthwiseConv2dNative."""
    input_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    input_shape.assert_is_fully_defined()
    filter_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[1])
    filter_shape.assert_is_fully_defined()
    output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
    output_shape.assert_is_fully_defined()
    filter_height = int(filter_shape[0])
    filter_width = int(filter_shape[1])
    output_count = np.prod(output_shape.as_list())
    return ops.OpStats("flops",
                       (output_count * filter_height * filter_width * 2))
Example #16
def _calc_conv_weight_params(graph, node):
  """Calculates the on-disk size of the weights for Conv2D."""
  input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  input_shape.assert_is_fully_defined()
  filter_shape = graph_util.tensor_shape_from_node_def_name(graph,
                                                            node.input[1])
  filter_shape.assert_is_fully_defined()
  output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  output_shape.assert_is_fully_defined()
  filter_height = int(filter_shape[0])
  filter_width = int(filter_shape[1])
  filter_in_depth = int(filter_shape[2])
  filter_out_depth = int(filter_shape[3])
  return ops.OpStats("weight_parameters", (filter_height * filter_width *
                                           filter_in_depth * filter_out_depth))
Example #17
def _flops_fused_batch_norm_v3(graph, node):
    """inference is only supportted"""
    in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
    in_shape.assert_is_fully_defined()
    mean_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[3])
    mean_shape.assert_is_fully_defined()
    variance_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[4])
    variance_shape.assert_is_fully_defined()

    if node.attr["is_training"].b is True:
        raise ValueError("Only supports inference mode")

    num_flops = (2 * in_shape.num_elements() +
                 5 * variance_shape.num_elements() + mean_shape.num_elements())
    return ops.OpStats("flops", num_flops)
Example #18
def _fft_2d_flops(graph, node):
    """Compute flops for fft2d operation.
    
    The radix-2 Cooley-Tukey algorithm asymptotically requires 5 N log2(N) floating-point operations.
    I am using this value as the flops estimate.
    
    Source:
    http://www.fftw.org/speed/method.html
    """
    if not node.input:
        return flops_registry._zero_flops(graph, node)
    in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
    in_shape.assert_is_fully_defined()
    n = in_shape.num_elements()
    if n == 0:
        return flops_registry._zero_flops(graph, node)
    num_ops = np.int_(np.ceil(5 * n * np.log2(n)))
    return ops.OpStats("flops", num_ops)
Example #19
def _conv_2d_backprop_filter_flops(graph, node):
  """Compute flops for Conv2DBackpropFilter operation."""
  # Formula same as for Conv2DBackpropInput:
  #  batch_size * image_x_dim * image_y_dim * kernel_x_dim * kernel_y_dim
  #  * input_depth * output_depth * 2 / (image_x_stride * image_y_stride)
  #
  _verify_conv_data_format(node)
  # image_shape = [batch_size, image_y_dim, image_x_dim, input_depth]
  image_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  image_shape.assert_is_fully_defined()
  # kernel_shape = [kernel_y_dim, kernel_x_dim, input_depth, output_depth]
  kernel_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  kernel_shape.assert_is_fully_defined()
  # strides
  strides_shape = list(node.attr["strides"].list.i)
  strides_product = strides_shape[1] * strides_shape[2]
  return ops.OpStats("flops",
                     (2 * image_shape.num_elements()
                      * kernel_shape.num_elements()
                      / (image_shape.dims[-1].value * strides_product)))
Example #20
def _pool_flops(graph, node):
  """Common code which compute flops for pooling operations."""
  # compute flops for average and max pooling
  _verify_conv_data_format(node)
  #
  # Pooling declaration:
  #   Inputs:
  #     - value
  #   Outputs:
  #     - output
  #   Attributes:
  #     - ksize
  #     - strides
  #     - padding
  #     - data_format
  #
  # Pooling implementation:
  out_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  out_shape.assert_is_fully_defined()
  kernel_shape = list(node.attr["ksize"].list.i)
  kernel_area = _list_product(kernel_shape)
  return ops.OpStats("flops", kernel_area * out_shape.num_elements())
Example #21
def main(unused_args):
    if not gfile.Exists(FLAGS.graph):
        print("Input graph file '" + FLAGS.graph + "' does not exist!")
        return -1

    graph_def = graph_pb2.GraphDef()
    with open(FLAGS.graph, "rb") as f:
        if FLAGS.input_binary:
            graph_def.ParseFromString(f.read())
        else:
            text_format.Merge(f.read(), graph_def)
    _ = tf.import_graph_def(graph_def, name="")

    statistic_types = FLAGS.statistics.split(",")
    total_stats = {}
    for statistic_type in statistic_types:
        total_stats[statistic_type] = ops.OpStats(statistic_type)
    with tf.Session() as sess:
        input_tensor = sess.graph.get_tensor_by_name(FLAGS.input_layer)
        input_shape = input_tensor.get_shape()
        input_shape = [
            FLAGS.batch_size, input_shape[1], input_shape[2], input_shape[3]
        ]
        input_tensor.set_shape(input_shape)
        for node in graph_def.node:
            for statistic_type in statistic_types:
                node_stats = ops.get_stats_for_node_def(
                    sess.graph, node, statistic_type)
                total_stats[statistic_type] += node_stats
    # Make sure we get pretty-printed numbers with separators.
    locale.setlocale(locale.LC_ALL, "")
    for statistic_type in statistic_types:
        value = total_stats[statistic_type].value
        if value is None:
            friendly_value = "None"
        else:
            friendly_value = locale.format("%d", value, grouping=True)
        print("%s=%s" % (statistic_type, friendly_value))
def _add_flops(graph, node):
    """Compute flops for the Add operation."""
    out_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
    out_shape.assert_is_fully_defined()
    return ops.OpStats("flops", out_shape.num_elements())
Example #23
def _calc_a_forward_flops(unused_graph, unused_node):
    return ops.OpStats("flops", 20)
Example #24
def _calc_a_weight_params(unused_graph, unused_node):
    return ops.OpStats("weight_parameters", 10)
Example #25
def calculate_graph_metrics(graph_def, statistic_types, input_layer,
                            input_shape_override, batch_size):
    """Looks at the performance statistics of all nodes in the graph.

    Parameters
    ----------
    graph_def : TYPE
        Description
    statistic_types : TYPE
        Description
    input_layer : TYPE
        Description
    input_shape_override : TYPE
        Description
    batch_size : TYPE
        Description

    Returns
    -------
    TYPE
        Description

    Raises
    ------
    ValueError
        Description
    """
    tf.import_graph_def(graph_def, name="")
    total_stats = {}
    node_stats = {}
    for statistic_type in statistic_types:
        total_stats[statistic_type] = ops.OpStats(statistic_type)
        node_stats[statistic_type] = {}
    # Make sure we get pretty-printed numbers with separators.
    locale.setlocale(locale.LC_ALL, "")
    with tf.Session() as sess:
        input_tensor = sess.graph.get_tensor_by_name(input_layer)
        input_shape_tensor = input_tensor.get_shape()
        if input_shape_tensor:
            input_shape = input_shape_tensor.as_list()
        else:
            input_shape = None
        if input_shape_override:
            input_shape = input_shape_override
        if input_shape is None:
            raise ValueError(
                """No input shape was provided on the command line,"""
                """ and the input op itself had no default shape, so"""
                """ shape inference couldn't be performed. This is"""
                """ required for metrics calculations.""")
        input_shape[0] = batch_size
        input_tensor.set_shape(input_shape)
        for node in graph_def.node:
            # Ensure that the updated input shape has been fully-propagated before we
            # ask for the statistics, since they may depend on the output size.
            op = sess.graph.get_operation_by_name(node.name)
            ops.set_shapes_for_outputs(op)
            for statistic_type in statistic_types:
                current_stats = ops.get_stats_for_node_def(
                    sess.graph, node, statistic_type)
                node_stats[statistic_type][node.name] = current_stats
                total_stats[statistic_type] += current_stats
    return total_stats, node_stats
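A minimal usage sketch, assuming a frozen GraphDef at a hypothetical path "model.pb" and an input placeholder named "input:0":

graph_def = graph_pb2.GraphDef()
with open("model.pb", "rb") as f:  # hypothetical path
    graph_def.ParseFromString(f.read())
total_stats, node_stats = calculate_graph_metrics(
    graph_def, ["flops", "weight_parameters"], "input:0",
    input_shape_override=None, batch_size=1)
print("flops=%d" % total_stats["flops"].value)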
Example #26
def _unary_op_flops(graph, node, ops_per_element=1):
  """Common code which compute flops for unary operations."""
  in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  in_shape.assert_is_fully_defined()
  return ops.OpStats("flops", in_shape.num_elements() * ops_per_element)
Example #27
def _zero_flops(graph, node):
  """Returns zero flops."""
  del graph, node  # graph and node are unused
  return ops.OpStats("flops", 0)