Example #1
def _conv_2d_backprop_input_flops(graph, node):
  """Compute flops for Conv2DBackpropInput operation."""
  # Formula:
  #  batch_size * image_x_dim * image_y_dim * kernel_x_dim * kernel_y_dim
  #  * input_depth * output_depth * 2 / (image_x_stride * image_y_stride)
  #
  # Where:
  # image_x_dim, image_y_dim and input_depth --- size of input to source (no
  #   backprop) convolution, in other words they are sizes of backprop output.
  # output_depth --- number of filters in the original convolution, thus
  #   depth of backprop input.
  # kernel_x_dim and kernel_y_dim --- sizes of filter in spatial dimension
  # image_x_stride and image_y_stride --- strides of the convolution
  #
  _verify_conv_data_format(node)
  # out_shape = [batch_size, image_y_dim, image_x_dim, input_depth]
  out_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  out_shape.assert_is_fully_defined()
  # kernel_shape = [kernel_y_dim, kernel_x_dim, input_depth, output_depth]
  kernel_shape = graph_util.tensor_shape_from_node_def_name(graph,
                                                            node.input[1])
  kernel_shape.assert_is_fully_defined()
  # strides
  strides_shape = list(node.attr["strides"].list.i)
  strides_product = strides_shape[1] * strides_shape[2]
  return ops.OpStats("flops",
                     (2 * out_shape.num_elements()
                      * kernel_shape.num_elements()
                      / (out_shape.dims[-1].value * strides_product)))
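For context: in TensorFlow's flops registry, handlers like the one above carry an ops.RegisterStatistics decorator so the profiler can look them up by (op type, statistic name). A minimal sketch of the hookup (the decorator and the "flops" statistic type are the real API):

# Registration pattern used by the flops registry; the decorator attaches
# the handler to the ("Conv2DBackpropInput", "flops") key.
@ops.RegisterStatistics("Conv2DBackpropInput", "flops")
def _conv_2d_backprop_input_flops(graph, node):
  ...  # body as defined above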
Example #2
def _reduction_op_flops(graph, node, reduce_flops=1, finalize_flops=0):
  """Common code which compute flops for reduction operations."""
  in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  in_shape.assert_is_fully_defined()
  out_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  out_shape.assert_is_fully_defined()
  num_flops = (in_shape.num_elements() * reduce_flops
               + out_shape.num_elements() * (finalize_flops - reduce_flops))
  return ops.OpStats("flops", num_flops)
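Concrete reductions specialize the two knobs. For instance, TensorFlow's flops registry derives Mean from this helper: one flop per input element for the summation, plus one flop per output element for the final division:

@ops.RegisterStatistics("Mean", "flops")
def _mean_flops(graph, node):
  # Reduction: sum; finalization: division by the number of reduced elements.
  return _reduction_op_flops(graph, node, reduce_flops=1, finalize_flops=1)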
Example #3
def _calc_mat_mul_flops(graph, node):
    """Calculates the compute resources needed for MatMul."""
    transpose_a = node.attr["transpose_a"].b
    a_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
    a_shape.assert_is_fully_defined()
    if transpose_a:
        k = int(a_shape[-2])
    else:
        k = int(a_shape[-1])
    output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
    output_shape.assert_is_fully_defined()
    output_count = np.prod(output_shape.as_list())
    return ops.OpStats("flops", (k * output_count * 2))
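As a quick sanity check of the 2 * k * output_count formula (hypothetical shapes, not taken from any graph): multiplying an [m, k] matrix by a [k, n] matrix yields m * n output elements, each costing k multiplies and k adds:

# Hypothetical shapes for illustration only.
m, k, n = 4, 8, 3
output_count = m * n
flops = 2 * k * output_count  # k multiplies + k adds per output element
assert flops == 192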
Example #4
def _max_pool_grad_flops(graph, node):
  """Compute flops for MaxPoolGrad operation."""
  _verify_conv_data_format(node)
  #
  # MaxPoolGrad declaration:
  #   Inputs:
  #     - orig_input  -- original input tensor (of max_pool)
  #     - orig_output  -- original output tensor (of max_pool)
  #     - grad --  gradient with respect to output of max_pool
  #   Outputs:
  #     - output -- gradient with respect to input of max_pool
  #   Attributes:
  #     - ksize
  #     - strides
  #     - padding
  #     - data_format
  # It recomputes MaxPool first, then spends one flop per element of the
  # original output.
  #
  kernel_shape = list(node.attr["ksize"].list.i)
  kernel_area = _list_product(kernel_shape)
  orig_out_shape = graph_util.tensor_shape_from_node_def_name(graph,
                                                              node.input[1])
  orig_out_shape.assert_is_fully_defined()
  max_pool_ops = kernel_area * orig_out_shape.num_elements()
  return ops.OpStats("flops", max_pool_ops + orig_out_shape.num_elements())
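A worked instance under assumed shapes (illustrative only; the real values come from the node's attributes and inputs):

# ksize = [1, 2, 2, 1], orig_output shape = [1, 16, 16, 8]  (hypothetical)
kernel_area = 1 * 2 * 2 * 1
out_elements = 1 * 16 * 16 * 8
flops = kernel_area * out_elements + out_elements  # 8192 + 2048
assert flops == 10240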
Example #5
def _add_n_flops(graph, node):
  """Compute flops for AddN operation."""
  if not node.input:
    return _zero_flops(graph, node)
  in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  in_shape.assert_is_fully_defined()
  return ops.OpStats("flops", in_shape.num_elements() * (len(node.input) - 1))
Example #6
def _l2_loss_flops(graph, node):
  """Compute flops for L2Loss operation."""
  in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  in_shape.assert_is_fully_defined()
  # TensorFlow uses an inefficient implementation with (3*N-1) flops;
  # an optimal implementation would need only 2*N flops.
  return ops.OpStats("flops", in_shape.num_elements() * 3 - 1)
Example #7
def _flops_fused_batch_norm_v3(graph, node):
    """inference is only supportted"""
    in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
    in_shape.assert_is_fully_defined()
    mean_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[3])
    mean_shape.assert_is_fully_defined()
    variance_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[4])
    variance_shape.assert_is_fully_defined()

    if node.attr["is_training"].b is True:
        raise ValueError("Only supports inference mode")

    # Rough counting (inference): one multiply and one add per input element,
    # plus a handful of per-channel ops (5 per variance element, 1 per mean
    # element) to fold the statistics into a scale and offset.
    num_flops = (2 * in_shape.num_elements() +
                 5 * variance_shape.num_elements() + mean_shape.num_elements())
    return ops.OpStats("flops", num_flops)
Example #8
def _add_n_flops(graph, node):
    """Compute flops for AddN operation."""
    if not node.input:
        return flops_registry._zero_flops(graph, node)
    in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
    in_shape.assert_is_fully_defined()
    # A complex64 addition costs two real additions (real + imaginary parts).
    if node.attr["T"].type == tf.complex64:
        flops_per_element = 2
    else:
        flops_per_element = 1
    return ops.OpStats("flops",
                       in_shape.num_elements() * flops_per_element *
                       (len(node.input) - 1))
Example #9
def _conv_2d_backprop_filter_flops(graph, node):
  """Compute flops for Conv2DBackpropFilter operation."""
  # Formula same as for Conv2DBackpropInput:
  #  batch_size * image_x_dim * image_y_dim * kernel_x_dim * kernel_y_dim
  #  * input_depth * output_depth * 2 / (image_x_stride * image_y_stride)
  #
  _verify_conv_data_format(node)
  # image_shape = [batch_size, image_y_dim, image_x_dim, input_depth]
  image_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  image_shape.assert_is_fully_defined()
  # kernel_shape = [kernel_y_dim, kernel_x_dim, input_depth, output_depth]
  kernel_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  kernel_shape.assert_is_fully_defined()
  # strides
  strides_shape = list(node.attr["strides"].list.i)
  strides_product = strides_shape[1] * strides_shape[2]
  return ops.OpStats("flops",
                     (2 * image_shape.num_elements()
                      * kernel_shape.num_elements()
                      / (image_shape.dims[-1].value * strides_product)))
Example #10
def _ifft_2d_flops(graph, node):
    """Compute flops for ifft2d operation.
    
    Using same value as in fft2d"""
    if not node.input:
        return flops_registry._zero_flops(graph, node)
    in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
    in_shape.assert_is_fully_defined()
    n = in_shape.num_elements()
    if n == 0:
        return flops_registry._zero_flops(graph, node)
    num_ops = np.int_(np.ceil(5 * n * np.log2(n)))
    return ops.OpStats("flops", num_ops)
Example #11
def _avg_pool_grad_flops(graph, node):
  """Compute flops for AvgPoolGrad operation."""
  _verify_conv_data_format(node)
  # Pooling gradient implementation:
  out_backprop_shape = graph_util.tensor_shape_from_node_def_name(graph,
                                                                  node.input[1])
  out_backprop_shape.assert_is_fully_defined()
  kernel_shape = list(node.attr["ksize"].list.i)
  kernel_area = _list_product(kernel_shape)
  # TensorFlow multiplies each element of the pooling window by a
  # coefficient and then sums them up, hence 2 flops per element.
  # A more optimal implementation would divide only after the summation.
  return ops.OpStats("flops",
                     kernel_area * out_backprop_shape.num_elements() * 2)
Example #12
def _fft_2d_flops(graph, node):
    """Compute flops for fft2d operation.
    
    The radix-2 Cooley-Tukey algorithm asymptotically requires 5 N log2(N) floating-point operations.
    I am using this value as the flops estimate.
    
    Source:
    http://www.fftw.org/speed/method.html
    """
    if not node.input:
        return flops_registry._zero_flops(graph, node)
    in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
    in_shape.assert_is_fully_defined()
    n = in_shape.num_elements()
    if n == 0:
        return flops_registry._zero_flops(graph, node)
    num_ops = np.int_(np.ceil(5 * n * np.log2(n)))
    return ops.OpStats("flops", num_ops)
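To get a feel for the magnitude (hypothetical input size, mirroring the arithmetic above): a 32x32 input has N = 1024 elements, giving 5 * 1024 * 10 flops:

import numpy as np

n = 32 * 32  # hypothetical fully defined input
num_ops = int(np.ceil(5 * n * np.log2(n)))
assert num_ops == 51200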
Example #13
def _pool_flops(graph, node):
  """Common code which compute flops for pooling operations."""
  # compute flops for average and max pooling
  _verify_conv_data_format(node)
  #
  # Pooling declaration:
  #   Inputs:
  #     - value
  #   Outputs:
  #     - output
  #   Attributes:
  #     - ksize
  #     - strides
  #     - padding
  #     - data_format
  #
  # Pooling implementation:
  out_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
  out_shape.assert_is_fully_defined()
  kernel_shape = list(node.attr["ksize"].list.i)
  kernel_area = _list_product(kernel_shape)
  return ops.OpStats("flops", kernel_area * out_shape.num_elements())
Example #14
    def extract_sub_graph(input_path, dest_nodes=None, output_path=None,
                          src_nodes=None, name_prefix=""):
        """
        Extract the subgraph within the boundary defined by dest_nodes and src_nodes if name_prefix is provided
        or the subgraph comprising all nodes with name that starts with name_prefix.
        dest_nodes/src_nodes and name_prefix aren't compatible. You only need to supply one of them.
        """
        logging.info("load from %s", input_path)
        graph_def = load_graph_def_from_pb(input_path)
        logging.info("\ttotal node = %s", len(graph_def.node))

        if (dest_nodes or src_nodes) and name_prefix:
            raise RuntimeError("dest_nodes/src_nodes and name_prefix are incompatible.")
        if not name_prefix:
            if not dest_nodes:
                _, dest_nodes, _ = get_graph_def_io_nodes(graph_def)
        else:
            dest_nodes = []
            for node in graph_def.node:
                if node.name.startswith(name_prefix):
                    dest_nodes.append(node.name)
        if not src_nodes:
            src_nodes = []

        if not isinstance(dest_nodes, list):
            raise TypeError("dest_nodes must be a list.")
        if not isinstance(src_nodes, list):
            raise TypeError("src_nodes must be a list.")

        def extract_graph_summary(graph_def):
            """Extracts useful information from the graph and returns them."""
            name_to_input_name = {}  # Keyed by the dest node name.
            name_to_node = {}  # Keyed by node name.

            # Keeps track of node sequences. It is important to still output the
            # operations in the original order.
            name_to_seq_num = {}  # Keyed by node name.
            seq = 0
            for node in graph_def.node:
                n = get_node_name(node.name)
                name_to_node[n] = node
                name_to_input_name[n] = [get_node_name(x) for x in node.input]
                name_to_seq_num[n] = seq
                seq += 1
            return name_to_input_name, name_to_node, name_to_seq_num


        def assert_nodes_are_present(name_to_node, nodes):
            """Assert that nodes are present in the graph."""
            for d in nodes:
                assert d in name_to_node, "%s is not in graph" % d


        def bfs_for_reachable_nodes(target_nodes, name_to_input_name, checker=None):
            """Breadth first search for reachable nodes from target nodes."""
            nodes_to_keep = set()
            # Breadth first search to find all the nodes that we should keep.
            next_to_visit = target_nodes[:]
            while next_to_visit:
                n = next_to_visit.pop(0)
                if n in nodes_to_keep:
                    # Already visited this node.
                    continue
                if not checker or checker(n):
                    nodes_to_keep.add(n)
                    next_to_visit += name_to_input_name[n]
            return nodes_to_keep

        name_to_input_name, name_to_node, name_to_seq_num = extract_graph_summary(
            graph_def)
        assert_nodes_are_present(name_to_node, dest_nodes)
        assert_nodes_are_present(name_to_node, src_nodes)

        src_ops = []
        def node_checker(n):
            if not n.startswith(name_prefix) or n in src_nodes:
                if name_to_node[n] not in src_ops:
                    src_ops.append(name_to_node[n])
                return False
            return True
        nodes_to_keep = bfs_for_reachable_nodes(dest_nodes, name_to_input_name, checker=node_checker)

        nodes_to_keep_list = sorted(
            list(nodes_to_keep), key=lambda n: name_to_seq_num[n])
        # Now construct the output GraphDef
        out = graph_pb2.GraphDef()
        for n in nodes_to_keep_list:
            out.node.extend([copy.deepcopy(name_to_node[n])])

        # create placeholder
        with tf.Graph().as_default() as tf_graph:
            tf.import_graph_def(graph_def, name="")
        for op in src_ops:
            placeholder_node = node_def_pb2.NodeDef()
            placeholder_node.op = "Placeholder"
            placeholder_node.name = op.name
            dtype = None
            if str(op.attr["dtype"]):
                dtype = op.attr["dtype"]
            elif str(op.attr["T"]):
                dtype = op.attr["T"]
            elif str(op.attr["output_types"]):
                dtype = attr_value_pb2.AttrValue()
                dtype.type = op.attr["output_types"].list.type[0]
            if dtype is None:
                raise RuntimeError("Cannot find dtype for Placeholder: {}".format(op.name))
            placeholder_node.attr["dtype"].CopyFrom(dtype)
            shape = graph_util.tensor_shape_from_node_def_name(tf_graph, op.name)
            placeholder_node.attr["shape"].CopyFrom(
                attr_value_pb2.AttrValue(shape=shape.as_proto())
            )
            out.node.extend([placeholder_node])

        out.library.CopyFrom(graph_def.library)
        out.versions.CopyFrom(graph_def.versions)

        if not output_path:
            output_path = append_file_name_suffix(input_path, "sub")
        logging.info("save to %s", output_path)
        logging.info("\ttotal node = %s", len(out.node))
        save_graph_def(out, output_path)
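A minimal usage sketch, assuming extract_sub_graph is exposed as a module-level function (in the source it appears indented, e.g. inside another scope) and using hypothetical paths and node names:

# Cut out everything under a name prefix; nodes feeding the boundary
# are replaced by Placeholders with matching dtype and shape.
extract_sub_graph("model.pb", name_prefix="squeezenet/fire2",
                  output_path="model_fire2_sub.pb")

# Alternatively, bound the subgraph by explicit destination/source nodes.
extract_sub_graph("model.pb", dest_nodes=["logits"], src_nodes=["input"])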
Example #15
def _unary_op_flops(graph, node, ops_per_element=1):
  """Common code which compute flops for unary operations."""
  in_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
  in_shape.assert_is_fully_defined()
  return ops.OpStats("flops", in_shape.num_elements() * ops_per_element)
Example #16
def _add_flops(graph, node):
    """Compute flops for the Add operation."""
    out_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
    out_shape.assert_is_fully_defined()
    return ops.OpStats("flops", out_shape.num_elements())
Example #17
def main(_):

    with tf.Graph().as_default():
        sess = tf.Session()
        print('load graph:', FLAGS.pb)
        load_model(FLAGS.pb)
        #g = load_pb(FLAGS.pb)
        g = sess.graph
        print('# of ops:', len(g.get_operations()))

        # dump data for tensorboard
        if FLAGS.tb_path:
            writer = tf.summary.FileWriter(FLAGS.tb_path, graph=g)

        from tensorflow.python.framework import graph_util
        import numpy as np
        operations = g.get_operations()
        strOpNames = ""
        i = 1
        for op in operations:
            strOpNames += "Operation:" + op.name + "\n"

        with open(FLAGS.dump_ops_path, 'w') as file:
            file.write(strOpNames)

        lstNode = [n.name for n in g.as_graph_def().node]
        strNodeNames = ""
        for node in lstNode:
            strNodeNames += node + "\n"
        with open(FLAGS.dump_nodes_path, 'w') as file:
            file.write(strNodeNames)

        # dump flops
        strFlopsInfo = "Layer, Filter Num, Filter H, Filter W, Filter D, Output H, Output W, " \
                       "Params (N*H*W*D), FLOPs (Params * Output H * Output W * 2)\n"
        lstConv2D = [n for n in g.as_graph_def().node if n.op == 'Conv2D']
        print('# of Conv2D:', len(lstConv2D))
        for node in lstConv2D:
            #print('[_calc_conv_flops]node.name', node.name)
            strFlopsInfo += node.name + ","
            input_shape = graph_util.tensor_shape_from_node_def_name(
                g, node.input[0])
            #print('[_calc_conv_flops]input_shape.as_list()', input_shape.as_list())
            #print('[_calc_conv_flops]node.input[0]', input_shape)
            filter_shape = graph_util.tensor_shape_from_node_def_name(
                g, node.input[1])
            #print('[_calc_conv_flops]node.input[1]', filter_shape)
            output_shape = graph_util.tensor_shape_from_node_def_name(
                g, node.name)
            #print('[_calc_conv_flops]output_shape', output_shape)
            filter_height = int(filter_shape[0])
            filter_width = int(filter_shape[1])
            filter_in_depth = int(filter_shape[2])
            filter_num = int(filter_shape[3])
            params = filter_in_depth * filter_height * filter_width * filter_num
            #print('[_calc_conv_flops]h:%d w:%d d:%d n:%d'% (filter_height, filter_width, filter_in_depth, filter_num))
            strFlopsInfo += str(filter_num) + "," + str(filter_height) + "," \
                          + str(filter_width) + "," + str(filter_in_depth) + ","
            #print('[_calc_conv_flops]params:%d'% params)
            #print('[_calc_conv_flops]output_shape.as_list()', output_shape.as_list())
            output_count = np.prod(output_shape.as_list()[1:], dtype=np.int64)
            output_h, output_w = output_shape.as_list()[1:3]
            strFlopsInfo += str(output_h) + "," + str(output_w) + ","
            strFlopsInfo += str(params) + ","
            #print('[_calc_conv_flops]output_count', output_shape.as_list()[1:])
            flops = output_count * filter_in_depth * filter_height * filter_width * 2
            #print('[_calc_conv_flops]flops', flops)
            strFlopsInfo += str(flops) + "\n"

        with open(FLAGS.dump_flops_path, 'w') as file:
            file.write(strFlopsInfo)

        # parse weights
        graph_nodes = [n for n in g.as_graph_def().node]
        #wts = [n for n in graph_nodes if n.op=='Const']
        #wts = [n for n in graph_nodes if n.name=='squeezenet/conv1/Conv2D_eightbit_min_squeezenet/conv1/weights/read']
        wts = [
            n for n in graph_nodes if n.name ==
            'squeezenet/conv1/Conv2D_eightbit_reshape_squeezenet/conv1/weights/read'
        ]
        #wts = [n for n in graph_nodes if n.name=='squeezenet/conv1/weights']
        #    t = g.get_tensor_by_name('squeezenet/conv1/Conv2D_eightbit_min_input:0')
        #    print(t)

        strName = ""
        for n in wts:
            #p = tf.Print(n, [n], message="Test=========>")
            #print(p)
            print("node:", n.attr['T'])
            print("node type:", type(n.attr['T']))

        from tensorflow.python.framework import tensor_util
        strWts = ""
        for n in wts:
            strWts += "Name of the node - %s\n" % n.name