def _conv_2d_backprop_input_flops(graph, node):
    """Estimate the flops of a Conv2DBackpropInput node.

    The estimate is

        2 * batch_size * image_y_dim * image_x_dim
          * kernel_y_dim * kernel_x_dim * input_depth * output_depth
          / (image_y_stride * image_x_stride)

    where image_*_dim and input_depth are the sizes of the input of the
    forward (non-backprop) convolution, i.e. the sizes of this node's output;
    output_depth is the number of filters of the forward convolution;
    kernel_*_dim are the spatial sizes of the filter.

    Args:
        graph: graph containing `node`.
        node: NodeDef of the Conv2DBackpropInput operation.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    _verify_conv_data_format(node)

    # This node's output: [batch_size, image_y_dim, image_x_dim, input_depth].
    grad_input_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.name)
    grad_input_shape.assert_is_fully_defined()

    # Filter: [kernel_y_dim, kernel_x_dim, input_depth, output_depth].
    filter_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[1])
    filter_shape.assert_is_fully_defined()

    strides = list(node.attr["strides"].list.i)
    stride_area = strides[1] * strides[2]

    # input_depth appears in both element counts, so it is divided out once
    # together with the stride area.
    numerator = (2 * grad_input_shape.num_elements()
                 * filter_shape.num_elements())
    denominator = grad_input_shape.dims[-1].value * stride_area
    return ops.OpStats("flops", numerator / denominator)
def _reduction_op_flops(graph, node, reduce_flops=1, finalize_flops=0):
    """Shared flops estimate for reduction operations.

    Every input element costs `reduce_flops`; every output element costs an
    extra `finalize_flops` and saves one `reduce_flops`.

    Args:
        graph: graph containing `node`.
        node: NodeDef of the reduction operation; `node.input[0]` is the
            tensor being reduced.
        reduce_flops: flops charged per input element.
        finalize_flops: flops charged per output element.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    source_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    source_shape.assert_is_fully_defined()
    result_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
    result_shape.assert_is_fully_defined()

    per_output_adjustment = finalize_flops - reduce_flops
    total = (source_shape.num_elements() * reduce_flops
             + result_shape.num_elements() * per_output_adjustment)
    return ops.OpStats("flops", total)
def _calc_mat_mul_flops(graph, node):
    """Calculates the compute resources needed for MatMul.

    Each output element needs `k` multiply-adds, counted as 2 flops each,
    where `k` is the contracted dimension of the first operand.

    Args:
        graph: graph containing `node`.
        node: NodeDef of the MatMul operation; `node.input[0]` is the first
            operand and the `transpose_a` attribute says whether it is
            transposed.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    transpose_a = node.attr["transpose_a"].b
    a_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0])
    a_shape.assert_is_fully_defined()
    # The contracted dimension is the last axis of `a`, or the second to last
    # one when `a` is transposed.
    k = int(a_shape[-2]) if transpose_a else int(a_shape[-1])

    output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
    output_shape.assert_is_fully_defined()
    # Accumulate in int64 explicitly: the default accumulator of np.prod is
    # the platform integer, which can be 32 bits wide and overflow silently
    # for large outputs.
    output_count = np.prod(output_shape.as_list(), dtype=np.int64)
    return ops.OpStats("flops", (k * output_count * 2))
def _max_pool_grad_flops(graph, node):
    """Estimate the flops of a MaxPoolGrad node.

    MaxPoolGrad declaration:
        Inputs:
            - orig_input  -- original input tensor (of max_pool)
            - orig_output -- original output tensor (of max_pool)
            - grad        -- gradient with respect to output of max_pool
        Outputs:
            - output      -- gradient with respect to input of max_pool
        Attributes:
            - ksize, strides, padding, data_format

    The cost model is a full MaxPool pass plus one flop per element of the
    original output.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    _verify_conv_data_format(node)

    window = list(node.attr["ksize"].list.i)
    window_size = _list_product(window)

    pooled_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[1])
    pooled_shape.assert_is_fully_defined()

    forward_pass_flops = window_size * pooled_shape.num_elements()
    return ops.OpStats("flops",
                       forward_pass_flops + pooled_shape.num_elements())
def _add_n_flops(graph, node):
    """Estimate the flops of an AddN node.

    Summing N tensors takes N - 1 additions per element; the element count is
    taken from the first input. A node without inputs costs zero flops.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    if not node.input:
        return _zero_flops(graph, node)

    first_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    first_shape.assert_is_fully_defined()

    additions_per_element = len(node.input) - 1
    return ops.OpStats("flops",
                       first_shape.num_elements() * additions_per_element)
def _l2_loss_flops(graph, node):
    """Estimate the flops of an L2Loss node.

    The count is 3 * N - 1 for an input of N elements, which models the
    (inefficient) TensorFlow implementation; an optimal implementation would
    need 2 * N flops.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    operand_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    operand_shape.assert_is_fully_defined()
    flops = operand_shape.num_elements() * 3 - 1
    return ops.OpStats("flops", flops)
def _flops_fused_batch_norm_v3(graph, node):
    """Estimate the flops of a FusedBatchNormV3 node (inference only).

    The count is 2 flops per element of input 0, plus 5 flops per element of
    input 4 (named "variance" here) and 1 flop per element of input 3 (named
    "mean" here).

    Args:
        graph: graph containing `node`.
        node: NodeDef of the FusedBatchNormV3 operation.

    Returns:
        An `ops.OpStats` of kind "flops".

    Raises:
        ValueError: if the node's `is_training` attribute is True.
    """
    data_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    data_shape.assert_is_fully_defined()

    mean_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[3])
    mean_shape.assert_is_fully_defined()

    variance_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[4])
    variance_shape.assert_is_fully_defined()

    # The mode is checked only after all shapes are validated, so shape
    # errors take precedence over the training-mode error.
    if node.attr["is_training"].b is True:
        raise ValueError("Only supports inference mode")

    data_term = 2 * data_shape.num_elements()
    variance_term = 5 * variance_shape.num_elements()
    num_flops = data_term + variance_term + mean_shape.num_elements()
    return ops.OpStats("flops", num_flops)
def _add_n_flops(graph, node):
    """Estimate the flops of an AddN node, with complex64 awareness.

    Summing N tensors takes N - 1 additions per element. A complex64 addition
    is charged as 2 flops, any other type as 1. The element count is taken
    from the first input; a node without inputs costs zero flops.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    if not node.input:
        return flops_registry._zero_flops(graph, node)

    first_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    first_shape.assert_is_fully_defined()

    flops_per_addition = 2 if node.attr['T'].type == tf.complex64 else 1
    additions_per_element = len(node.input) - 1
    return ops.OpStats(
        "flops",
        first_shape.num_elements() * flops_per_addition
        * additions_per_element)
def _conv_2d_backprop_filter_flops(graph, node):
    """Estimate the flops of a Conv2DBackpropFilter node.

    Same formula as for Conv2DBackpropInput:

        2 * batch_size * image_y_dim * image_x_dim
          * kernel_y_dim * kernel_x_dim * input_depth * output_depth
          / (image_y_stride * image_x_stride)

    Args:
        graph: graph containing `node`.
        node: NodeDef of the Conv2DBackpropFilter operation.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    _verify_conv_data_format(node)

    # Forward input: [batch_size, image_y_dim, image_x_dim, input_depth].
    input_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    input_shape.assert_is_fully_defined()

    # This node's output, the filter gradient:
    # [kernel_y_dim, kernel_x_dim, input_depth, output_depth].
    filter_grad_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.name)
    filter_grad_shape.assert_is_fully_defined()

    strides = list(node.attr["strides"].list.i)
    stride_area = strides[1] * strides[2]

    # input_depth appears in both element counts, so it is divided out once
    # together with the stride area.
    numerator = (2 * input_shape.num_elements()
                 * filter_grad_shape.num_elements())
    denominator = input_shape.dims[-1].value * stride_area
    return ops.OpStats("flops", numerator / denominator)
def _ifft_2d_flops(graph, node):
    """Estimate the flops of an ifft2d node.

    Uses the same estimate as fft2d: ceil(5 * N * log2(N)) for an input of
    N elements. A node without inputs, or with an empty input, costs zero
    flops.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    if not node.input:
        return flops_registry._zero_flops(graph, node)

    signal_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    signal_shape.assert_is_fully_defined()

    n = signal_shape.num_elements()
    if n == 0:
        # log2(0) is undefined; an empty transform does no work.
        return flops_registry._zero_flops(graph, node)

    estimate = 5 * n * np.log2(n)
    return ops.OpStats("flops", np.int_(np.ceil(estimate)))
def _avg_pool_grad_flops(graph, node):
    """Estimate the flops of an AvgPoolGrad node.

    TensorFlow multiplies each element of the pooling window by a coefficient
    and then sums them up, hence 2 flops per window element for every element
    of the incoming gradient (`node.input[1]`). A more optimal implementation
    would do the division afterwards.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    _verify_conv_data_format(node)

    grad_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[1])
    grad_shape.assert_is_fully_defined()

    window = list(node.attr["ksize"].list.i)
    window_size = _list_product(window)

    flops = window_size * grad_shape.num_elements() * 2
    return ops.OpStats("flops", flops)
def _fft_2d_flops(graph, node):
    """Estimate the flops of an fft2d node.

    The radix-2 Cooley-Tukey algorithm asymptotically requires
    5 * N * log2(N) floating-point operations; that value, rounded up, is
    used as the estimate. A node without inputs, or with an empty input,
    costs zero flops.

    Source: http://www.fftw.org/speed/method.html

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    if not node.input:
        return flops_registry._zero_flops(graph, node)

    signal_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    signal_shape.assert_is_fully_defined()

    n = signal_shape.num_elements()
    if n == 0:
        # log2(0) is undefined; an empty transform does no work.
        return flops_registry._zero_flops(graph, node)

    estimate = 5 * n * np.log2(n)
    return ops.OpStats("flops", np.int_(np.ceil(estimate)))
def _pool_flops(graph, node):
    """Shared flops estimate for average and max pooling.

    Pooling declaration:
        Inputs:
            - value
        Outputs:
            - output
        Attributes:
            - ksize, strides, padding, data_format

    Every output element is charged one flop per element of the pooling
    window.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    _verify_conv_data_format(node)

    pooled_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
    pooled_shape.assert_is_fully_defined()

    window = list(node.attr["ksize"].list.i)
    window_size = _list_product(window)

    return ops.OpStats("flops", window_size * pooled_shape.num_elements())
def extract_sub_graph(input_path, dest_nodes=None, output_path=None,
                      src_nodes=None, name_prefix=""):
    """Extract a subgraph from a serialized GraphDef and save it to disk.

    Two mutually exclusive selection modes are supported:

    * Boundary mode (`name_prefix` is empty): keep every node reachable
      backwards from `dest_nodes`, stopping at `src_nodes`. When `dest_nodes`
      is not given, it is taken from `get_graph_def_io_nodes(graph_def)`.
    * Prefix mode (`name_prefix` is given): start from all nodes whose name
      starts with `name_prefix` and stop at any node outside the prefix.

    Every node at which the traversal stops is replaced by a Placeholder of
    the same name in the output graph.

    Args:
        input_path: path of the graph file to load.
        dest_nodes: list of node names the traversal starts from.
        output_path: where to save the result; defaults to
            `append_file_name_suffix(input_path, "sub")`.
        src_nodes: list of node names acting as the input boundary.
        name_prefix: node-name prefix selecting the subgraph.

    Raises:
        RuntimeError: if `dest_nodes`/`src_nodes` are combined with
            `name_prefix`, or if no dtype can be found for a placeholder.
        TypeError: if `dest_nodes` or `src_nodes` is not a list.
        AssertionError: if a requested node is not in the graph.
    """
    # Local import keeps this function self-contained.
    from collections import deque

    logging.info("load from %s", input_path)
    graph_def = load_graph_def_from_pb(input_path)
    logging.info("\ttotal node = %s", len(graph_def.node))

    if (dest_nodes or src_nodes) and name_prefix:
        raise RuntimeError(
            "dest_nodes/src_nodes and name_prefix are incompatible.")
    if not name_prefix:
        if not dest_nodes:
            _, dest_nodes, _ = get_graph_def_io_nodes(graph_def)
    else:
        dest_nodes = [node.name for node in graph_def.node
                      if node.name.startswith(name_prefix)]
    if not src_nodes:
        src_nodes = []

    if not isinstance(dest_nodes, list):
        raise TypeError("dest_nodes must be a list.")
    if not isinstance(src_nodes, list):
        raise TypeError("src_nodes must be a list.")

    def extract_graph_summary(graph_def):
        """Extracts useful information from the graph and returns them."""
        name_to_input_name = {}  # Keyed by the dest node name.
        name_to_node = {}  # Keyed by node name.
        # Keeps track of node sequences. It is important to still output the
        # operations in the original order.
        name_to_seq_num = {}  # Keyed by node name.
        for seq, node in enumerate(graph_def.node):
            n = get_node_name(node.name)
            name_to_node[n] = node
            name_to_input_name[n] = [get_node_name(x) for x in node.input]
            name_to_seq_num[n] = seq
        return name_to_input_name, name_to_node, name_to_seq_num

    def assert_nodes_are_present(name_to_node, nodes):
        """Assert that nodes are present in the graph."""
        for d in nodes:
            # Raised explicitly (not via `assert`) so the validation is not
            # stripped when Python runs with -O.
            if d not in name_to_node:
                raise AssertionError("%s is not in graph" % d)

    def bfs_for_reachable_nodes(target_nodes, name_to_input_name,
                                checker=None):
        """Breadth first search for reachable nodes from target nodes."""
        nodes_to_keep = set()
        # A deque gives O(1) pops from the front of the work queue.
        next_to_visit = deque(target_nodes)
        while next_to_visit:
            n = next_to_visit.popleft()
            if n in nodes_to_keep:
                # Already visited this node.
                continue
            # Inputs of a rejected node are not followed: the node is a
            # boundary of the subgraph.
            if not checker or checker(n):
                nodes_to_keep.add(n)
                next_to_visit.extend(name_to_input_name[n])
        return nodes_to_keep

    name_to_input_name, name_to_node, name_to_seq_num = extract_graph_summary(
        graph_def)
    assert_nodes_are_present(name_to_node, dest_nodes)
    assert_nodes_are_present(name_to_node, src_nodes)

    src_node_names = set(src_nodes)
    src_ops = []  # Boundary nodes, in the order they were first reached.
    src_op_names = set()  # Names already recorded in src_ops.

    def node_checker(n):
        """Return True to keep `n`; record it as a boundary node otherwise."""
        if not n.startswith(name_prefix) or n in src_node_names:
            # A boundary node can be reached several times because rejected
            # nodes are never marked as visited by the search.
            if n not in src_op_names:
                src_op_names.add(n)
                src_ops.append(name_to_node[n])
            return False
        return True

    nodes_to_keep = bfs_for_reachable_nodes(dest_nodes, name_to_input_name,
                                            checker=node_checker)

    nodes_to_keep_list = sorted(nodes_to_keep,
                                key=lambda n: name_to_seq_num[n])

    # Now construct the output GraphDef
    out = graph_pb2.GraphDef()
    for n in nodes_to_keep_list:
        out.node.extend([copy.deepcopy(name_to_node[n])])

    # Create one placeholder per boundary node. The full graph is imported
    # so that the output shape of each boundary node can be looked up.
    with tf.Graph().as_default() as tf_graph:
        tf.import_graph_def(graph_def, name="")
        for op in src_ops:
            placeholder_node = node_def_pb2.NodeDef()
            placeholder_node.op = "Placeholder"
            placeholder_node.name = op.name

            # Take the dtype from the first non-empty attribute among
            # "dtype", "T" and "output_types".
            dtype = None
            if str(op.attr["dtype"]):
                dtype = op.attr["dtype"]
            elif str(op.attr["T"]):
                dtype = op.attr["T"]
            elif str(op.attr["output_types"]):
                dtype = attr_value_pb2.AttrValue()
                dtype.type = op.attr["output_types"].list.type[0]
            if dtype is None:
                raise RuntimeError(
                    "Cannot find dtype for Placeholder: {}".format(op.name))
            placeholder_node.attr["dtype"].CopyFrom(dtype)

            shape = graph_util.tensor_shape_from_node_def_name(
                tf_graph, op.name)
            placeholder_node.attr["shape"].CopyFrom(
                attr_value_pb2.AttrValue(shape=shape.as_proto())
            )
            out.node.extend([placeholder_node])

    out.library.CopyFrom(graph_def.library)
    out.versions.CopyFrom(graph_def.versions)

    if not output_path:
        output_path = append_file_name_suffix(input_path, "sub")
    logging.info("save to %s", output_path)
    logging.info("\ttotal node = %s", len(out.node))
    save_graph_def(out, output_path)
def _unary_op_flops(graph, node, ops_per_element=1):
    """Shared flops estimate for unary operations.

    Args:
        graph: graph containing `node`.
        node: NodeDef of the unary operation; `node.input[0]` is its operand.
        ops_per_element: flops charged per element of the operand.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    operand_shape = graph_util.tensor_shape_from_node_def_name(
        graph, node.input[0])
    operand_shape.assert_is_fully_defined()
    flops = operand_shape.num_elements() * ops_per_element
    return ops.OpStats("flops", flops)
def _add_flops(graph, node):
    """Estimate the flops of an Add node: one flop per output element.

    Returns:
        An `ops.OpStats` of kind "flops".
    """
    result_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name)
    result_shape.assert_is_fully_defined()
    flops = result_shape.num_elements()
    return ops.OpStats("flops", flops)
def main(_): with tf.Graph().as_default(): sess = tf.Session() print('load graph:', FLAGS.pb) load_model(FLAGS.pb) #g = load_pb(FLAGS.pb) g = sess.graph print('# of ops:', len(g.get_operations())) # dump data for tensorboard if FLAGS.tb_path: writer = tf.summary.FileWriter(FLAGS.tb_path, graph=g) from tensorflow.python.framework import graph_util import numpy as np operations = g.get_operations() strOpNames = "" i = 1 for op in operations: strOpNames += "Operation:" + op.name + "\n" with open(FLAGS.dump_nodes_path, 'w') as file: file.write(strOpNames) lstNode = [n.name for n in g.as_graph_def().node] strNodeNames = "" for node in lstNode: strNodeNames += node + "\n" with open(FLAGS.dump_ops_path, 'w') as file: file.write(strNodeNames) #dump flops strFlopsInfo = "Layer, Filter Num, Filter H, Filter W, Filter D, Output H, Output W, " \ "Params (N*H*W*D), FLOPs (Params * output_dim^2 * 2)\n" lstConv2D = [n for n in g.as_graph_def().node if n.op == 'Conv2D'] print('# of Conv2D:', len(lstConv2D)) for node in lstConv2D: #print('[_calc_conv_flops]node.name', node.name) strFlopsInfo += node.name + "," input_shape = graph_util.tensor_shape_from_node_def_name( g, node.input[0]) #print('[_calc_conv_flops]input_shape.as_list()', input_shape.as_list()) #print('[_calc_conv_flops]node.input[0]', input_shape) filter_shape = graph_util.tensor_shape_from_node_def_name( g, node.input[1]) #print('[_calc_conv_flops]node.input[1]', filter_shape) output_shape = graph_util.tensor_shape_from_node_def_name( g, node.name) #print('[_calc_conv_flops]output_shape', output_shape) filter_height = int(filter_shape[0]) filter_width = int(filter_shape[1]) filter_in_depth = int(filter_shape[2]) filter_num = int(filter_shape[3]) params = filter_in_depth * filter_height * filter_width * filter_num #print('[_calc_conv_flops]h:%d w:%d d:%d n:%d'% (filter_height, filter_width, filter_in_depth, filter_num)) strFlopsInfo += str(filter_num) + "," + str(filter_height) + "," \ + str(filter_width) + "," + 
str(filter_in_depth) + "," #print('[_calc_conv_flops]params:%d'% params) #print('[_calc_conv_flops]output_shape.as_list()', output_shape.as_list()) output_count = np.prod(output_shape.as_list()[1:], dtype=np.int64) output_dim = output_shape.as_list()[1:2] strFlopsInfo += str(output_dim[0]) + "," + str(output_dim[0]) + "," strFlopsInfo += str(params) + "," #print('[_calc_conv_flops]output_count', output_shape.as_list()[1:]) flops = output_count * filter_in_depth * filter_height * filter_width * 2 #print('[_calc_conv_flops]flops', flops) strFlopsInfo += str(flops) + "\n" with open(FLAGS.dump_flops_path, 'w') as file: file.write(strFlopsInfo) # parse weights graph_nodes = [n for n in g.as_graph_def().node] #wts = [n for n in graph_nodes if n.op=='Const'] #wts = [n for n in graph_nodes if n.name=='squeezenet/conv1/Conv2D_eightbit_min_squeezenet/conv1/weights/read'] wts = [ n for n in graph_nodes if n.name == 'squeezenet/conv1/Conv2D_eightbit_reshape_squeezenet/conv1/weights/read' ] #wts = [n for n in graph_nodes if n.name=='squeezenet/conv1/weights'] # t = g.get_tensor_by_name('squeezenet/conv1/Conv2D_eightbit_min_input:0') # print(t) strName = "" for n in wts: #p = tf.Print(n, [n], message="Test=========>") #print(p) print("node:", n.attr['T']) print("node type:", type(n.attr['T'])) from tensorflow.python.framework import tensor_util strWts = "" for n in wts: strWts += "Name of the node - %s\n" % n.name