def testRegisteredNode(self):
    """Checks the statistics returned for a node with registered stats.

    The test expects 10 for "weight_parameters", 20 for "flops", and a value
    of None for the statistic type "missing_stat".
    """
    graph = ops.Graph()
    node = ops._NodeDef("a", "an_a")
    weight_params = ops.get_stats_for_node_def(graph, node, "weight_parameters")
    self.assertEqual(10, weight_params.value)
    flops = ops.get_stats_for_node_def(graph, node, "flops")
    self.assertEqual(20, flops.value)
    # A stats object is still returned for an unknown statistic type; only
    # its value is None, so check identity with None instead of equality.
    missing_stat = ops.get_stats_for_node_def(graph, node, "missing_stat")
    self.assertIsNone(missing_stat.value)
def testRegisteredNode(self):
    """Verifies stat lookups on a node that the test expects to be registered.

    "weight_parameters" must report 10 and "flops" must report 20, while the
    "missing_stat" lookup must report a value of None.
    """
    graph = ops.Graph()
    node = ops._NodeDef("a", "an_a")
    # Same lookups as before, expressed as (statistic type, expected value).
    for stat_type, expected in (("weight_parameters", 10), ("flops", 20)):
        stats = ops.get_stats_for_node_def(graph, node, stat_type)
        self.assertEqual(expected, stats.value)
    missing_stat = ops.get_stats_for_node_def(graph, node, "missing_stat")
    # assertIsNone gives a clearer failure message than assertEqual(None, ...).
    self.assertIsNone(missing_stat.value)
def main(unused_args):
    """Prints the graph-wide total of each requested statistic.

    Loads the GraphDef stored at FLAGS.graph (binary or text, depending on
    FLAGS.input_binary), forces the batch dimension of FLAGS.input_layer to
    FLAGS.batch_size, and sums every statistic type listed in the
    comma-separated FLAGS.statistics over all nodes.

    Args:
        unused_args: Ignored.

    Returns:
        -1 if the graph file does not exist, otherwise None.
    """
    if not tf.gfile.Exists(FLAGS.graph):
        print("Input graph file '" + FLAGS.graph + "' does not exist!")
        return -1

    graph_def = graph_pb2.GraphDef()
    with open(FLAGS.graph, "rb") as f:
        if FLAGS.input_binary:
            graph_def.ParseFromString(f.read())
        else:
            text_format.Merge(f.read(), graph_def)
    _ = tf.import_graph_def(graph_def, name="")

    statistic_types = FLAGS.statistics.split(",")
    total_stats = {}
    for statistic_type in statistic_types:
        total_stats[statistic_type] = ops.OpStats(statistic_type)

    with tf.Session() as sess:
        input_tensor = sess.graph.get_tensor_by_name(FLAGS.input_layer)
        input_shape = input_tensor.get_shape()
        # Only the leading (batch) dimension is replaced; dimensions 1-3 are
        # carried over, so the input is treated as 4-D.
        input_shape = [
            FLAGS.batch_size, input_shape[1], input_shape[2], input_shape[3]
        ]
        input_tensor.set_shape(input_shape)
        for node in graph_def.node:
            for statistic_type in statistic_types:
                node_stats = ops.get_stats_for_node_def(sess.graph, node,
                                                        statistic_type)
                total_stats[statistic_type] += node_stats

    # Make sure we get pretty-printed numbers with separators.
    locale.setlocale(locale.LC_ALL, "")
    for statistic_type in statistic_types:
        value = total_stats[statistic_type].value
        if value is None:
            friendly_value = "None"
        else:
            # locale.format() is deprecated and removed in Python 3.12;
            # format_string() takes the same arguments.
            friendly_value = locale.format_string("%d", value, grouping=True)
        print("%s=%s" % (statistic_type, friendly_value))
def calculate_graph_metrics(graph_def, statistic_types, input_layer,
                            input_shape_override, batch_size):
    """Looks at the performance statistics of all nodes in the graph.

    Args:
        graph_def: GraphDef that is imported into the default graph.
        statistic_types: Iterable of statistic type names to collect.
        input_layer: Name of the input tensor whose shape is pinned.
        input_shape_override: Optional shape that replaces the input tensor's
            own shape. It is copied, so the caller's sequence is not modified.
        batch_size: Value written into dimension 0 of the input shape.

    Returns:
        A tuple (total_stats, node_stats): total_stats maps each statistic
        type to the accumulated OpStats, node_stats maps each statistic type
        to a dict of node name -> stats for that node.

    Raises:
        ValueError: If neither the override nor the input tensor provides a
            shape.
    """
    _ = tf.import_graph_def(graph_def, name="")
    total_stats = {}
    node_stats = {}
    for statistic_type in statistic_types:
        total_stats[statistic_type] = ops.OpStats(statistic_type)
        node_stats[statistic_type] = {}
    # Make sure we get pretty-printed numbers with separators.
    locale.setlocale(locale.LC_ALL, "")
    with tf.Session() as sess:
        input_tensor = sess.graph.get_tensor_by_name(input_layer)
        input_shape_tensor = input_tensor.get_shape()
        if input_shape_tensor:
            input_shape = input_shape_tensor.as_list()
        else:
            input_shape = None
        if input_shape_override:
            # Copy: the batch dimension is overwritten below and must not
            # leak back into the caller's list.
            input_shape = list(input_shape_override)
        if input_shape is None:
            # Previously this fell through to a TypeError on None[0].
            raise ValueError(
                "No input shape override was given and the input tensor has "
                "no known shape, so the batch size cannot be applied.")
        input_shape[0] = batch_size
        input_tensor.set_shape(input_shape)
        for node in graph_def.node:
            # Ensure that the updated input shape has been fully-propagated
            # before we ask for the statistics, since they may depend on the
            # output size.
            op = sess.graph.get_operation_by_name(node.name)
            ops.set_shapes_for_outputs(op)
            for statistic_type in statistic_types:
                current_stats = ops.get_stats_for_node_def(
                    sess.graph, node, statistic_type)
                node_stats[statistic_type][node.name] = current_stats
                total_stats[statistic_type] += current_stats
    return total_stats, node_stats
def _get_logged_ops(graph):
    """Extract trainable model parameters and FLOPs for ops from a Graph.

    Args:
        graph: tf.Graph.

    Returns:
        logged_ops: dict mapping from op_name to OpLogEntry. Ops with a
        non-zero REGISTERED_FLOP_STATS value get `float_ops` set; ops of
        trainable variables get TRAINABLE_VARIABLES appended to `types`.
    """
    logged_ops = {}
    graph_def = graph.as_graph_def()
    for node in graph_def.node:
        try:
            stats = ops.get_stats_for_node_def(graph, node,
                                               REGISTERED_FLOP_STATS)
        except ValueError:
            # Raised when the shape is incomplete; skip this node.
            stats = None

        if not stats or not stats.value:
            continue
        if node.name not in logged_ops:
            entry = tfprof_log_pb2.OpLogEntry()
            entry.name = node.name
            # Cast explicitly so a non-int numeric stat value cannot be
            # rejected by the proto field assignment.
            entry.float_ops = int(stats.value)
            logged_ops[entry.name] = entry

    for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
        if v.op.name not in logged_ops:
            entry = tfprof_log_pb2.OpLogEntry()
            entry.name = v.op.name
            entry.types.append(TRAINABLE_VARIABLES)
            logged_ops[entry.name] = entry
        else:
            logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES)
    return logged_ops
def calculate_graph_metrics(graph_def, statistic_types, input_layer,
                            input_shape_override, batch_size):
    """Looks at the performance statistics of all nodes in the graph.

    Args:
        graph_def: GraphDef that is imported into the default graph.
        statistic_types: Iterable of statistic type names to collect.
        input_layer: Name of the input tensor whose shape is pinned.
        input_shape_override: Optional shape used instead of the input
            tensor's own shape. A copy is taken; the argument is not mutated.
        batch_size: Value written into dimension 0 of the input shape.

    Returns:
        (total_stats, node_stats). total_stats maps statistic type to the
        accumulated OpStats; node_stats maps statistic type to a dict of
        node name -> per-node stats.

    Raises:
        ValueError: If no input shape can be determined.
    """
    _ = tf.import_graph_def(graph_def, name="")
    total_stats = {}
    node_stats = {}
    for statistic_type in statistic_types:
        total_stats[statistic_type] = ops.OpStats(statistic_type)
        node_stats[statistic_type] = {}
    # Make sure we get pretty-printed numbers with separators.
    locale.setlocale(locale.LC_ALL, "")
    with tf.Session() as sess:
        input_tensor = sess.graph.get_tensor_by_name(input_layer)
        input_shape_tensor = input_tensor.get_shape()
        input_shape = (input_shape_tensor.as_list()
                       if input_shape_tensor else None)
        if input_shape_override:
            # Work on a copy so that writing the batch size below does not
            # change the caller's object.
            input_shape = list(input_shape_override)
        if input_shape is None:
            # Fail with a clear message instead of a TypeError on None[0].
            raise ValueError(
                "No input shape override was given and the input tensor has "
                "no known shape, so the batch size cannot be applied.")
        input_shape[0] = batch_size
        input_tensor.set_shape(input_shape)
        for node in graph_def.node:
            # Ensure that the updated input shape has been fully-propagated
            # before we ask for the statistics, since they may depend on the
            # output size.
            op = sess.graph.get_operation_by_name(node.name)
            ops.set_shapes_for_outputs(op)
            for statistic_type in statistic_types:
                current_stats = ops.get_stats_for_node_def(
                    sess.graph, node, statistic_type)
                node_stats[statistic_type][node.name] = current_stats
                total_stats[statistic_type] += current_stats
    return total_stats, node_stats
def testTransposedStatistics(self):
    """Expects 7200 flops for a [16, 25] x [16, 9] MatMul with transpose_a."""
    lhs = variables.Variable(random_ops.random_normal([16, 25]))
    rhs = variables.Variable(random_ops.random_normal([16, 9]))
    math_ops.matmul(lhs, rhs, transpose_a=True)
    graph = ops.get_default_graph()
    for operation in graph.get_operations():
        # The statistic is queried for every op in the default graph, but
        # only the op named "MatMul" is checked.
        flop_count = ops.get_stats_for_node_def(
            graph, operation.node_def, "flops").value
        if operation.name != "MatMul":
            continue
        self.assertEqual(7200, flop_count)
def _get_logged_ops(graph, run_meta=None, add_trace=False):
    """Extract trainable model parameters and FLOPs for ops from a Graph.

    Args:
        graph: tf.Graph.
        run_meta: RunMetadata proto used to complete shape information.
        add_trace: Whether to add op trace information.

    Returns:
        logged_ops: dict mapping from op_name to OpLogEntry.
    """
    # With run metadata available, replace the graph by the one returned from
    # _fill_missing_graph_shape before collecting statistics.
    if run_meta:
        graph = _fill_missing_graph_shape(graph, run_meta)

    # Number of ops whose stats lookup raised ValueError.
    op_missing_shape = 0
    logged_ops = {}
    for op in graph.get_operations():
        try:
            stats = ops.get_stats_for_node_def(graph, op.node_def, REGISTERED_FLOP_STATS)
        except ValueError:
            # The lookup raises when the shape is incomplete; count the op
            # and continue without flops for it.
            op_missing_shape += 1
            stats = None

        entry = tfprof_log_pb2.OpLogEntry()
        entry.name = op.name
        # The entry is only kept if it ends up carrying flops or a trace.
        add_entry = False
        if stats and stats.value:
            entry.float_ops = int(stats.value)
            add_entry = True

        if add_trace:
            # Each traceback item is read positionally: file, line number,
            # function and source line; falsy items get placeholder values.
            for tb in op.traceback:
                trace = entry.code_def.traces.add()
                trace.file = tb[0] if tb[0] else 'none'
                trace.lineno = tb[1] if tb[1] else -1
                trace.function = tb[2] if tb[2] else 'none'
                trace.line = tb[3] if tb[3] else 'none'
            add_entry = True

        if add_entry:
            logged_ops[entry.name] = entry

    # Tag the op of every trainable variable, creating an entry if the op has
    # none yet.
    for v in graph.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES):
        if v.op.name not in logged_ops:
            entry = tfprof_log_pb2.OpLogEntry()
            entry.name = v.op.name
            entry.types.append(TRAINABLE_VARIABLES)
            logged_ops[entry.name] = entry
        else:
            logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES)

    # Only warn when the caller supplied no run metadata.
    if op_missing_shape > 0 and not run_meta:
        sys.stderr.write(
            '%d ops no flops stats due to incomplete shapes. '
            'Consider passing run_meta to use run_time shapes.\n' %
            op_missing_shape)
    return logged_ops
def testTransposedStatistics(self):
    """Checks the "flops" statistic of a MatMul that transposes its first input."""
    first = variables.Variable(random_ops.random_normal([16, 25]))
    second = variables.Variable(random_ops.random_normal([16, 9]))
    math_ops.matmul(first, second, transpose_a=True)
    default_graph = ops.get_default_graph()
    for graph_op in default_graph.get_operations():
        stats = ops.get_stats_for_node_def(default_graph, graph_op.node_def,
                                           "flops")
        reported = stats.value
        # Every op is queried; the assertion applies to "MatMul" only.
        if graph_op.name == "MatMul":
            self.assertEqual(7200, reported)
def _get_logged_ops(graph, run_meta=None, add_trace=True):
    """Extract trainable model parameters and FLOPs for ops from a Graph.

    Args:
        graph: tf.Graph.
        run_meta: RunMetadata proto used to complete shape information.
        add_trace: Whether to add op trace information. Defaults to True.

    Returns:
        logged_ops: dict mapping from op_name to OpLogEntry.
    """
    # When run metadata is given, continue with the graph returned by
    # _fill_missing_graph_shape.
    if run_meta:
        graph = _fill_missing_graph_shape(graph, run_meta)

    # Counts ops for which the stats lookup raised ValueError.
    op_missing_shape = 0
    logged_ops = {}
    for op in graph.get_operations():
        try:
            stats = ops.get_stats_for_node_def(
                graph, op.node_def, REGISTERED_FLOP_STATS)
        except ValueError:
            # Raised when the shape is incomplete; the op gets no flops.
            op_missing_shape += 1
            stats = None

        entry = tfprof_log_pb2.OpLogEntry()
        entry.name = op.name
        # Set once the entry holds flops or trace data worth logging.
        add_entry = False
        if stats and stats.value:
            entry.float_ops = int(stats.value)
            add_entry = True

        if add_trace:
            # Traceback items are indexed as file, line number, function and
            # source line; falsy fields are replaced by 'none' / -1.
            for tb in op.traceback:
                trace = entry.code_def.traces.add()
                trace.file = tb[0] if tb[0] else 'none'
                trace.lineno = tb[1] if tb[1] else -1
                trace.function = tb[2] if tb[2] else 'none'
                trace.line = tb[3] if tb[3] else 'none'
            add_entry = True

        if add_entry:
            logged_ops[entry.name] = entry

    # Mark ops that back trainable variables, adding entries where missing.
    for v in graph.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES):
        if v.op.name not in logged_ops:
            entry = tfprof_log_pb2.OpLogEntry()
            entry.name = v.op.name
            entry.types.append(TRAINABLE_VARIABLES)
            logged_ops[entry.name] = entry
        else:
            logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES)

    # The hint is only useful when no run metadata was supplied.
    if op_missing_shape > 0 and not run_meta:
        sys.stderr.write('%d ops no flops stats due to incomplete shapes. '
                         'Consider passing run_meta to use run_time shapes.\n' %
                         op_missing_shape)
    return logged_ops
def flops_count(g):
    """Sums the "flops" statistic over every operation of graph `g`.

    Operations whose statistic value is None are skipped. The total is stored
    in the module-level global `flops` (as before) and is now also returned.

    Args:
        g: Graph providing `get_operations()`.

    Returns:
        The summed flops of all operations that report a value.
    """
    global flops
    total_flops = 0
    for op in g.get_operations():
        op_flops = ops.get_stats_for_node_def(g, op.node_def, 'flops').value
        if op_flops is not None:
            total_flops += op_flops
    flops = total_flops
    return total_flops
def testSimpleStatistics(self):
    """Expects 7200 flops for a [25, 16] x [16, 9] MatMul in a fresh graph."""
    graph = ops.Graph()
    with graph.as_default():
        lhs = variables.Variable(random_ops.random_normal([25, 16]))
        rhs = variables.Variable(random_ops.random_normal([16, 9]))
        math_ops.matmul(lhs, rhs)
        for operation in graph.get_operations():
            # All ops are queried; only the one named "MatMul" is asserted.
            flop_count = ops.get_stats_for_node_def(
                graph, operation.node_def, "flops").value
            if operation.name != "MatMul":
                continue
            self.assertEqual(7200, flop_count)
def testTransposedStatistics(self):
    """Expects 7200 flops for a transposed-A MatMul built in its own graph."""
    graph = tf.Graph()
    with graph.as_default():
        lhs = tf.Variable(tf.random_normal([16, 25]))
        rhs = tf.Variable(tf.random_normal([16, 9]))
        tf.matmul(lhs, rhs, transpose_a=True)
        for operation in graph.get_operations():
            # The statistic is fetched for each op; the check is limited to
            # the op named "MatMul".
            flop_count = ops.get_stats_for_node_def(
                graph, operation.node_def, "flops").value
            if operation.name != "MatMul":
                continue
            self.assertEqual(7200, flop_count)
def compute_layer_flops(self, op):
    """Returns the per-sample flops of `op`, caching results by op name.

    Args:
        op: Operation providing `name` and `node_def`.

    Returns:
        The cached value from `self.flops` if present; otherwise the op's
        "flops" statistic divided by FLAGS.batch_size, which is then cached.
    """
    with self.g.as_default():
        key = op.name
        if key not in self.flops:
            stats = tf_ops.get_stats_for_node_def(self.g, op.node_def, 'flops')
            self.flops[key] = stats.value / FLAGS.batch_size
        return self.flops[key]
def testTransposedStatistics(self):
    """Checks the "flops" statistic of tf.matmul(a, b, transpose_a=True)."""
    test_graph = tf.Graph()
    with test_graph.as_default():
        first = tf.Variable(tf.random_normal([16, 25]))
        second = tf.Variable(tf.random_normal([16, 9]))
        tf.matmul(first, second, transpose_a=True)
        for graph_op in test_graph.get_operations():
            stats = ops.get_stats_for_node_def(test_graph, graph_op.node_def,
                                               "flops")
            reported = stats.value
            # Only the op named "MatMul" carries an expectation.
            if graph_op.name == "MatMul":
                self.assertEqual(7200, reported)
def get_flops_for_node_list(g: tf.Graph, nodes_list: List[tf.NodeDef]) -> int:
    """Sums the "flops" statistic over the given node definitions.

    Nodes whose lookup raises ValueError, or whose statistic is missing or
    zero, contribute nothing.

    Args:
        g: Graph the nodes belong to.
        nodes_list: Node definitions to accumulate.

    Returns:
        The integer sum of the available flops values.
    """
    flop_sum = 0
    for node_def in nodes_list:
        try:
            node_stats = ops.get_stats_for_node_def(g, node_def, 'flops')
        except ValueError:
            continue
        if not node_stats or not node_stats.value:
            continue
        flop_sum += int(node_stats.value)
    return flop_sum
def _flops(op):
    """Get the number of flops of an op from the ops stats registry.

    Args:
        op: A tf.Operation object.

    Returns:
        The value of the op's 'flops' statistic, looked up against the
        default graph.
    """
    default_graph = tf.get_default_graph()
    stats = ops.get_stats_for_node_def(default_graph, op.node_def, 'flops')
    return stats.value
def _flops(op):
    """Return the 'flops' statistic registered for `op`.

    Args:
        op: A tf.Operation object.

    Returns:
        The number of flops reported by the stats lookup for `op` in the
        default graph.
    """
    graph = tf.get_default_graph()
    node_def = op.node_def
    flop_stats = ops.get_stats_for_node_def(graph, node_def, 'flops')
    return flop_stats.value
def _get_logged_ops(graph, run_meta=None):
    """Collect FLOPs and trainable-variable tags for the ops of a Graph.

    Args:
        graph: tf.Graph.
        run_meta: RunMetadata proto used to complete shape information.

    Returns:
        logged_ops: dict mapping from op_name to OpLogEntry.
    """
    if run_meta:
        graph = _fill_missing_graph_shape(graph, run_meta)

    skipped_for_shape = 0
    logged_ops = {}
    for node in graph.as_graph_def().node:
        try:
            stats = ops.get_stats_for_node_def(graph, node,
                                               REGISTERED_FLOP_STATS)
        except ValueError:
            # Incomplete shape: count the node and move on.
            skipped_for_shape += 1
            continue
        if not stats or not stats.value:
            continue
        if node.name in logged_ops:
            continue
        flop_entry = tfprof_log_pb2.OpLogEntry()
        flop_entry.name = node.name
        flop_entry.float_ops = int(stats.value)
        logged_ops[flop_entry.name] = flop_entry

    for variable in graph.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES):
        op_name = variable.op.name
        if op_name in logged_ops:
            logged_ops[op_name].types.append(TRAINABLE_VARIABLES)
            continue
        var_entry = tfprof_log_pb2.OpLogEntry()
        var_entry.name = op_name
        var_entry.types.append(TRAINABLE_VARIABLES)
        logged_ops[var_entry.name] = var_entry

    if skipped_for_shape > 0 and not run_meta:
        sys.stderr.write(
            '%d ops no flops stats due to incomplete shapes. '
            'Consider passing run_meta to use run_time shapes.\n' %
            skipped_for_shape)
    return logged_ops
def _get_logged_ops(graph, run_meta=None):
    """Build OpLogEntry records with FLOPs and trainable-variable tags.

    Args:
        graph: tf.Graph.
        run_meta: RunMetadata proto used to complete shape information.

    Returns:
        logged_ops: dict mapping from op_name to OpLogEntry.
    """
    if run_meta:
        graph = _fill_missing_graph_shape(graph, run_meta)

    incomplete_shape_count = 0
    logged_ops = {}
    for node in graph.as_graph_def().node:
        try:
            stats = ops.get_stats_for_node_def(graph, node,
                                               REGISTERED_FLOP_STATS)
        except ValueError:
            # The lookup fails when the shape is incomplete; skip the node.
            incomplete_shape_count += 1
            continue
        has_flops = bool(stats) and bool(stats.value)
        if has_flops and node.name not in logged_ops:
            new_entry = tfprof_log_pb2.OpLogEntry()
            new_entry.name = node.name
            new_entry.float_ops = int(stats.value)
            logged_ops[new_entry.name] = new_entry

    for variable in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
        op_name = variable.op.name
        if op_name in logged_ops:
            logged_ops[op_name].types.append(TRAINABLE_VARIABLES)
        else:
            new_entry = tfprof_log_pb2.OpLogEntry()
            new_entry.name = op_name
            new_entry.types.append(TRAINABLE_VARIABLES)
            logged_ops[new_entry.name] = new_entry

    if incomplete_shape_count > 0 and not run_meta:
        sys.stderr.write(
            '%d ops no flops stats due to incomplete shapes. '
            'Consider passing run_meta to use run_time shapes.\n' %
            incomplete_shape_count)
    return logged_ops
def compute_layer_flops(self, op):
    """Compute the flops of a certain convolution layer.

    Args:
        op: The layer's operation, providing `name` and `node_def`.

    Returns:
        The value cached in `self.flops` for the op name if present;
        otherwise the op's "flops" statistic halved and divided by
        FLAGS.batch_size, which is cached before being returned.
    """
    with self.g.as_default():
        key = op.name
        if key not in self.flops:
            stats = tf_ops.get_stats_for_node_def(self.g, op.node_def, 'flops')
            self.flops[key] = stats.value / 2. / FLAGS.batch_size
        return self.flops[key]
def main(unused_args):
    """Prints the graph-wide total of each requested statistic.

    Reads the GraphDef at FLAGS.graph (binary or text, per
    FLAGS.input_binary), sets the batch dimension of FLAGS.input_layer to
    FLAGS.batch_size, and accumulates each statistic type named in the
    comma-separated FLAGS.statistics across all nodes.

    Args:
        unused_args: Ignored.

    Returns:
        -1 if the graph file does not exist, otherwise None.
    """
    if not gfile.Exists(FLAGS.graph):
        print("Input graph file '" + FLAGS.graph + "' does not exist!")
        return -1

    graph_def = graph_pb2.GraphDef()
    with open(FLAGS.graph, "rb") as f:
        if FLAGS.input_binary:
            graph_def.ParseFromString(f.read())
        else:
            text_format.Merge(f.read(), graph_def)
    _ = tf.import_graph_def(graph_def, name="")

    statistic_types = FLAGS.statistics.split(",")
    total_stats = {}
    for statistic_type in statistic_types:
        total_stats[statistic_type] = ops.OpStats(statistic_type)

    with tf.Session() as sess:
        input_tensor = sess.graph.get_tensor_by_name(FLAGS.input_layer)
        input_shape = input_tensor.get_shape()
        # Replace only the batch dimension; dimensions 1-3 are carried over,
        # so the input is treated as 4-D.
        input_shape = [
            FLAGS.batch_size, input_shape[1], input_shape[2], input_shape[3]
        ]
        input_tensor.set_shape(input_shape)
        for node in graph_def.node:
            for statistic_type in statistic_types:
                node_stats = ops.get_stats_for_node_def(
                    sess.graph, node, statistic_type)
                total_stats[statistic_type] += node_stats

    # Make sure we get pretty-printed numbers with separators.
    locale.setlocale(locale.LC_ALL, "")
    for statistic_type in statistic_types:
        value = total_stats[statistic_type].value
        if value is None:
            friendly_value = "None"
        else:
            # locale.format() is deprecated and removed in Python 3.12;
            # format_string() accepts the same arguments.
            friendly_value = locale.format_string("%d", value, grouping=True)
        print("%s=%s" % (statistic_type, friendly_value))
def new_graph(self, node_names, input_names):
    """Builds a GraphDef from the named nodes, with inputs as placeholders.

    Every node in `node_names` that is not an input is deep-copied and
    annotated with "_output_shapes" (when its op has outputs) and "_flops"
    (when the statistic is available). Each name in `input_names` is then
    appended as a "Placeholder" node carrying the output shapes of the
    original op.

    Args:
        node_names: Names of the nodes to include.
        input_names: Names of the nodes to replace by placeholders.

    Returns:
        The assembled graph_pb2.GraphDef.
    """
    result = graph_pb2.GraphDef()

    for node_name in node_names:
        if node_name in input_names:
            continue
        source_node = self.node_by_name(node_name)
        result.node.extend([copy.deepcopy(source_node)])
        added = result.node[-1]
        graph_op = self.graph.get_operation_by_name(source_node.name)
        if graph_op.outputs:
            shape_protos = [out.get_shape().as_proto()
                            for out in graph_op.outputs]
            added.attr["_output_shapes"].list.shape.extend(shape_protos)
        flop_count = get_stats_for_node_def(self.graph, source_node,
                                            "flops").value
        if flop_count is not None:
            added.attr["_flops"].i = flop_count

    for input_name in input_names:
        graph_op = self.graph.get_operation_by_name(input_name)
        placeholder = SubGraph._node_def("Placeholder", input_name)
        result.node.extend([placeholder])
        if graph_op.outputs:
            shape_protos = [out.get_shape().as_proto()
                            for out in graph_op.outputs]
            result.node[-1].attr["_output_shapes"].list.shape.extend(
                shape_protos)

    return result
def _get_logged_ops(graph):
    """Extract trainable model parameters and FLOPs for ops from a Graph.

    Args:
        graph: tf.Graph.

    Returns:
        logged_ops: dict mapping from op_name to OpLogEntry. Entries are
        created for nodes with a non-zero REGISTERED_FLOP_STATS value and for
        the ops of trainable variables (tagged with TRAINABLE_VARIABLES).
    """
    logged_ops = {}
    for node in graph.as_graph_def().node:
        try:
            stats = ops.get_stats_for_node_def(graph, node,
                                               REGISTERED_FLOP_STATS)
        except ValueError:
            # Raised when the shape is incomplete; skip this node.
            continue
        if not stats or not stats.value:
            continue
        if node.name not in logged_ops:
            entry = tfprof_log_pb2.OpLogEntry()
            entry.name = node.name
            # Explicit int() guards against non-int numeric stat values
            # being rejected by the proto field assignment.
            entry.float_ops = int(stats.value)
            logged_ops[entry.name] = entry

    for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
        if v.op.name not in logged_ops:
            entry = tfprof_log_pb2.OpLogEntry()
            entry.name = v.op.name
            entry.types.append(TRAINABLE_VARIABLES)
            logged_ops[entry.name] = entry
        else:
            logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES)
    return logged_ops
def begin(self):
    """Writes per-layer model statistics to `model_stats.csv` in `self.path`.

    For every operation of the default graph whose name contains
    `self.scope_name`, contains none of the `exclude_in_name` strings and
    whose type is in `supported_stat_ops`, the method records parameter
    counts, multiply-accumulates (maccs), additions and comparisons. A
    "total" row with the column sums is appended before the table is saved.

    The process exits if an operation passes the filters but matches none of
    the handled cases.
    """
    # get graph and operations
    graph = tf.get_default_graph()
    operations = graph.get_operations()
    # Collected once so the bias/batch-norm scan below does not re-read
    # every operation's name attribute for each weights tensor.
    operation_names = [operation.name for operation in operations]

    # setup dictionaries
    biases = defaultdict(lambda: None)
    stat_dict = defaultdict(lambda: {
        "params": 0,
        "maccs": 0,
        "adds": 0,
        "comps": 0
    })

    # iterate over tensors
    for tensor in operations:
        name = tensor.name

        # check if scope_name is in name, or any of the excluded strings
        if self.scope_name not in name or any(
                exclude_name in name for exclude_name in exclude_in_name):
            continue

        # Check if type is considered for the param and macc calculation
        if tensor.type not in supported_stat_ops:
            continue

        base_name = "/".join(name.split("/")[:-1])

        if name.endswith("weights"):
            shape = tensor.node_def.attr["shape"].shape.dim
            sizes = [int(size.size) for size in shape]

            # A sibling BatchNorm or biases op adds one extra parameter per
            # entry of the last weight dimension.
            batch_norm_prefix = base_name + "/BatchNorm"
            biases_prefix = base_name + "/biases"
            if any(batch_norm_prefix in op_name or biases_prefix in op_name
                   for op_name in operation_names):
                biases[base_name] = int(sizes[-1])

            params = 1
            for dim in sizes:
                params = params * dim

            if biases[base_name] is not None:
                params = params + biases[base_name]

            stat_dict[base_name]["params"] = params

        elif tensor.type == "Add":
            flops = ops.get_stats_for_node_def(graph, tensor.node_def,
                                               'flops').value
            if flops is not None:
                stat_dict[name]["adds"] = flops / self.batch_size
        elif tensor.type == "MaxPool":
            flops = ops.get_stats_for_node_def(graph, tensor.node_def,
                                               'comps').value
            if flops is not None:
                stat_dict[name]["comps"] = flops / self.batch_size
        elif tensor.type == "AvgPool":
            flops = ops.get_stats_for_node_def(graph, tensor.node_def,
                                               'flops').value
            if flops is not None:
                stat_dict[name]["adds"] = flops / self.batch_size
        elif tensor.type == "MatMul" or tensor.type == "Conv2D":
            flops = ops.get_stats_for_node_def(graph, tensor.node_def,
                                               'flops').value
            if flops is not None:
                # The flops value is halved to obtain the macc count.
                stat_dict[base_name]["maccs"] += int(
                    flops / 2 / self.batch_size)
        elif name.endswith("biases"):
            pass
        else:
            # Unhandled op type: report it and abort.
            print(name, tensor.type)
            exit()

    total_params = 0
    total_maccs = 0
    total_comps = 0
    total_adds = 0
    # .values() works on both Python 2 and 3 (iteritems() is Python 2 only);
    # the keys were not used.
    for stat in stat_dict.values():
        total_maccs += stat["maccs"]
        total_params += stat["params"]
        total_adds += stat["adds"]
        total_comps += stat["comps"]
    stat_dict["total"] = {
        "maccs": total_maccs,
        "params": total_params,
        "adds": total_adds,
        "comps": total_comps
    }
    df = pd.DataFrame.from_dict(stat_dict, orient='index')
    df.to_csv(os.path.join(self.path, 'model_stats.csv'))
def _after_call(self, output):
    """Extracts the subgraph between the recorded inputs and `output`.

    In cached mode the per-node information is stored on the instance and a
    stripped GraphDef is registered with `self._graph_status`. Otherwise a
    summary of the subgraph (nodes, variables, op counts, inputs/outputs) is
    written to the TensorFlow log.

    Args:
        output: A single output node/tensor or a tuple/list of them.
    """
    # Keep only the input tensors that have at least one consumer.
    self._input_tensors = [
        tensor for tensor in self._input_tensors if tensor.consumers()
    ]
    input_node_names = [
        _node_name(node.name) for node in self._input_tensors
    ]
    # Normalize a single output to a one-element list.
    output_nodes = output if isinstance(output, (tuple, list)) else [output]
    dest_node_names = [_node_name(node.name) for node in output_nodes]
    # The identifier is the string form of the destination node names.
    self._id = str(dest_node_names)
    graph = tf.get_default_graph()
    subgraph = SubGraph(graph=graph)
    subgraph_nodes = subgraph.extract_subgraph_nodes(
        dest_node_names, input_node_names)
    # before_input_nodes = subgraph.extract_subgraph_nodes(input_node_names)
    # subgraph_nodes = [node for node in subgraph_nodes if node not in before_input_nodes or node in input_node_names]
    if self._cached:
        # Record (node, op, shapes, inputs) for every node of the subgraph.
        for node in subgraph_nodes:
            shapes = _GraphInfo.get_output_shapes_by_node_name(node)
            op = subgraph.node_by_name(node).op
            inputs = subgraph.edges(node)
            self._result.append((node, op, shapes, inputs))
        g = subgraph.new_graph(subgraph_nodes, input_node_names)
        self._graph_def = subgraph.strip_consts(g, max_const_size=32)
        self._graph_status.update(self.__wrapped__.__name__, self.id,
                                  self._graph_def)
    else:
        tf.logging.info("------Subgraph for {} ------".format(self.id))
        # NOTE(review): `vars` shadows the builtin, and `ops` would shadow a
        # module-level `ops` name inside this function if one is in scope.
        vars = []
        ops = []
        io_info = []
        for node in subgraph_nodes:
            shapes = _GraphInfo.get_output_shapes_by_node_name(node)
            op = subgraph.node_by_name(node).op
            inputs = subgraph.edges(node)
            # Nodes whose op contains "Variable" are listed with the product
            # of their shapes as parameter count.
            if "Variable" in op:
                num_params = np.prod(shapes)
                vars.append((node, str(shapes), num_params))
            ops.append(op)
            if node in input_node_names:
                io_info.append((node, "INPUT", shapes))
            if node in dest_node_names:
                io_info.append((node, "OUTPUT", shapes))
            flops = get_stats_for_node_def(graph, subgraph.node_by_name(node),
                                           "flops").value
            # Report in millions of flops; NaN when no statistic exists.
            flops = np.nan if flops is None else flops / 1e6
            fmt = " {:<40}{:15}{:<22}{:>15} {}"
            msg = fmt.format(node, op, str(shapes),
                             str(flops) + "MFLOPs", str(inputs))
            tf.logging.info(msg)
        tf.logging.info("------Variables------")
        fmt = " {:<40}{:<22}{}"
        for t in vars:
            tf.logging.info(fmt.format(*t))
        tf.logging.info("------Ops------")
        fmt = " {:<40}{}"
        # Log each op together with its number of occurrences.
        counter = Counter(ops)
        for t in counter.most_common(len(ops)):
            tf.logging.info(fmt.format(*t))
        tf.logging.info("------IO------")
        fmt = " {:<40}{:15}{}"
        for t in io_info:
            tf.logging.info(fmt.format(*t))
        tf.logging.info("------End for {}------".format(self.id))
def testUnregisteredNode(self):
    """Expects a stats value of None for the node built from ("b", "a_b")."""
    graph = ops.Graph()
    node = ops._NodeDef("b", "a_b")
    weight_params = ops.get_stats_for_node_def(graph, node, "weight_params")
    # A stats object is returned even here; only its value is None, so use
    # the identity assertion.
    self.assertIsNone(weight_params.value)
def _compute_statistics(self):
    """Compute parameter number and flops and log them to a file.

    The 'tensorflow' logger is temporarily extended with a file handler that
    writes to `statistics.log` inside `self.params['output_dir']`. Flops are
    summed over the operations of the default graph, and parameters over the
    trainable variables, separately for names starting with 'model/encoder'
    and 'model/decoder'.
    """
    # log to file
    output_dir = self.params['output_dir']
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    log_path = os.path.join(output_dir, 'statistics.log')
    log = logging.getLogger('tensorflow')
    handle = logging.FileHandler(log_path)
    log.addHandler(handle)

    try:
        # FLOPS
        encoder_flops, decoder_flops = 0, 0
        encoder_count, decoder_count = 0, 0
        graph = tf.get_default_graph()
        for operation in graph.get_operations():
            flops = ops.get_stats_for_node_def(graph, operation.node_def,
                                               'flops').value
            if flops is None:
                continue
            if operation.name.startswith('model/encoder'):
                # encoder
                encoder_flops += flops
                encoder_count += 1
                tf.logging.info('encoder operation %s : %d', operation.name,
                                flops)
            elif operation.name.startswith('model/decoder'):
                # decoder
                decoder_flops += flops
                decoder_count += 1
                tf.logging.info('decoder operation %s : %d', operation.name,
                                flops)
            else:
                # gradient
                pass
        tf.logging.info('flops of %d encoder tensor: %d', encoder_count,
                        encoder_flops)
        tf.logging.info('flops of %d decoder tensor: %d', decoder_count,
                        decoder_flops)
        tf.logging.info('flops of total %d tensor: %d',
                        encoder_count + decoder_count,
                        encoder_flops + decoder_flops)

        # parameters
        encoder_parameters, decoder_parameters = 0, 0
        encoder_count, decoder_count = 0, 0
        for var in tf.trainable_variables():
            parameters = np.prod(var.get_shape().as_list())
            if var.name.startswith('model/encoder'):
                # encoder
                encoder_parameters += parameters
                encoder_count += 1
                tf.logging.info('encoder variable %s : %d', var.name,
                                parameters)
            elif var.name.startswith('model/decoder'):
                # decoder
                decoder_parameters += parameters
                decoder_count += 1
                tf.logging.info('decoder variable %s : %d', var.name,
                                parameters)
        tf.logging.info('parameters of %d encoder tensor: %d', encoder_count,
                        encoder_parameters)
        tf.logging.info('parameters of %d decoder tensor: %d', decoder_count,
                        decoder_parameters)
        tf.logging.info('parameters of total %d tensor: %d',
                        encoder_count + decoder_count,
                        encoder_parameters + decoder_parameters)
    finally:
        # disable log to file, even if collecting the statistics failed, and
        # release the file handle that the handler keeps open.
        log.removeHandler(handle)
        handle.close()
def googlenet(dtype=tf.float32, batch_size=16, dev='gpu', width=227, height=227, g=None):
    """Builds a GoogLeNet-style network, times 100 runs and prints its flops.

    The network is a stem of three convolutions and two max-pools followed by
    nine inception stages (with two interleaved max-pools), a final 7x7
    pooling and a reshape to [-1, 1024]. Each intermediate tensor is printed.

    Args:
        dtype: Data type passed to every `var` call.
        batch_size: Leading dimension of the input tensor.
        dev: 'gpu' selects the NCHW layout; any other value selects NHWC.
            'cpu' additionally sets the session's GPU device count to 0.
        width: Input width.
        height: Input height.
        g: Graph whose operations are scanned for the "flops" statistic.
            Defaults to the current default graph when None.
    """
    if g is None:
        # Previously a None graph crashed at g.get_operations() after the
        # whole network had been built.
        g = tf.get_default_graph()

    on_gpu = dev == 'gpu'

    def _conv(x, weights, bias, stride):
        # SAME-padded convolution in the layout selected by `dev`.
        if on_gpu:
            return conv2D(x, weights, bias, [1, 1, stride, stride], 'SAME', 'NCHW')
        return conv2D(x, weights, bias, [1, stride, stride, 1], 'SAME', 'NHWC')

    def _max_pool_3x3_s2(x):
        # 3x3 max-pool with stride 2 in the layout selected by `dev`.
        if on_gpu:
            return mxPool(x, [1, 1, 3, 3], [1, 1, 2, 2], 'SAME', 'NCHW')
        return mxPool(x, [1, 3, 3, 1], [1, 2, 2, 1], 'SAME', 'NHWC')

    def _inception_ops(in_channels, out_1x1, out_3x3, out_5x5, out_last_1x1):
        # Fresh nested lists per call, matching the original literal tables.
        return [
            ['conv', in_channels, out_1x1, [1, 1], [1, 1], 'SAME'],
            ['conv', in_channels, out_3x3, [3, 3], [1, 1], 'SAME'],
            ['conv', in_channels, out_5x5, [5, 5], [1, 1], 'SAME'],
            ['conv', in_channels, out_last_1x1, [1, 1], [1, 1], 'SAME'],
        ]

    if on_gpu:
        x_images = var([batch_size, 3, width, height], "T_NORMAL", dtype=dtype)
    else:
        x_images = var([batch_size, width, height, 3], "T_NORMAL", dtype=dtype)

    W_1 = var([7, 7, 3, 64], "T_NORMAL", dtype=dtype)
    b_1 = var([64], "CONSTANT", 0.1, dtype=dtype)
    h_conv1 = _conv(x_images, W_1, b_1, 2)
    print(h_conv1)
    h_pool1 = _max_pool_3x3_s2(h_conv1)
    print(h_pool1)

    W_2 = var([1, 1, 64, 64], "T_NORMAL", dtype=dtype)
    b_2 = var([64], "CONSTANT", 0.1, dtype=dtype)
    h_conv2 = _conv(h_pool1, W_2, b_2, 1)
    print(h_conv2)

    W_3 = var([3, 3, 64, 192], "T_NORMAL", dtype=dtype)
    b_3 = var([192], "CONSTANT", 0.1, dtype=dtype)
    h_conv3 = _conv(h_conv2, W_3, b_3, 1)
    print(h_conv3)
    h_pool3 = _max_pool_3x3_s2(h_conv3)
    print(h_pool3)

    # (in_channels, 1x1, 3x3, 5x5, trailing 1x1) per inception stage, plus
    # whether a 3x3/stride-2 max-pool follows the stage.
    inception_stages = [
        ((192, 64, 128, 32, 32), False),
        ((256, 128, 192, 96, 64), True),
        ((480, 192, 208, 48, 64), False),
        ((512, 160, 224, 64, 64), False),
        ((512, 128, 256, 64, 64), False),
        ((512, 112, 288, 64, 64), False),
        ((528, 256, 320, 128, 128), True),
        ((832, 256, 320, 128, 128), False),
        ((832, 384, 384, 128, 128), False),
    ]
    net = h_pool3
    for channels, pool_after in inception_stages:
        net = inception_("incept_v1", _inception_ops(*channels), net, dtype, dev)
        print(net)
        if pool_after:
            net = _max_pool_3x3_s2(net)
            print(net)

    if on_gpu:
        h_pool7 = avgPool(net, [1, 1, 7, 7], [1, 1, 1, 1], 'VALID', 'NCHW')
    else:
        # NOTE(review): this path max-pools while the GPU path average-pools;
        # kept as in the original -- confirm whether avgPool was intended.
        h_pool7 = mxPool(net, [1, 7, 7, 1], [1, 1, 1, 1], 'VALID', 'NHWC')
    print(h_pool7)

    y = tf.reshape(h_pool7, [-1, 1024])
    print(y)

    # Sum the registered flops; the statistic is looked up once per op and
    # ops without a value count as 0.
    total_flops = 0
    for op in g.get_operations():
        op_flops = ops.get_stats_for_node_def(g, op.node_def, 'flops').value
        if op_flops is not None:
            total_flops += op_flops

    optimizer_options = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
    graph_options = tf.GraphOptions(optimizer_options=optimizer_options)
    if dev == 'cpu':
        config = tf.ConfigProto(graph_options=graph_options,
                                device_count={'GPU': 0})
    else:
        config = tf.ConfigProto(graph_options=graph_options)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        # One untimed run before the measurement loop.
        sess.run(y.op)

        num_iters = 100
        start = time()
        for i in range(num_iters):
            if (i + 1) % 10 == 0:
                print("< %s >" % (i // 10,))
            sess.run(y.op)
        elapsed = time() - start

        print("frames/sec: %s" % (num_iters * batch_size / elapsed,))
        print("flops: %s" % (total_flops,))
def calculate_graph_metrics(graph_def, statistic_types, input_layer,
                            input_shape_override, batch_size):
    """Collect per-node and summed statistics for every node of a graph.

    The graph is imported into the current default graph, the input tensor's
    shape is pinned (with dimension 0 replaced by ``batch_size``), and each
    requested statistic is then queried node by node.

    Parameters
    ----------
    graph_def : GraphDef
        Graph to import with ``tf.import_graph_def``; its ``node`` field is
        iterated to visit every node.
    statistic_types : iterable of str
        Statistic names handed to ``ops.get_stats_for_node_def`` and used as
        the keys of both returned dictionaries.
    input_layer : str
        Name passed to ``graph.get_tensor_by_name`` to find the input tensor.
    input_shape_override : sequence or None
        When truthy, used in place of the shape reported by the input tensor.
        The sequence is copied, so the caller's object is never modified.
    batch_size : int
        Value written into dimension 0 of the input shape.

    Returns
    -------
    tuple
        ``(total_stats, node_stats)``: ``total_stats[stat]`` is the
        accumulated ``OpStats`` over all nodes, ``node_stats[stat][name]`` is
        the ``OpStats`` returned for the node called ``name``.

    Raises
    ------
    ValueError
        If no override was given and the input tensor reports no usable shape.
    """
    tf.import_graph_def(graph_def, name="")
    total_stats = {}
    node_stats = {}
    for statistic_type in statistic_types:
        total_stats[statistic_type] = ops.OpStats(statistic_type)
        node_stats[statistic_type] = {}
    # Make sure we get pretty-printed numbers with separators.
    locale.setlocale(locale.LC_ALL, "")
    with tf.Session() as sess:
        input_tensor = sess.graph.get_tensor_by_name(input_layer)
        input_shape_tensor = input_tensor.get_shape()
        # NOTE(review): relies on the shape object being falsy when it is
        # unknown -- confirm against the TensorShape truthiness rules.
        if input_shape_tensor:
            input_shape = input_shape_tensor.as_list()
        else:
            input_shape = None
        if input_shape_override:
            # Copy so that writing the batch size below cannot leak into the
            # caller's list (and so that tuples are accepted as well).
            input_shape = list(input_shape_override)
        if input_shape is None:
            raise ValueError(
                "No input shape was provided on the command line,"
                " and the input op itself had no default shape, so"
                " shape inference couldn't be performed. This is"
                " required for metrics calculations.")
        input_shape[0] = batch_size
        input_tensor.set_shape(input_shape)
        for node in graph_def.node:
            # Ensure that the updated input shape has been fully-propagated
            # before we ask for the statistics, since they may depend on the
            # output size.
            op = sess.graph.get_operation_by_name(node.name)
            ops.set_shapes_for_outputs(op)
            for statistic_type in statistic_types:
                current_stats = ops.get_stats_for_node_def(
                    sess.graph, node, statistic_type)
                node_stats[statistic_type][node.name] = current_stats
                total_stats[statistic_type] += current_stats
    return total_stats, node_stats
def run_profiler(pipeline_config_path, run_mode, data_split, ckpt_index):
    """Profile one model step and report parameters, FLOPs, time and memory.

    Builds the model for the requested mode, runs a single fully traced
    ``sess.run`` and prints the TensorFlow profiler reports to the console.
    A timeline is also written to
    ``<avod top dir>/scripts/profilers/tf_profiler/tf_timeline_output.json``.

    Args:
        pipeline_config_path: Forwarded to ``set_up_model_train_mode`` or
            ``set_up_model_test_mode``.
        run_mode: Either ``'train'`` or ``'test'``.
        data_split: Forwarded to the model set-up helper. Must not be
            ``'test'`` when ``run_mode`` is ``'train'``.
        ckpt_index: Index into ``saver.last_checkpoints`` selecting the
            checkpoint to restore; only used in test mode.

    Raises:
        ValueError: If ``run_mode`` is neither ``'train'`` nor ``'test'``, if
            the test split is requested in train mode, or if no checkpoint is
            available in test mode.
    """
    # Reject bad arguments before any session or model is created.
    if run_mode not in ('train', 'test'):
        raise ValueError('Invalid run_mode {}'.format(run_mode))
    if run_mode == 'train' and data_split == 'test':
        # In train mode, data_split should not be 'test' as the test
        # split does not have gt.
        raise ValueError('Data split can only be train or val '
                         'in train mode.')

    avod_top_dir = avod.top_dir()
    # Timeline results logfile
    file_name = avod_top_dir + '/scripts/profilers/tf_profiler/' + \
        'tf_timeline_output.json'

    with tf.Session() as sess:
        if run_mode == 'train':
            model, train_op = set_up_model_train_mode(pipeline_config_path,
                                                      data_split)
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            model, model_config = set_up_model_test_mode(
                pipeline_config_path, data_split)
            paths_config = model_config.paths_config
            checkpoint_dir = paths_config.checkpoint_dir
            prediction_dict = model.build()

            # Load the weights
            saver = tf.train.Saver()
            trainer_utils.load_checkpoints(checkpoint_dir, saver)
            if not saver.last_checkpoints:
                raise ValueError('Need existing checkpoints to run '
                                 'in test_mode')
            checkpoint_to_restore = saver.last_checkpoints[ckpt_index]
            saver.restore(sess, checkpoint_to_restore)

        feed_dict = model.create_feed_dict()

        ############################################
        # Parameters and Shapes
        ############################################
        graph = tf.get_default_graph()

        # Print trainable variable parameter statistics to stdout.
        ProfileOptionBuilder = tf.profiler.ProfileOptionBuilder

        # Gives the total number of trainable parameters
        param_stats = tf.profiler.profile(
            graph,
            options=ProfileOptionBuilder.trainable_variables_parameter())

        # Gives the FLOPS for the ops
        tf.profiler.profile(
            graph,
            options=tf.profiler.ProfileOptionBuilder.float_operation())

        run_metadata = tf.RunMetadata()
        if run_mode == 'train':
            sess.run(
                [train_op],
                options=tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE),
                run_metadata=run_metadata,
                feed_dict=feed_dict)
        else:
            # Run in test mode
            sess.run(
                prediction_dict,
                options=tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE),
                run_metadata=run_metadata,
                feed_dict=feed_dict)

        # The profiler gives us rounded FLOP counts
        # So instead query it directly and count the total
        op_missing_shape = 0
        total_flops = 0
        for op in graph.get_operations():
            try:
                stats = ops.get_stats_for_node_def(
                    graph, op.node_def, 'flops')
                if stats.value:
                    total_flops += stats.value
            except ValueError:
                # Counted and reported below rather than aborting the run.
                op_missing_shape += 1
        print('=============================================================')
        print('Number of ops with missing shape: ', op_missing_shape)
        print('=============================================================')

        ############################################
        # Log Time and Memory
        ############################################
        # Log the analysis to file
        # 'code' view organizes profile using Python call stack
        opts = ProfileOptionBuilder(
            ProfileOptionBuilder.time_and_memory()).with_timeline_output(
                file_name).build()

        tf.profiler.profile(
            graph,
            run_meta=run_metadata,
            cmd='code',
            options=opts)

        ############################################
        # Show Time and Memory on the console
        ############################################
        tf.profiler.profile(
            graph,
            run_meta=run_metadata,
            cmd='op',
            options=tf.profiler.ProfileOptionBuilder.time_and_memory())

        # print the total number of parameters
        print('Total params: %d' % param_stats.total_parameters)
        print('Total FLOPs: ', total_flops)
        print('=============================================================')
def _get_logged_ops(graph, run_meta=None, add_trace=True,
                    add_trainable_var=True):
    """Build OpLogEntry records (FLOPs, code traces, variable tags) for a graph.

    Args:
      graph: tf.Graph.
      run_meta: RunMetadata proto used to complete shape information.
      add_trace: Whether to add op trace information.
      add_trainable_var: Whether to assign tf.trainable_variables() op type
        '_trainable_variables'.

    Returns:
      logged_ops: dict mapping from op_name to OpLogEntry.
      string_to_id: dict mapping from string to id.
    """
    if run_meta:
        graph = _fill_missing_graph_shape(graph, run_meta)

    string_to_id = {'none': 0}
    logged_ops = {}
    incomplete_shape_count = 0

    # TODO(xpan): Work with Profiler more efficiently.
    for op in graph.get_operations():
        try:
            flop_stats = ops.get_stats_for_node_def(
                graph, op.node_def, REGISTERED_FLOP_STATS)
        except ValueError:
            # Shape is incomplete for this op: count it and carry on.
            incomplete_shape_count += 1
            flop_stats = None

        entry = tfprof_log_pb2.OpLogEntry()
        entry.name = op.name

        has_flops = bool(flop_stats and flop_stats.value)
        if has_flops:
            entry.float_ops = int(flop_stats.value)

        if add_trace:
            for frame in op.traceback_with_start_lines:
                trace = entry.code_def.traces.add()
                # Missing string fields map to id 0, missing line numbers
                # to -1.
                trace.file_id = (
                    _str_id(frame[0], string_to_id) if frame[0] else 0)
                trace.lineno = frame[1] or -1
                trace.function_id = (
                    _str_id(frame[2], string_to_id) if frame[2] else 0)
                trace.line_id = (
                    _str_id(frame[3], string_to_id) if frame[3] else 0)
                trace.func_start_line = frame[4] or -1

        # An op is logged when it has FLOPs or when tracing is requested.
        if has_flops or add_trace:
            logged_ops[entry.name] = entry

    if add_trainable_var:
        for variable in graph.get_collection(
                ops.GraphKeys.TRAINABLE_VARIABLES):
            if variable.op.name in logged_ops:
                logged_ops[variable.op.name].types.append(
                    TRAINABLE_VARIABLES)
                continue
            entry = tfprof_log_pb2.OpLogEntry()
            entry.name = variable.op.name
            entry.types.append(TRAINABLE_VARIABLES)
            logged_ops[entry.name] = entry

    if incomplete_shape_count > 0 and not run_meta:
        sys.stderr.write(
            '%d ops no flops stats due to incomplete shapes.\n' %
            incomplete_shape_count)
    return logged_ops, string_to_id
## https://stackoverflow.com/questions/45085938/tensorflow-is-there-a-way-to-measure-flops-for-a-model
import tensorflow as tf
import tensorflow.python.framework.ops as ops

# Every message below is printed as one pre-formatted string so the script
# runs, and prints the same text, on both Python 2 and Python 3.

# --- Variant 1: ask each op for its registered 'flops' statistic ----------
g = tf.Graph()
with g.as_default():
    A = tf.Variable(tf.random_normal([25, 16]))
    B = tf.Variable(tf.random_normal([16, 9]))
    C = tf.matmul(A, B)  # shape=[25,9]

for op in g.get_operations():
    flops = ops.get_stats_for_node_def(g, op.node_def, 'flops').value
    # Ops that report no 'flops' value are skipped.
    if flops is not None:
        print('Flops should be ~ %s' % (2 * 25 * 16 * 9))
        # ignores internal dim, repeats first
        print('25 x 25 x 9 would be %s' % (2 * 25 * 25 * 9))
        print('TF stats gives %s' % (flops,))

# --- Variant 2: let tf.profiler total the float operations ----------------
g = tf.Graph()
run_meta = tf.RunMetadata()
with g.as_default():
    A = tf.Variable(tf.random_normal([25, 16]))
    B = tf.Variable(tf.random_normal([16, 9]))
    C = tf.matmul(A, B)  # shape=[25,9]

    opts = tf.profiler.ProfileOptionBuilder.float_operation()
    flops = tf.profiler.profile(g, run_meta=run_meta, cmd='op', options=opts)
    if flops is not None:
        print('Flops should be ~ %s' % (2 * 25 * 16 * 9))
        # ignores internal dim, repeats first
        print('25 x 25 x 9 would be %s' % (2 * 25 * 25 * 9))
        print('TF stats gives %s' % (flops.total_float_ops,))
def testUnregisteredNode(self):
    """Stats for the node built by _NodeDef("b", "a_b") carry a None value."""
    stats = ops.get_stats_for_node_def(
        ops.Graph(), ops._NodeDef("b", "a_b"), "weight_params")
    self.assertEqual(None, stats.value)
def calculate_graph_metrics(graph_def, statistic_types, input_layer,
                            input_shape_override, batch_size):
    """Gather the requested statistics for all nodes in the graph.

    Imports ``graph_def`` into the current default graph, fixes the shape of
    the input tensor (dimension 0 is set to ``batch_size``) and queries every
    requested statistic for every node.

    Parameters
    ----------
    graph_def : GraphDef
        Graph passed to ``tf.import_graph_def``; each entry of its ``node``
        field is visited.
    statistic_types : iterable of str
        Names of the statistics to query; they key both returned dicts.
    input_layer : str
        Tensor name resolved with ``graph.get_tensor_by_name``.
    input_shape_override : sequence or None
        Replaces the input tensor's own shape when truthy. A copy is taken,
        leaving the caller's sequence untouched.
    batch_size : int
        Stored as dimension 0 of the input shape.

    Returns
    -------
    tuple
        ``(total_stats, node_stats)`` where ``total_stats[stat]`` is the
        ``OpStats`` summed over all nodes and ``node_stats[stat][name]`` is
        the ``OpStats`` of the individual node ``name``.

    Raises
    ------
    ValueError
        If there is neither an override nor a usable shape on the input
        tensor.
    """
    tf.import_graph_def(graph_def, name="")
    total_stats = {}
    node_stats = {}
    for statistic_type in statistic_types:
        total_stats[statistic_type] = ops.OpStats(statistic_type)
        node_stats[statistic_type] = {}
    # Make sure we get pretty-printed numbers with separators.
    locale.setlocale(locale.LC_ALL, "")
    with tf.Session() as sess:
        input_tensor = sess.graph.get_tensor_by_name(input_layer)
        input_shape_tensor = input_tensor.get_shape()
        # NOTE(review): depends on an unknown shape evaluating as falsy --
        # confirm against the TensorShape truthiness rules.
        if input_shape_tensor:
            input_shape = input_shape_tensor.as_list()
        else:
            input_shape = None
        if input_shape_override:
            # Work on a copy: the batch-size write below must not modify the
            # list owned by the caller.
            input_shape = list(input_shape_override)
        if input_shape is None:
            raise ValueError(
                "No input shape was provided on the command line,"
                " and the input op itself had no default shape, so"
                " shape inference couldn't be performed. This is"
                " required for metrics calculations.")
        input_shape[0] = batch_size
        input_tensor.set_shape(input_shape)
        for node in graph_def.node:
            # Ensure that the updated input shape has been fully-propagated
            # before we ask for the statistics, since they may depend on the
            # output size.
            op = sess.graph.get_operation_by_name(node.name)
            ops.set_shapes_for_outputs(op)
            for statistic_type in statistic_types:
                current_stats = ops.get_stats_for_node_def(
                    sess.graph, node, statistic_type)
                node_stats[statistic_type][node.name] = current_stats
                total_stats[statistic_type] += current_stats
    return total_stats, node_stats
def _get_logged_ops(graph, run_meta=None, add_trace=True,
                    add_trainable_var=True):
    """Collect per-op log entries (FLOPs, tracebacks, variable tags).

    Args:
      graph: tf.Graph.
      run_meta: RunMetadata proto used to complete shape information.
      add_trace: Whether to add op trace information.
      add_trainable_var: Whether to assign tf.trainable_variables() op type
        '_trainable_variables'.

    Returns:
      logged_ops: dict mapping from op_name to OpLogEntry.
      string_to_id: dict mapping from string to id.
    """
    if run_meta:
        graph = _fill_missing_graph_shape(graph, run_meta)

    logged_ops = {}
    string_to_id = {'none': 0}
    missing_shape_ops = 0

    # TODO (xpan): Work with Profiler more efficiently. id:3464 gh:3465
    for op in graph.get_operations():
        try:
            op_stats = ops.get_stats_for_node_def(graph, op.node_def,
                                                  REGISTERED_FLOP_STATS)
        except ValueError:
            # Incomplete shape: no FLOP statistic for this op, keep going.
            missing_shape_ops += 1
            op_stats = None

        log_entry = tfprof_log_pb2.OpLogEntry()
        log_entry.name = op.name

        keep_entry = False
        if op_stats and op_stats.value:
            log_entry.float_ops = int(op_stats.value)
            keep_entry = True

        if add_trace:
            keep_entry = True
            for tb_frame in op.traceback_with_start_lines:
                code_trace = log_entry.code_def.traces.add()
                # Absent strings are recorded as id 0, absent line numbers
                # as -1.
                if tb_frame[0]:
                    code_trace.file_id = _str_id(tb_frame[0], string_to_id)
                else:
                    code_trace.file_id = 0
                code_trace.lineno = tb_frame[1] or -1
                if tb_frame[2]:
                    code_trace.function_id = _str_id(tb_frame[2],
                                                     string_to_id)
                else:
                    code_trace.function_id = 0
                if tb_frame[3]:
                    code_trace.line_id = _str_id(tb_frame[3], string_to_id)
                else:
                    code_trace.line_id = 0
                code_trace.func_start_line = tb_frame[4] or -1

        if keep_entry:
            logged_ops[log_entry.name] = log_entry

    if add_trainable_var:
        trainable = graph.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
        for tf_var in trainable:
            if tf_var.op.name in logged_ops:
                logged_ops[tf_var.op.name].types.append(TRAINABLE_VARIABLES)
            else:
                log_entry = tfprof_log_pb2.OpLogEntry()
                log_entry.name = tf_var.op.name
                log_entry.types.append(TRAINABLE_VARIABLES)
                logged_ops[log_entry.name] = log_entry

    if missing_shape_ops > 0 and not run_meta:
        sys.stderr.write('%d ops no flops stats due to incomplete shapes.\n' %
                         missing_shape_ops)
    return logged_ops, string_to_id
def alexnet(dtype=tf.float32, batch_size=16, dev='gpu', width=227, height=227,
            g=None):
    """Build the benchmark network, time 100 forward runs and print results.

    The network is five convolutions (max-pooling after the 1st, 2nd and 5th)
    followed by two ReLU matmul layers of 4096 and 1000 units. It is built
    with the module's ``var``/``conv2D``/``mxPool`` helpers. After one warm-up
    run, ``y.op`` is executed 100 times. The function prints intermediate
    tensors, a progress marker every 10 runs, the measured frames/sec and the
    summed 'flops' statistic of the graph.

    Args:
        dtype: Element type forwarded to every ``var`` call.
        batch_size: Leading dimension of the input; also scales the reported
            frames/sec.
        dev: ``'gpu'`` builds the graph in NCHW layout, any other value in
            NHWC. ``'cpu'`` additionally configures the session with
            ``device_count={'GPU': 0}``.
        width: First spatial extent of the input variable.
        height: Second spatial extent of the input variable.
        g: Graph whose operations are queried for FLOP statistics. ``None``
            (the default) selects the current default graph, which previously
            raised ``AttributeError``.
            NOTE(review): assumes the helpers create their ops in the default
            graph -- confirm against ``var``/``conv2D``/``mxPool``.
    """
    # Only the literal 'gpu' selects channels-first, as in the original code.
    channels_first = dev == 'gpu'
    data_format = 'NCHW' if channels_first else 'NHWC'

    def spatial(rows, cols):
        """Return a 4-element window/stride list for the active layout."""
        if channels_first:
            return [1, 1, rows, cols]
        return [1, rows, cols, 1]

    if channels_first:
        input_shape = [batch_size, 3, width, height]
    else:
        input_shape = [batch_size, width, height, 3]
    x_images = var(input_shape, "T_NORMAL", dtype=dtype)

    W_1 = var([11, 11, 3, 64], "T_NORMAL", dtype=dtype)
    b_1 = var([64], "CONSTANT", 0.1, dtype=dtype)
    h_conv1 = conv2D(x_images, W_1, b_1, spatial(4, 4), 'VALID', data_format)
    h_pool1 = mxPool(h_conv1, spatial(3, 3), spatial(2, 2), 'VALID',
                     data_format)
    print(h_pool1)

    W_2 = var([5, 5, 64, 256], "T_NORMAL", dtype=dtype)
    b_2 = var([256], "CONSTANT", 0.1, dtype=dtype)
    h_conv2 = conv2D(h_pool1, W_2, b_2, spatial(1, 1), 'SAME', data_format)
    h_pool2 = mxPool(h_conv2, spatial(3, 3), spatial(2, 2), 'VALID',
                     data_format)
    print(h_pool2)

    W_3 = var([3, 3, 256, 384], "T_NORMAL", dtype=dtype)
    b_3 = var([384], "CONSTANT", 0.1, dtype=dtype)
    h_conv3 = conv2D(h_pool2, W_3, b_3, spatial(1, 1), 'SAME', data_format)
    print(h_conv3)

    W_4 = var([3, 3, 384, 384], "T_NORMAL", dtype=dtype)
    b_4 = var([384], "CONSTANT", 0.1, dtype=dtype)
    h_conv4 = conv2D(h_conv3, W_4, b_4, spatial(1, 1), 'SAME', data_format)
    print(h_conv4)

    W_5 = var([3, 3, 384, 256], "T_NORMAL", dtype=dtype)
    b_5 = var([256], "CONSTANT", 0.1, dtype=dtype)
    h_conv5 = conv2D(h_conv4, W_5, b_5, spatial(1, 1), 'SAME', data_format)
    h_pool5 = mxPool(h_conv5, spatial(3, 3), spatial(2, 2), 'VALID',
                     data_format)
    print("h_: %s" % (h_pool5,))

    # Flatten everything but the batch dimension for the matmul layers.
    dim = h_pool5.get_shape().as_list()
    flat_size = dim[1] * dim[2] * dim[3]
    W_6 = var([flat_size, 4096], "T_NORMAL", dtype=dtype)
    b_6 = var([4096], "CONSTANT", 0.1, dtype=dtype)
    h_pool5_flat = tf.reshape(h_pool5, [-1, flat_size])
    print(h_pool5)
    h_full6 = tf.nn.relu(tf.matmul(h_pool5_flat, W_6) + b_6)
    print(h_full6)

    W_7 = var([4096, 1000], "T_NORMAL", dtype=dtype)
    b_7 = var([1000], "CONSTANT", 0.1, dtype=dtype)
    y = tf.nn.relu(tf.matmul(h_full6, W_7) + b_7)
    print(y)

    # Sum the registered 'flops' statistic over the graph; ops that report
    # no value contribute nothing. Each op is queried once.
    if g is None:
        g = tf.get_default_graph()
    total_flops = 0
    for op in g.get_operations():
        op_flops = ops.get_stats_for_node_def(g, op.node_def, 'flops').value
        if op_flops is not None:
            total_flops += op_flops

    graph_options = tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0))
    if dev == 'cpu':
        # Hide every GPU from the session so the benchmark runs on CPU.
        config = tf.ConfigProto(graph_options=graph_options,
                                device_count={'GPU': 0})
    else:
        config = tf.ConfigProto(graph_options=graph_options)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        # Warm-up run, kept outside the timed region.
        sess.run(y.op)

        num_iters = 100
        start = time()
        for i in range(num_iters):
            if (i + 1) % 10 == 0:
                print("< %s >" % (i // 10))
            sess.run(y.op)
        elapsed = time() - start

        print("frames/sec: %s" % (num_iters * batch_size / elapsed))
        print("flops: %s" % (total_flops,))