def test_metadata():
    """Check that dump/load round-trips graph metadata (user_info and flags)."""
    x = Tensor(0)

    @trace(symbolic=True, capture_as_const=True)
    def fwd(x):
        return x * 2

    fwd(x)
    orig_model = io.BytesIO()
    # Dump straight from tracing: metadata records the user_info and that the
    # graph is unmodified and not optimized for inference.
    fwd.dump(orig_model, user_info="test", optimize_for_inference=False)
    orig_model.seek(0)

    graph = Net.load(orig_model)
    assert graph.metadata == {
        "user_info": "test",
        "graph_modified": False,  # False: tracing.dump
        "optimized_for_inference": False,
    }

    orig_model.seek(0)
    # Re-dump via Network with arbitrary picklable user_info objects and
    # inference optimizations enabled; the optimization flags are recorded too.
    graph.dump(
        orig_model,
        user_info={
            "str": "x",
            "tensor": x,
            "module": M.Module,
            "none": None
        },
        optimize_for_inference=True,
        enable_nchw4=True,
        enable_ioc16=True,
    )
    orig_model.seek(0)
    graph = Net.load(orig_model)
    assert graph.metadata == {
        "user_info": {
            "str": "x",
            "tensor": x,
            "module": M.Module,
            "none": None
        },
        "graph_modified": True,  # True: Network.dump
        "optimized_for_inference": True,
        "enable_nchw4": True,
        "enable_ioc16": True,
    }

    orig_model.seek(0)
    # With metadata disabled nothing is stored at all.
    fwd.dump(orig_model, enable_metadata=False)
    orig_model.seek(0)
    graph = Net.load(orig_model)
    assert graph.metadata is None
def test_replace_opr():
    """Replace an operator in a loaded Network and check the new graph output."""
    x = Tensor([1, 2])
    y = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(x, y)
    buf = io.BytesIO()
    fwd.dump(
        buf, arg_names=["a", "b"], output_names="o", optimize_for_inference=False
    )
    buf.seek(0)
    net = Net.load(buf)

    # Build relu(a - b) as a replacement for the original (a + b) operator.
    var_a = net.var_filter.name("a").as_unique()
    var_b = net.var_filter.name("b").as_unique()
    new_out = F.relu(F.sub(var_a, var_b))
    new_out = net.add_dep_oprs(new_out)
    old_opr = net.opr_filter.has_input(var_a).as_unique()
    net.replace_oprs({old_opr: new_out[0].owner})

    dumped = io.BytesIO()
    net.dump(dumped)
    dumped.seek(0)
    runner = GraphInference(dumped)
    result = runner.run(x, y)
    # relu([1, 2] - [3, 4]) * 2 == [0, 0]
    np.testing.assert_equal(result["o"], [0, 0])
def test_add_input():
    """make_input_node adds a new graph input that can feed a new output."""
    a = Tensor([1, 2])
    b = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(orig_model, arg_names=["a", "b"], output_names="o", optimize_for_inference=False)
    orig_model.seek(0)

    graph = Net.load(orig_model)
    # New input "c" with the same shape as the existing tensors.
    inp_c = graph.make_input_node((2, ), np.int32, name="c")
    varo = graph.var_filter.name("o").as_unique()
    out = F.add(varo, inp_c)
    out.name = "o1"
    # Swap the graph's output from "o" to "o1" = o + c.
    graph.remove_output(varo)
    graph.add_output(out)

    modified_model = io.BytesIO()
    graph.dump(modified_model)
    modified_model.seek(0)
    load_graph = GraphInference(modified_model)
    # The third positional argument feeds the newly added input "c".
    out = load_graph.run(a, b, a)
    np.testing.assert_equal(out["o1"], ((a + b) * 2 + a).numpy())
def check_pygraph_dump(trace_func, inp_data, expect_results, max_err=None):
    """Round-trip a traced function through Network load/dump and compare outputs.

    The traced function is dumped, reloaded as a Network, dumped again, and
    executed with GraphInference; each output must match ``expect_results``
    (exactly, or to ``max_err`` decimal places when given) and keep its dtype.
    """
    arg_names = ["arg_{}".format(i) for i in range(len(inp_data))]
    output_names = ["out_{}".format(i) for i in range(len(expect_results))]

    orig_model = io.BytesIO()
    trace_func.dump(
        orig_model,
        arg_names=arg_names,
        output_names=output_names,
        optimize_for_inference=False,
    )
    orig_model.seek(0)

    # Re-dump through the Network API to exercise both load and dump.
    redumped = io.BytesIO()
    Net.load(orig_model).dump(redumped, optimize_for_inference=False)
    redumped.seek(0)

    runner = GraphInference(redumped)
    feed = {name: tensor.numpy() for name, tensor in zip(arg_names, inp_data)}
    results = runner.run(inp_dict=feed)

    for name, expected in zip(output_names, expect_results):
        actual = results[name]
        if max_err:
            np.testing.assert_almost_equal(expected.numpy(), actual, max_err)
        else:
            np.testing.assert_equal(expected.numpy(), actual)
        assert expected.dtype == actual.dtype
def test_set_symbolic_shape():
    """Toggling symbolic shape switches VarNode.shape between var and tuple."""
    inp = Tensor([1.0, 2.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a):
        return F.relu(a * 2)

    fwd(inp)
    model = io.BytesIO()
    fwd.dump(
        model,
        arg_names=["a"],
        output_names=["o"],
        optimize_for_inference=False,
    )
    model.seek(0)
    net = Net.load(model)
    var_a = net.input_vars[0]

    # Under symbolic mode .shape is itself a VarNode; otherwise it is concrete.
    previous = set_symbolic_shape(True)
    assert isinstance(var_a.shape, VarNode)
    set_symbolic_shape(False)
    assert var_a.shape == var_a.partial_shape
    # Restore whatever setting the caller had.
    set_symbolic_shape(previous)
def test_make_const():
    """Replace a graph input with a constant and run with fewer inputs."""
    x = Tensor([1, 2])
    y = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(x, y)
    buf = io.BytesIO()
    fwd.dump(
        buf, arg_names=["a", "b"], output_names="o", optimize_for_inference=False
    )
    buf.seek(0)
    net = Net.load(buf)

    # Swap the "b" input for a zero-valued constant node.
    zeros = net.make_const(np.array([0.0, 0.0]), name="b")
    var_b = net.var_filter.name("b").as_unique()
    net.replace_vars({var_b: zeros})

    dumped = io.BytesIO()
    net.dump(dumped)
    dumped.seek(0)
    result = GraphInference(dumped).run(x)
    # ([1, 2] + [0, 0]) * 2 == [2, 4]
    np.testing.assert_equal(result["o"], [2, 4])
def test_add_output():
    """add_output appends an extra output var while keeping the original one."""
    a = Tensor([1.0, 2.0])
    b = Tensor([3.0, 4.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model, arg_names=["a", "b"], output_names="o", optimize_for_inference=False
    )
    orig_model.seek(0)

    net = Net.load(orig_model)
    var_a = net.var_filter.name("a").as_unique()
    var_b = net.var_filter.name("b").as_unique()

    # Build a second output o1 = sigmoid(a + b) on top of the loaded graph.
    y = F.add(var_a, var_b)
    y = F.sigmoid(y)
    y.name = "o1"
    net.add_output(y)

    modified_model = io.BytesIO()
    net.dump(modified_model)
    modified_model.seek(0)
    g = GraphInference(modified_model)
    out = g.run(a.numpy(), b.numpy())
    # Both the original output and the added one are produced.
    np.testing.assert_equal(out["o"], ((a + b) * 2).numpy())
    np.testing.assert_equal(out["o1"], (F.sigmoid((a + b))).numpy())
def test_modify_params():
    """set_value on a captured parameter changes the dumped model's result."""
    x = Tensor([1, 2])
    y = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(x, y)
    buf = io.BytesIO()
    fwd.dump(
        buf, arg_names=["a", "b"], output_names="o", optimize_for_inference=False
    )
    buf.seek(0)
    net = Net.load(buf)

    # The traced constant 2 is the only parameter; bump it to 3.
    net.params_filter.as_unique().set_value(3)

    dumped = io.BytesIO()
    net.dump(dumped)
    dumped.seek(0)
    result = GraphInference(dumped).run(x, y)
    # ([1, 2] + [3, 4]) * 3 == [12, 18]
    np.testing.assert_equal(result["o"], [12, 18])
def test_replace_var():
    """Replace the add's output var with relu(a * b) and verify the result."""
    x = Tensor([1, 2])
    y = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(x, y)
    buf = io.BytesIO()
    fwd.dump(
        buf, arg_names=["a", "b"], output_names="o", optimize_for_inference=False
    )
    buf.seek(0)
    net = Net.load(buf)

    var_a = net.var_filter.name("a").as_unique()
    var_b = net.var_filter.name("b").as_unique()
    replacement = F.relu(F.mul(var_a, var_b))
    # The only operator consuming "a" is the add; splice over its output var.
    add_opr = list(net.opr_filter.has_input(var_a))[0]
    net.replace_vars({add_opr.outputs[0]: replacement})

    dumped = io.BytesIO()
    net.dump(dumped)
    dumped.seek(0)
    result = GraphInference(dumped).run(x, y)
    # relu([1, 2] * [3, 4]) * 2 == [6, 16]
    np.testing.assert_equal(result["o"], [6, 16])
def test_replace_var_in_different_network():
    """Vars from another Network can serve as replacements (graph splicing)."""
    a = Tensor([1, 2])
    b = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    @trace(symbolic=True, capture_as_const=True)
    def fwd1(c, d):
        return c + d

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(orig_model, arg_names=["a", "b"], output_names="o", optimize_for_inference=False)
    orig_model.seek(0)
    fwd1(a, b)
    orig_model1 = io.BytesIO()
    fwd1.dump(
        orig_model1,
        arg_names=["c", "d"],
        output_names="o",
        optimize_for_inference=False,
    )
    orig_model1.seek(0)

    graph = Net.load(orig_model)
    graph1 = Net.load(orig_model1)
    vara = graph.var_filter.name("a").as_unique()
    varb = graph.var_filter.name("b").as_unique()
    # Output of the second network: o = c + d.
    varo = graph1.var_filter.name("o").as_unique()
    # Feed both inputs of the first network from the second network's output,
    # yielding ((c + d) + (c + d)) * 2.
    graph.replace_vars({vara: varo, varb: varo})

    modified_model = io.BytesIO()
    graph.dump(modified_model)
    modified_model.seek(0)
    load_graph = GraphInference(modified_model)
    out = load_graph.run(a, b)
    # (a + b) == [4, 6]; ([4, 6] + [4, 6]) * 2 == [16, 24]
    np.testing.assert_equal(out["o"], [16, 24])
def test_reset_batchsize():
    """reset_batch_size rewrites the data provider's leading dimension."""

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return F.exp(x)

    f(Tensor(np.random.random((3, 3, 224, 224))))
    src = io.BytesIO()
    f.dump(src, optimize_for_inference=False)
    src.seek(0)

    net = Net.load(src)
    net.reset_batch_size(1)
    dst = io.BytesIO()
    net.dump(dst, optimize_for_inference=False)
    dst.seek(0)

    reloaded = Net.load(dst)
    assert reloaded.data_providers_filter.as_unique().shape[0] == 1
def test_splice_network():
    """Splice two Networks: net2's output replaces one of net1's inputs."""
    x = F.ones((2, ))
    y = F.ones((2, ))

    @trace(symbolic=True, capture_as_const=True)
    def fun1(a, b):
        return (a + b) * 2

    @trace(symbolic=True, capture_as_const=True)
    def fun2(a):
        return a * 2 - 1

    model = io.BytesIO()
    fun1(x, y)
    fun2(x)
    fun1.dump(
        model,
        arg_names=["net1_i0", "net1_i1"],
        output_names=["net1_o0"],
        optimize_for_inference=False,
    )
    model.seek(0)
    net1 = Net.load(model)
    model.seek(0)
    # The same buffer is reused for the second network's dump.
    fun2.dump(
        model,
        arg_names=["net2_i0"],
        output_names=["net2_o0"],
        optimize_for_inference=False,
    )
    model.seek(0)
    net2 = Net.load(model)

    # Pull net2's output into net1, then rewire net1's first input to it.
    net1.add_output(*net2.output_vars)
    var = net1.var_filter.name("net1_i0").as_unique()
    repl_var = net2.var_filter.name("net2_o0").as_unique()
    net1.replace_vars({var: repl_var})
    # The replaced input disappears; net2's input is now part of net1.
    assert "net1_i0" not in [var.name for var in net1.all_vars]
    assert "net2_i0" in [var.name for var in net1.all_vars]

    model.seek(0)
    net1.dump(model, keep_var_name=2, optimize_for_inference=False)
    model.seek(0)
    net = Net.load(model)
    # The spliced structure survives a dump/load round trip.
    assert "net1_i0" not in [var.name for var in net.all_vars]
    assert "net2_i0" in [var.name for var in net.all_vars]
def test_modify_opr_name():
    """modify_opr_names prefixes operator names (string and callable forms)."""

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return F.exp(x)

    f(Tensor(np.random.random((3, 3, 224, 224))))
    src = io.BytesIO()
    f.dump(src, arg_names=["a"], optimize_for_inference=False)
    src.seek(0)

    net = Net.load(src)
    # First add a plain "net." prefix, then a "net1." prefix via a callable.
    net.modify_opr_names("net")
    net.modify_opr_names(lambda x: "net1." + x)
    dst = io.BytesIO()
    net.dump(dst, optimize_for_inference=False)
    dst.seek(0)

    reloaded = Net.load(dst)
    assert reloaded.data_providers_filter.as_unique().name == "net1.net.a"
def test_optimize_for_inference():
    """enable_io16xc32 during dump converts the computing input to float16."""

    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return F.exp(x)

    f(Tensor(5.0))
    src = io.BytesIO()
    f.dump(src, optimize_for_inference=False)
    src.seek(0)

    optimized = io.BytesIO()
    Net.load(src).dump(optimized, enable_io16xc32=True)
    optimized.seek(0)

    res = G.load_graph(optimized)
    computing_input = res.output_vars_list[0].owner.inputs[0]
    assert computing_input.dtype == np.float16
def test_assert_equal():
    """A graph built with builtin.AssertEqual survives Network dump/load."""
    g = G.Graph()
    inp1 = g.make_h2d(dtype=np.float32, device="xpux")
    inp2 = g.make_h2d(dtype=np.float32, device="xpux")
    # AssertEqual fails at runtime if its two inputs differ beyond maxerr.
    op = builtin.AssertEqual(maxerr=1e-5)
    out = G.apply_normal_varnode(op, inp1._node, inp2._node)[0]
    g.compile(out)

    file = io.BytesIO()
    out_model = G.dump_graph([out])
    file.write(out_model[0])
    file.seek(0)
    net = Net.load(file)

    dump_file = io.BytesIO()
    net.dump(dump_file)
    dump_file.seek(0)
    g = GraphInference(dump_file)
    # Equal inputs: the assertion embedded in the graph must pass.
    g.run(np.array([1.0, 2.0]), np.array([1.0, 2.0]))
def test_add_remove_output():
    """remove_output + add_output fully replaces a graph's output set."""
    a = Tensor([1.0, 2.0])
    b = Tensor([3.0, 4.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2, (a - b)

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model,
        arg_names=["a", "b"],
        output_names=["o1", "o2"],
        optimize_for_inference=False,
    )
    orig_model.seek(0)

    net = Net.load(orig_model)
    var_a = net.var_filter.name("a").as_unique()
    var_b = net.var_filter.name("b").as_unique()
    # New outputs computed from the loaded input vars.
    y1 = (var_a + var_b) * 3
    y2 = F.sigmoid(var_a + var_b)
    # Drop the original outputs before installing the new ones.
    net.remove_output(*net.output_vars)
    y1.name = "new_o1"
    y2.name = "new_o2"
    net.add_output(y1, y2)

    modified_model = io.BytesIO()
    net.dump(modified_model)
    modified_model.seek(0)
    g = GraphInference(modified_model)
    out = g.run(a.numpy(), b.numpy())
    np.testing.assert_equal(out["new_o1"], ((a + b) * 3).numpy())
    np.testing.assert_almost_equal(out["new_o2"], (F.sigmoid((a + b))).numpy())
def test_dump_cond_take():
    """cond_take built on a loaded graph dumps and runs correctly."""
    a = Tensor([1.0, 2.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a):
        return F.cond_take(a > 1, a)

    fwd(a)
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model,
        arg_names=["a"],
        output_names=["o1", "o2"],
        optimize_for_inference=False,
    )
    orig_model.seek(0)

    net = Net.load(orig_model)
    var_a = net.input_vars[0]
    # Rebuild cond_take on the loaded input and make its two results the
    # graph's outputs.
    val, idx = F.cond_take(var_a > 1, var_a)
    net.remove_output(*net.output_vars)
    val.name = "value"
    idx.name = "index"
    net.add_output(val, idx)

    modified_model = io.BytesIO()
    net.dump(modified_model)
    modified_model.seek(0)
    g = GraphInference(modified_model)
    out = g.run(a.numpy())

    data = a.numpy()
    mask = a.numpy() > 1
    # cond_take returns the selected values and their flat indices.
    np.testing.assert_equal(out["index"], np.where(mask.reshape(-1))[0])
    np.testing.assert_equal(out["value"], data[mask])
def test_topological_sort():
    """all_oprs returns operators in a stable topological order."""

    @trace(symbolic=True, capture_as_const=True)
    def func(x, y):
        a = x + y
        a1 = F.relu(a)
        a2 = F.abs(a)
        a3 = F.ceil(a) * 2
        a4 = F.floor(a)
        r = a1 - a2
        r1 = a3 / a4
        return r, r1

    file = io.BytesIO()
    func(megengine.tensor(1.0), megengine.tensor(2.0))
    # keep_opr_name/keep_opr_priority preserve names and scheduling order so
    # the loaded sequence is deterministic.
    func.dump(file, optimize_for_inference=False, keep_opr_name=True, keep_opr_priority=True)
    file.seek(0)
    g = Network.load(file)
    oprseq1 = g.all_oprs
    # Expected sequence: Elemwise oprs are identified by their "mode" param,
    # all other oprs by their type name.
    gt = [
        "Host2DeviceCopy",
        "Host2DeviceCopy",
        "ADD",
        "RELU",
        "ABS",
        "CEIL",
        "ImmutableTensor",
        "MUL",
        "FLOOR",
        "SUB",
        "TRUE_DIV",
    ]
    for op, mode in zip(oprseq1, gt):
        if op.type == "Elemwise":
            assert op.params["mode"] == mode
        else:
            assert op.type == mode
def test_query():
    """Exercise the Network query/filter API (by type, name, count, glob)."""

    class Model(M.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = M.Conv2d(3, 32, 3)
            self.conv2 = M.Conv2d(32, 32, 3)
            self.conv3 = M.Conv2d(32, 32, 3)

        def forward(self, data):
            x = self.conv1(data)
            x = self.conv2(x)
            x = self.conv3(x)
            return x

    n = Model()

    @trace(symbolic=True, capture_as_const=True)
    def fwd(data):
        return n(data)

    fwd(Tensor(np.random.random((1, 3, 224, 224))))
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model,
        arg_names=["data"],
        output_names="o",
        keep_opr_name=True,
        keep_var_name=True,
        optimize_for_inference=False,
    )
    orig_model.seek(0)
    graph = Net.load(orig_model)

    # Exactly one data provider: the single "data" input.
    r = graph.data_providers_filter.as_count()
    assert r == 1
    opr = graph.get_opr_by_type(Host2DeviceCopy)
    assert isinstance(opr, Host2DeviceCopy)

    # Three convs -> 3 weights + 3 biases = 6 parameters.
    r1 = graph.params_filter.as_count()
    assert r1 == 6

    r2 = graph.opr_filter.type(N.ConvolutionForward).as_count()
    assert r2 == 3

    # not_type is the complement of type over all operators.
    r3 = graph.opr_filter.not_type(N.ConvolutionForward).as_count()
    assert r3 == len(graph.all_oprs) - r2

    var = graph.var_filter.name("data").as_unique()
    r4 = graph.opr_filter.has_input(var).as_count()
    assert r4 == 1

    r5 = graph.opr_filter.name("data").as_count()
    assert r5 == 1
    opr = graph.get_opr_by_name("data")
    assert isinstance(opr, Host2DeviceCopy)
    var = graph.get_var_by_name("data")
    assert isinstance(var, VarNode)

    # Glob-style name matching: one bias per conv layer.
    r6 = graph.var_filter.name("*bias").as_count()
    assert r6 == 3
def visualize(
    model_path: str,
    log_path: str,
    bar_length_max: int = 20,
    log_params: bool = True,
    log_flops: bool = True,
):
    r"""
    Load megengine dumped model and visualize graph structure with tensorboard log files.
    Can also record and print model's statistics like :func:`~.module_stats`

    :param model_path: dir path for megengine dumped model.
    :param log_path: dir path for tensorboard graph log.
    :param bar_length_max: size of bar indicating max flops or parameter size in net stats.
    :param log_params: whether print and record params size.
    :param log_flops: whether print and record op flops.
    """
    if log_path:
        # TensorBoard protos are imported lazily so stats-only use (log_path
        # falsy) works without tensorboard installed.
        try:
            from tensorboard.compat.proto.attr_value_pb2 import AttrValue
            from tensorboard.compat.proto.config_pb2 import RunMetadata
            from tensorboard.compat.proto.graph_pb2 import GraphDef
            from tensorboard.compat.proto.node_def_pb2 import NodeDef
            from tensorboard.compat.proto.step_stats_pb2 import (
                AllocatorMemoryUsed,
                DeviceStepStats,
                NodeExecStats,
                StepStats,
            )
            from tensorboard.compat.proto.tensor_shape_pb2 import TensorShapeProto
            from tensorboard.compat.proto.versions_pb2 import VersionDef
            from tensorboardX import SummaryWriter
        except ImportError:
            logger.error(
                "TensorBoard and TensorboardX are required for visualize.",
                exc_info=True,
            )
            return
    # FIXME: remove this after resolving "span dist too large" warning
    old_level = set_mgb_log_level(logging.ERROR)

    enable_receptive_field()

    graph = Network.load(model_path)

    def process_name(name):
        # nodes that start with point or contain float const will lead to display bug
        if not re.match(r"^[+-]?\d*\.\d*", name):
            name = name.replace(".", "/")
        return name.encode(encoding="utf-8")

    # NOTE(review): `summary` appears unused below — candidate for removal.
    summary = [["item", "value"]]
    node_list = []
    flops_list = []
    params_list = []
    for node in graph.all_oprs:
        # Pick the representative output var for shape/dtype reporting.
        if hasattr(node, "output_idx"):
            node_oup = node.outputs[node.output_idx]
        else:
            if len(node.outputs) != 1:
                logger.warning(
                    "OpNode {} has more than one output and not has 'output_idx' attr."
                    .format(node))
            node_oup = node.outputs[0]

        inp_list = [process_name(var.owner.name) for var in node.inputs]
        if log_path:
            # detail format see tensorboard/compat/proto/attr_value.proto
            attr = {
                "_output_shapes": AttrValue(list=AttrValue.ListValue(shape=[
                    TensorShapeProto(dim=[
                        TensorShapeProto.Dim(size=d) for d in node_oup.shape
                    ])
                ])),
                "params": AttrValue(s=str(node.params).encode(encoding="utf-8")),
                "dtype": AttrValue(s=str(node_oup.dtype).encode(encoding="utf-8")),
            }
        flops_stats = get_op_stats(node, node.inputs, node.outputs)
        if flops_stats is not None:
            # add op flops attr
            # NOTE(review): flops_stats looks like a dict (it is indexed with
            # flops_stats["flops"] below), so hasattr(flops_stats, "flops_num")
            # is presumably always False and the "flops" attr is never added —
            # probably meant a key membership test; confirm intent.
            if log_path and hasattr(flops_stats, "flops_num"):
                attr["flops"] = AttrValue(
                    s=sizeof_fmt(flops_stats["flops"]).encode(
                        encoding="utf-8"))
            flops_stats["name"] = node.name
            flops_stats["class_name"] = node.type
            flops_list.append(flops_stats)

        if node.type == "ImmutableTensor":
            param_stats = get_param_stats(node.numpy())
            # add tensor size attr
            if log_path:
                attr["size"] = AttrValue(
                    s=sizeof_fmt(param_stats["size"]).encode(encoding="utf-8"))
            param_stats["name"] = node.name
            params_list.append(param_stats)
        if log_path:
            node_list.append(
                NodeDef(
                    name=process_name(node.name),
                    op=node.type,
                    input=inp_list,
                    attr=attr,
                ))
    # summary
    extra_info = {
        "#ops": len(graph.all_oprs),
        "#params": len(params_list),
    }

    total_flops, total_param_dims, total_param_size = 0, 0, 0
    if log_params:
        total_param_dims, total_param_size = print_param_stats(
            params_list, bar_length_max)
        extra_info["total_param_dims"] = sizeof_fmt(total_param_dims)
        extra_info["total_param_size"] = sizeof_fmt(total_param_size)
    if log_flops:
        total_flops = print_op_stats(flops_list, bar_length_max)
        extra_info["total_flops"] = sizeof_fmt(total_flops, suffix="OPs")
    if log_params and log_flops:
        extra_info["flops/param_size"] = "{:3.3f}".format(total_flops /
                                                          total_param_size)

    if log_path:
        graph_def = GraphDef(node=node_list, versions=VersionDef(producer=22))

        device = "/device:CPU:0"
        stepstats = RunMetadata(step_stats=StepStats(
            dev_stats=[DeviceStepStats(device=device)]))
        writer = SummaryWriter(log_path)
        writer._get_file_writer().add_graph((graph_def, stepstats))

    print_summary(**extra_info)

    # FIXME: remove this after resolving "span dist too large" warning
    _imperative_rt_logger.set_log_level(old_level)

    return total_param_size, total_flops
def visualize(
    model_path: str,
    log_path: str,
    input: np.ndarray = None,
    inp_dict: dict = None,
    cal_params: bool = True,
    cal_flops: bool = True,
    cal_activations: bool = True,
    logging_to_stdout: bool = True,
    bar_length_max: int = 20,
):
    r"""
    Load megengine dumped model and visualize graph structure with tensorboard log files.
    Can also record and print model's statistics like :func:`~.module_stats`

    :param model_path: dir path for megengine dumped model.
    :param log_path: dir path for tensorboard graph log.
    :param input: user defined input data for running model and calculating stats, alternative with inp_dict, used when the model has only one input.
    :param inp_dict: input dict for running model and calculating stats, alternative with input, used when the model has more than one input. When both input and inp_dict are None, a random input will be used.
    :param cal_params: whether calculate and record params size.
    :param cal_flops: whether calculate and record op flops.
    :param cal_activations: whether calculate and record op activations.
    :param logging_to_stdout: whether print all calculated statistic details.
    :param bar_length_max: size of bar indicating max flops or parameter size in net stats.
    """
    if log_path:
        # TensorBoard protos are imported lazily so stats-only use (log_path
        # falsy) works without tensorboard installed.
        try:
            from tensorboard.compat.proto.attr_value_pb2 import AttrValue
            from tensorboard.compat.proto.config_pb2 import RunMetadata
            from tensorboard.compat.proto.graph_pb2 import GraphDef
            from tensorboard.compat.proto.node_def_pb2 import NodeDef
            from tensorboard.compat.proto.step_stats_pb2 import (
                AllocatorMemoryUsed,
                DeviceStepStats,
                NodeExecStats,
                StepStats,
            )
            from tensorboard.compat.proto.tensor_shape_pb2 import TensorShapeProto
            from tensorboard.compat.proto.versions_pb2 import VersionDef
            from tensorboardX import SummaryWriter
        except ImportError:
            logger.error(
                "TensorBoard and TensorboardX are required for visualize.",
                exc_info=True,
            )
            return

    enable_receptive_field()

    graph = Network.load(model_path)
    graph.reset_batch_size(1)

    # When user data is supplied, bake it into the graph as constants so the
    # activations can actually be computed.
    has_input = False
    if input is not None or inp_dict is not None:
        has_input = True
        repl_dict = {}
        inp_vars = graph.input_vars
        if inp_dict is not None:
            assert len(inp_dict) == len(
                inp_vars
            ), "Inputs are not sufficient for calculation."
            for v in inp_vars:
                new_input = graph.make_const(inp_dict[v.name], name=v.name)
                repl_dict[v] = new_input
        else:
            # NOTE(review): this branch requires exactly ONE input, but the
            # assertion message reads "needs more than one input" — the text
            # looks inverted; confirm and reword.
            assert len(inp_vars) == 1, "The graph needs more than one input."
            inp_var = inp_vars[0]
            repl_dict[inp_var] = graph.make_const(input, name=inp_var.name)
        graph.replace_vars(repl_dict=repl_dict)
        graph._compile()

    def process_name(name):
        # nodes that start with point or contain float const will lead to display bug
        if not re.match(r"^[+-]?\d*\.\d*", name):
            name = name.replace(".", "/")
        return name.encode(encoding="utf-8")

    # NOTE(review): `summary` appears unused below — candidate for removal.
    summary = [["item", "value"]]
    node_list = []
    flops_list = []
    params_list = []
    activations_list = []
    total_stats = namedtuple("total_stats", ["param_size", "flops", "act_size"])
    stats_details = namedtuple("module_stats", ["params", "flops", "activations"])

    for node in tqdm(graph.all_oprs):
        # Pick the representative output var for shape/dtype reporting.
        if hasattr(node, "output_idx"):
            node_oup = node.outputs[node.output_idx]
        else:
            if len(node.outputs) != 1:
                logger.warning(
                    "OpNode {} has more than one output and not has 'output_idx' attr.".format(
                        node
                    )
                )
            node_oup = node.outputs[0]

        inp_list = [process_name(var.owner.name) for var in node.inputs]
        if log_path:
            # detail format see tensorboard/compat/proto/attr_value.proto
            attr = {
                "_output_shapes": AttrValue(
                    list=AttrValue.ListValue(
                        shape=[
                            TensorShapeProto(
                                dim=[
                                    TensorShapeProto.Dim(size=d) for d in node_oup.shape
                                ]
                            )
                        ]
                    )
                ),
                "params": AttrValue(s=str(node.params).encode(encoding="utf-8")),
                "dtype": AttrValue(s=str(node_oup.dtype).encode(encoding="utf-8")),
            }

        if cal_flops:
            flops_stats = get_op_stats(node, node.inputs, node.outputs)
            if flops_stats is not None:
                # add op flops attr
                # NOTE(review): flops_stats looks like a dict (it is indexed
                # with flops_stats["flops"] below), so hasattr(flops_stats,
                # "flops_num") is presumably always False and the "flops" attr
                # is never added — probably meant a key membership test.
                if log_path and hasattr(flops_stats, "flops_num"):
                    attr["flops"] = AttrValue(
                        s=sizeof_fmt(flops_stats["flops"]).encode(encoding="utf-8")
                    )
                flops_stats["name"] = node.name
                flops_stats["class_name"] = node.type
                flops_list.append(flops_stats)

        if cal_activations:
            acts = get_activation_stats(node_oup.numpy(), has_input=has_input)
            acts["name"] = node.name
            acts["class_name"] = node.type
            activations_list.append(acts)

        if cal_params:
            if node.type == "ImmutableTensor":
                param_stats = get_param_stats(node.numpy())
                # add tensor size attr
                if log_path:
                    attr["size"] = AttrValue(
                        s=sizeof_fmt(param_stats["size"]).encode(encoding="utf-8")
                    )
                param_stats["name"] = node.name
                params_list.append(param_stats)

        if log_path:
            node_list.append(
                NodeDef(
                    name=process_name(node.name),
                    op=node.type,
                    input=inp_list,
                    attr=attr,
                )
            )
    # summary
    extra_info = {
        "#ops": len(graph.all_oprs),
        "#params": len(params_list),
    }
    (
        total_flops,
        total_param_dims,
        total_param_size,
        total_act_dims,
        total_act_size,
    ) = (0, 0, 0, 0, 0)

    if cal_params:
        total_param_dims, total_param_size, params_list = sum_param_stats(
            params_list, bar_length_max
        )
        extra_info["total_param_dims"] = sizeof_fmt(total_param_dims, suffix="")
        extra_info["total_param_size"] = sizeof_fmt(total_param_size)
        if logging_to_stdout:
            print_param_stats(params_list)

    if cal_flops:
        total_flops, flops_list = sum_op_stats(flops_list, bar_length_max)
        extra_info["total_flops"] = sizeof_fmt(total_flops, suffix="OPs")
        if logging_to_stdout:
            print_op_stats(flops_list)

    if cal_activations:
        total_act_dims, total_act_size, activations_list = sum_activations_stats(
            activations_list, bar_length_max
        )
        extra_info["total_act_dims"] = sizeof_fmt(total_act_dims, suffix="")
        extra_info["total_act_size"] = sizeof_fmt(total_act_size)
        if logging_to_stdout:
            print_activations_stats(activations_list, has_input=has_input)

    if cal_flops and cal_params:
        extra_info["flops/param_size"] = "{:3.3f}".format(
            total_flops / total_param_size
        )

    if log_path:
        graph_def = GraphDef(node=node_list, versions=VersionDef(producer=22))
        device = "/device:CPU:0"
        stepstats = RunMetadata(
            step_stats=StepStats(dev_stats=[DeviceStepStats(device=device)])
        )
        writer = SummaryWriter(log_path)
        writer._get_file_writer().add_graph((graph_def, stepstats))

    print_summary(**extra_info)

    return (
        total_stats(
            param_size=total_param_size, flops=total_flops, act_size=total_act_size,
        ),
        stats_details(
            params=params_list, flops=flops_list, activations=activations_list
        ),
    )