示例#1
0
def test_metadata():
    x = Tensor(0)

    @trace(symbolic=True, capture_as_const=True)
    def fwd(x):
        return x * 2

    fwd(x)

    orig_model = io.BytesIO()
    fwd.dump(orig_model, user_info="test", optimize_for_inference=False)
    orig_model.seek(0)
    graph = Net.load(orig_model)
    assert graph.metadata == {
        "user_info": "test",
        "graph_modified": False,  # False: tracing.dump
        "optimized_for_inference": False,
    }

    orig_model.seek(0)
    graph.dump(
        orig_model,
        user_info={
            "str": "x",
            "tensor": x,
            "module": M.Module,
            "none": None
        },
        optimize_for_inference=True,
        enable_nchw4=True,
        enable_ioc16=True,
    )
    orig_model.seek(0)
    graph = Net.load(orig_model)
    assert graph.metadata == {
        "user_info": {
            "str": "x",
            "tensor": x,
            "module": M.Module,
            "none": None
        },
        "graph_modified": True,  # True: Network.dump
        "optimized_for_inference": True,
        "enable_nchw4": True,
        "enable_ioc16": True,
    }

    orig_model.seek(0)
    fwd.dump(orig_model, enable_metadata=False)
    orig_model.seek(0)
    graph = Net.load(orig_model)
    assert graph.metadata is None
示例#2
0
def test_replace_opr():

    a = Tensor([1, 2])
    b = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(orig_model,
             arg_names=["a", "b"],
             output_names="o",
             optimize_for_inference=False)
    orig_model.seek(0)

    graph = Net.load(orig_model)
    vara = graph.var_filter.name("a").as_unique()
    varb = graph.var_filter.name("b").as_unique()

    out1 = F.sub(vara, varb)
    out1 = F.relu(out1)
    out1 = graph.add_dep_oprs(out1)
    orig_opr = graph.opr_filter.has_input(vara).as_unique()

    repl_dict = {orig_opr: out1[0].owner}
    graph.replace_oprs(repl_dict)
    modified_model1 = io.BytesIO()
    graph.dump(modified_model1)
    modified_model1.seek(0)

    load_graph = GraphInference(modified_model1)
    out = load_graph.run(a, b)
    np.testing.assert_equal(out["o"], [0, 0])
示例#3
0
def test_add_input():

    a = Tensor([1, 2])
    b = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(orig_model,
             arg_names=["a", "b"],
             output_names="o",
             optimize_for_inference=False)
    orig_model.seek(0)

    graph = Net.load(orig_model)
    inp_c = graph.make_input_node((2, ), np.int32, name="c")
    varo = graph.var_filter.name("o").as_unique()

    out = F.add(varo, inp_c)
    out.name = "o1"
    graph.remove_output(varo)
    graph.add_output(out)
    modified_model = io.BytesIO()

    graph.dump(modified_model)
    modified_model.seek(0)
    load_graph = GraphInference(modified_model)

    out = load_graph.run(a, b, a)
    np.testing.assert_equal(out["o1"], ((a + b) * 2 + a).numpy())
示例#4
0
def check_pygraph_dump(trace_func, inp_data, expect_results, max_err=None):
    orig_model = io.BytesIO()
    inp_size = len(inp_data)
    out_size = len(expect_results)
    arg_names = ["arg_{}".format(i) for i in range(inp_size)]
    output_names = ["out_{}".format(i) for i in range(out_size)]
    trace_func.dump(
        orig_model,
        arg_names=arg_names,
        output_names=output_names,
        optimize_for_inference=False,
    )
    orig_model.seek(0)

    net = Net.load(orig_model)
    file = io.BytesIO()
    net.dump(file, optimize_for_inference=False)
    file.seek(0)
    graph = GraphInference(file)

    inp_dict = dict([(arg_names[i], inp_data[i].numpy())
                     for i in range(inp_size)])
    results = graph.run(inp_dict=inp_dict)

    for ind, tensor in enumerate(expect_results):
        if max_err:
            np.testing.assert_almost_equal(tensor.numpy(),
                                           results[output_names[ind]], max_err)
        else:
            np.testing.assert_equal(tensor.numpy(), results[output_names[ind]])
        assert tensor.dtype == results[output_names[ind]].dtype
示例#5
0
def test_set_symbolic_shape():

    a = Tensor([1.0, 2.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a):
        return F.relu(a * 2)

    fwd(a)
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model,
        arg_names=["a"],
        output_names=["o"],
        optimize_for_inference=False,
    )
    orig_model.seek(0)
    net = Net.load(orig_model)
    var_a = net.input_vars[0]

    saved_symbolic_shape = set_symbolic_shape(True)
    assert isinstance(var_a.shape, VarNode)
    set_symbolic_shape(False)
    assert var_a.shape == var_a.partial_shape
    set_symbolic_shape(saved_symbolic_shape)
示例#6
0
def test_make_const():

    a = Tensor([1, 2])
    b = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(orig_model,
             arg_names=["a", "b"],
             output_names="o",
             optimize_for_inference=False)
    orig_model.seek(0)

    graph = Net.load(orig_model)
    const_b = graph.make_const(np.array([0.0, 0.0]), name="b")
    varb = graph.var_filter.name("b").as_unique()

    repl_dict = {varb: const_b}
    graph.replace_vars(repl_dict)

    modified_model = io.BytesIO()
    graph.dump(modified_model)
    modified_model.seek(0)
    load_graph = GraphInference(modified_model)

    out = load_graph.run(a)
    np.testing.assert_equal(out["o"], [2, 4])
示例#7
0
def test_add_output():

    a = Tensor([1.0, 2.0])
    b = Tensor([3.0, 4.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model, arg_names=["a", "b"], output_names="o", optimize_for_inference=False
    )
    orig_model.seek(0)

    net = Net.load(orig_model)
    var_a = net.var_filter.name("a").as_unique()
    var_b = net.var_filter.name("b").as_unique()

    y = F.add(var_a, var_b)
    y = F.sigmoid(y)

    y.name = "o1"
    net.add_output(y)

    modified_model = io.BytesIO()
    net.dump(modified_model)
    modified_model.seek(0)

    g = GraphInference(modified_model)
    out = g.run(a.numpy(), b.numpy())

    np.testing.assert_equal(out["o"], ((a + b) * 2).numpy())
    np.testing.assert_equal(out["o1"], (F.sigmoid((a + b))).numpy())
示例#8
0
def test_modify_params():

    a = Tensor([1, 2])
    b = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(orig_model,
             arg_names=["a", "b"],
             output_names="o",
             optimize_for_inference=False)
    orig_model.seek(0)

    graph = Net.load(orig_model)
    param_const = graph.params_filter.as_unique()
    param_const.set_value(3)

    modified_model = io.BytesIO()
    graph.dump(modified_model)
    modified_model.seek(0)
    load_graph = GraphInference(modified_model)

    out = load_graph.run(a, b)
    np.testing.assert_equal(out["o"], [12, 18])
示例#9
0
def test_replace_var():

    a = Tensor([1, 2])
    b = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(orig_model,
             arg_names=["a", "b"],
             output_names="o",
             optimize_for_inference=False)
    orig_model.seek(0)

    graph = Net.load(orig_model)
    vara = graph.var_filter.name("a").as_unique()
    varb = graph.var_filter.name("b").as_unique()

    out = F.mul(vara, varb)
    out = F.relu(out)

    opnode = list(graph.opr_filter.has_input(vara))
    repl_dict = {opnode[0].outputs[0]: out}
    graph.replace_vars(repl_dict)

    modified_model = io.BytesIO()
    graph.dump(modified_model)
    modified_model.seek(0)
    load_graph = GraphInference(modified_model)

    out = load_graph.run(a, b)
    np.testing.assert_equal(out["o"], [6, 16])
示例#10
0
def test_replace_var_in_different_network():

    a = Tensor([1, 2])
    b = Tensor([3, 4])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2

    @trace(symbolic=True, capture_as_const=True)
    def fwd1(c, d):
        return c + d

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(orig_model,
             arg_names=["a", "b"],
             output_names="o",
             optimize_for_inference=False)
    orig_model.seek(0)

    fwd1(a, b)
    orig_model1 = io.BytesIO()
    fwd1.dump(
        orig_model1,
        arg_names=["c", "d"],
        output_names="o",
        optimize_for_inference=False,
    )
    orig_model1.seek(0)

    graph = Net.load(orig_model)
    graph1 = Net.load(orig_model1)
    vara = graph.var_filter.name("a").as_unique()
    varb = graph.var_filter.name("b").as_unique()
    varo = graph1.var_filter.name("o").as_unique()

    graph.replace_vars({vara: varo, varb: varo})

    modified_model = io.BytesIO()
    graph.dump(modified_model)
    modified_model.seek(0)
    load_graph = GraphInference(modified_model)

    out = load_graph.run(a, b)
    np.testing.assert_equal(out["o"], [16, 24])
示例#11
0
def test_reset_batchsize():
    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return F.exp(x)

    orig_model = io.BytesIO()
    f(Tensor(np.random.random((3, 3, 224, 224))))
    f.dump(orig_model, optimize_for_inference=False)
    orig_model.seek(0)

    modified_model = io.BytesIO()
    net = Net.load(orig_model)
    net.reset_batch_size(1)
    net.dump(modified_model, optimize_for_inference=False)
    modified_model.seek(0)

    net1 = Net.load(modified_model)
    assert net1.data_providers_filter.as_unique().shape[0] == 1
示例#12
0
def test_splice_network():
    x = F.ones((2, ))
    y = F.ones((2, ))

    @trace(symbolic=True, capture_as_const=True)
    def fun1(a, b):
        return (a + b) * 2

    @trace(symbolic=True, capture_as_const=True)
    def fun2(a):
        return a * 2 - 1

    model = io.BytesIO()
    fun1(x, y)
    fun2(x)
    fun1.dump(
        model,
        arg_names=["net1_i0", "net1_i1"],
        output_names=["net1_o0"],
        optimize_for_inference=False,
    )
    model.seek(0)
    net1 = Net.load(model)
    model.seek(0)
    fun2.dump(
        model,
        arg_names=["net2_i0"],
        output_names=["net2_o0"],
        optimize_for_inference=False,
    )
    model.seek(0)
    net2 = Net.load(model)
    net1.add_output(*net2.output_vars)
    var = net1.var_filter.name("net1_i0").as_unique()
    repl_var = net2.var_filter.name("net2_o0").as_unique()
    net1.replace_vars({var: repl_var})
    assert "net1_i0" not in [var.name for var in net1.all_vars]
    assert "net2_i0" in [var.name for var in net1.all_vars]
    model.seek(0)
    net1.dump(model, keep_var_name=2, optimize_for_inference=False)
    model.seek(0)
    net = Net.load(model)
    assert "net1_i0" not in [var.name for var in net.all_vars]
    assert "net2_i0" in [var.name for var in net.all_vars]
示例#13
0
def test_modify_opr_name():
    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return F.exp(x)

    orig_model = io.BytesIO()
    f(Tensor(np.random.random((3, 3, 224, 224))))
    f.dump(orig_model, arg_names=["a"], optimize_for_inference=False)
    orig_model.seek(0)

    modified_model = io.BytesIO()
    net = Net.load(orig_model)
    net.modify_opr_names("net")
    net.modify_opr_names(lambda x: "net1." + x)
    net.dump(modified_model, optimize_for_inference=False)
    modified_model.seek(0)

    net1 = Net.load(modified_model)
    assert net1.data_providers_filter.as_unique().name == "net1.net.a"
示例#14
0
def test_optimize_for_inference():
    @trace(symbolic=True, capture_as_const=True)
    def f(x):
        return F.exp(x)

    orig_model = io.BytesIO()
    f(Tensor(5.0))
    f.dump(orig_model, optimize_for_inference=False)
    orig_model.seek(0)

    optimize_model = io.BytesIO()
    net = Net.load(orig_model)
    net.dump(optimize_model, enable_io16xc32=True)
    optimize_model.seek(0)

    res = G.load_graph(optimize_model)
    computing_input = res.output_vars_list[0].owner.inputs[0]
    assert computing_input.dtype == np.float16
示例#15
0
def test_assert_equal():
    g = G.Graph()
    inp1 = g.make_h2d(dtype=np.float32, device="xpux")
    inp2 = g.make_h2d(dtype=np.float32, device="xpux")
    op = builtin.AssertEqual(maxerr=1e-5)
    out = G.apply_normal_varnode(op, inp1._node, inp2._node)[0]
    g.compile(out)
    file = io.BytesIO()
    out_model = G.dump_graph([out])
    file.write(out_model[0])
    file.seek(0)
    net = Net.load(file)

    dump_file = io.BytesIO()
    net.dump(dump_file)
    dump_file.seek(0)
    g = GraphInference(dump_file)
    g.run(np.array([1.0, 2.0]), np.array([1.0, 2.0]))
示例#16
0
def test_add_remove_output():

    a = Tensor([1.0, 2.0])
    b = Tensor([3.0, 4.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a, b):
        return (a + b) * 2, (a - b)

    fwd(a, b)
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model,
        arg_names=["a", "b"],
        output_names=["o1", "o2"],
        optimize_for_inference=False,
    )
    orig_model.seek(0)

    net = Net.load(orig_model)
    var_a = net.var_filter.name("a").as_unique()
    var_b = net.var_filter.name("b").as_unique()

    y1 = (var_a + var_b) * 3
    y2 = F.sigmoid(var_a + var_b)

    net.remove_output(*net.output_vars)
    y1.name = "new_o1"
    y2.name = "new_o2"
    net.add_output(y1, y2)

    modified_model = io.BytesIO()
    net.dump(modified_model)
    modified_model.seek(0)

    g = GraphInference(modified_model)
    out = g.run(a.numpy(), b.numpy())

    np.testing.assert_equal(out["new_o1"], ((a + b) * 3).numpy())
    np.testing.assert_almost_equal(out["new_o2"], (F.sigmoid((a + b))).numpy())
示例#17
0
def test_dump_cond_take():

    a = Tensor([1.0, 2.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a):
        return F.cond_take(a > 1, a)

    fwd(a)
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model,
        arg_names=["a"],
        output_names=["o1", "o2"],
        optimize_for_inference=False,
    )
    orig_model.seek(0)

    net = Net.load(orig_model)
    var_a = net.input_vars[0]

    val, idx = F.cond_take(var_a > 1, var_a)

    net.remove_output(*net.output_vars)
    val.name = "value"
    idx.name = "index"
    net.add_output(val, idx)

    modified_model = io.BytesIO()
    net.dump(modified_model)
    modified_model.seek(0)

    g = GraphInference(modified_model)
    out = g.run(a.numpy())

    data = a.numpy()
    mask = a.numpy() > 1
    np.testing.assert_equal(out["index"], np.where(mask.reshape(-1))[0])
    np.testing.assert_equal(out["value"], data[mask])
示例#18
0
def test_topological_sort():
    @trace(symbolic=True, capture_as_const=True)
    def func(x, y):
        a = x + y
        a1 = F.relu(a)
        a2 = F.abs(a)
        a3 = F.ceil(a) * 2
        a4 = F.floor(a)
        r = a1 - a2
        r1 = a3 / a4
        return r, r1

    file = io.BytesIO()
    func(megengine.tensor(1.0), megengine.tensor(2.0))
    func.dump(file,
              optimize_for_inference=False,
              keep_opr_name=True,
              keep_opr_priority=True)
    file.seek(0)
    g = Network.load(file)
    oprseq1 = g.all_oprs
    gt = [
        "Host2DeviceCopy",
        "Host2DeviceCopy",
        "ADD",
        "RELU",
        "ABS",
        "CEIL",
        "ImmutableTensor",
        "MUL",
        "FLOOR",
        "SUB",
        "TRUE_DIV",
    ]
    for op, mode in zip(oprseq1, gt):
        if op.type == "Elemwise":
            assert op.params["mode"] == mode
        else:
            assert op.type == mode
示例#19
0
def test_query():
    class Model(M.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = M.Conv2d(3, 32, 3)
            self.conv2 = M.Conv2d(32, 32, 3)
            self.conv3 = M.Conv2d(32, 32, 3)

        def forward(self, data):
            x = self.conv1(data)
            x = self.conv2(x)
            x = self.conv3(x)
            return x

    n = Model()

    @trace(symbolic=True, capture_as_const=True)
    def fwd(data):
        return n(data)

    fwd(Tensor(np.random.random((1, 3, 224, 224))))
    orig_model = io.BytesIO()
    fwd.dump(
        orig_model,
        arg_names=["data"],
        output_names="o",
        keep_opr_name=True,
        keep_var_name=True,
        optimize_for_inference=False,
    )
    orig_model.seek(0)

    graph = Net.load(orig_model)

    r = graph.data_providers_filter.as_count()
    assert r == 1

    opr = graph.get_opr_by_type(Host2DeviceCopy)
    assert isinstance(opr, Host2DeviceCopy)

    r1 = graph.params_filter.as_count()
    assert r1 == 6

    r2 = graph.opr_filter.type(N.ConvolutionForward).as_count()
    assert r2 == 3

    r3 = graph.opr_filter.not_type(N.ConvolutionForward).as_count()
    assert r3 == len(graph.all_oprs) - r2

    var = graph.var_filter.name("data").as_unique()
    r4 = graph.opr_filter.has_input(var).as_count()
    assert r4 == 1

    r5 = graph.opr_filter.name("data").as_count()
    assert r5 == 1

    opr = graph.get_opr_by_name("data")
    assert isinstance(opr, Host2DeviceCopy)

    var = graph.get_var_by_name("data")
    assert isinstance(var, VarNode)

    r6 = graph.var_filter.name("*bias").as_count()
    assert r6 == 3
示例#20
0
def visualize(
    model_path: str,
    log_path: str,
    bar_length_max: int = 20,
    log_params: bool = True,
    log_flops: bool = True,
):
    r"""
    Load megengine dumped model and visualize graph structure with tensorboard log files.
    Can also record and print model's statistics like :func:`~.module_stats`

    :param model_path: dir path for megengine dumped model.
    :param log_path: dir path for tensorboard graph log.
    :param bar_length_max: size of bar indicating max flops or parameter size in net stats.
    :param log_params: whether print and record params size.
    :param log_flops: whether print and record op flops.
    """
    if log_path:
        try:
            from tensorboard.compat.proto.attr_value_pb2 import AttrValue
            from tensorboard.compat.proto.config_pb2 import RunMetadata
            from tensorboard.compat.proto.graph_pb2 import GraphDef
            from tensorboard.compat.proto.node_def_pb2 import NodeDef
            from tensorboard.compat.proto.step_stats_pb2 import (
                AllocatorMemoryUsed,
                DeviceStepStats,
                NodeExecStats,
                StepStats,
            )
            from tensorboard.compat.proto.tensor_shape_pb2 import TensorShapeProto
            from tensorboard.compat.proto.versions_pb2 import VersionDef
            from tensorboardX import SummaryWriter
        except ImportError:
            logger.error(
                "TensorBoard and TensorboardX are required for visualize.",
                exc_info=True,
            )
            return
    # FIXME: remove this after resolving "span dist too large" warning
    old_level = set_mgb_log_level(logging.ERROR)

    enable_receptive_field()

    graph = Network.load(model_path)

    def process_name(name):
        # nodes that start with point or contain float const will lead to display bug
        if not re.match(r"^[+-]?\d*\.\d*", name):
            name = name.replace(".", "/")
        return name.encode(encoding="utf-8")

    summary = [["item", "value"]]
    node_list = []
    flops_list = []
    params_list = []
    for node in graph.all_oprs:
        if hasattr(node, "output_idx"):
            node_oup = node.outputs[node.output_idx]
        else:
            if len(node.outputs) != 1:
                logger.warning(
                    "OpNode {} has more than one output and not has 'output_idx' attr."
                    .format(node))
            node_oup = node.outputs[0]

        inp_list = [process_name(var.owner.name) for var in node.inputs]
        if log_path:
            # detail format see tensorboard/compat/proto/attr_value.proto
            attr = {
                "_output_shapes":
                AttrValue(list=AttrValue.ListValue(shape=[
                    TensorShapeProto(dim=[
                        TensorShapeProto.Dim(size=d) for d in node_oup.shape
                    ])
                ])),
                "params":
                AttrValue(s=str(node.params).encode(encoding="utf-8")),
                "dtype":
                AttrValue(s=str(node_oup.dtype).encode(encoding="utf-8")),
            }
        flops_stats = get_op_stats(node, node.inputs, node.outputs)
        if flops_stats is not None:
            # add op flops attr
            if log_path and hasattr(flops_stats, "flops_num"):
                attr["flops"] = AttrValue(
                    s=sizeof_fmt(flops_stats["flops"]).encode(
                        encoding="utf-8"))
            flops_stats["name"] = node.name
            flops_stats["class_name"] = node.type
            flops_list.append(flops_stats)

        if node.type == "ImmutableTensor":
            param_stats = get_param_stats(node.numpy())
            # add tensor size attr
            if log_path:
                attr["size"] = AttrValue(
                    s=sizeof_fmt(param_stats["size"]).encode(encoding="utf-8"))
            param_stats["name"] = node.name
            params_list.append(param_stats)

        if log_path:
            node_list.append(
                NodeDef(
                    name=process_name(node.name),
                    op=node.type,
                    input=inp_list,
                    attr=attr,
                ))
    # summary
    extra_info = {
        "#ops": len(graph.all_oprs),
        "#params": len(params_list),
    }

    total_flops, total_param_dims, total_param_size = 0, 0, 0
    if log_params:
        total_param_dims, total_param_size = print_param_stats(
            params_list, bar_length_max)
        extra_info["total_param_dims"] = sizeof_fmt(total_param_dims)
        extra_info["total_param_size"] = sizeof_fmt(total_param_size)
    if log_flops:
        total_flops = print_op_stats(flops_list, bar_length_max)
        extra_info["total_flops"] = sizeof_fmt(total_flops, suffix="OPs")
    if log_params and log_flops:
        extra_info["flops/param_size"] = "{:3.3f}".format(total_flops /
                                                          total_param_size)

    if log_path:
        graph_def = GraphDef(node=node_list, versions=VersionDef(producer=22))

        device = "/device:CPU:0"
        stepstats = RunMetadata(step_stats=StepStats(
            dev_stats=[DeviceStepStats(device=device)]))
        writer = SummaryWriter(log_path)
        writer._get_file_writer().add_graph((graph_def, stepstats))

    print_summary(**extra_info)

    # FIXME: remove this after resolving "span dist too large" warning
    _imperative_rt_logger.set_log_level(old_level)

    return total_param_size, total_flops
示例#21
0
def visualize(
    model_path: str,
    log_path: str,
    input: np.ndarray = None,
    inp_dict: dict = None,
    cal_params: bool = True,
    cal_flops: bool = True,
    cal_activations: bool = True,
    logging_to_stdout: bool = True,
    bar_length_max: int = 20,
):
    r"""
    Load megengine dumped model and visualize graph structure with tensorboard log files.
    Can also record and print model's statistics like :func:`~.module_stats`

    :param model_path: dir path for megengine dumped model.
    :param log_path: dir path for tensorboard graph log.
    :param input: user defined input data for running model and calculating stats, alternative with inp_dict, used when the model has only one input.
    :param inp_dict: input dict for running model and calculating stats, alternative with input, used when the model has more than one input. When both input and inp_dict are None, a random input will be used.
    :param cal_params: whether calculate and record params size.
    :param cal_flops: whether calculate and record op flops.
    :param cal_activations: whether calculate and record op activations.
    :param logging_to_stdout: whether print all calculated statistic details.
    :param bar_length_max: size of bar indicating max flops or parameter size in net stats.

    """
    if log_path:
        try:
            from tensorboard.compat.proto.attr_value_pb2 import AttrValue
            from tensorboard.compat.proto.config_pb2 import RunMetadata
            from tensorboard.compat.proto.graph_pb2 import GraphDef
            from tensorboard.compat.proto.node_def_pb2 import NodeDef
            from tensorboard.compat.proto.step_stats_pb2 import (
                AllocatorMemoryUsed,
                DeviceStepStats,
                NodeExecStats,
                StepStats,
            )
            from tensorboard.compat.proto.tensor_shape_pb2 import TensorShapeProto
            from tensorboard.compat.proto.versions_pb2 import VersionDef
            from tensorboardX import SummaryWriter
        except ImportError:
            logger.error(
                "TensorBoard and TensorboardX are required for visualize.",
                exc_info=True,
            )
            return

    enable_receptive_field()

    graph = Network.load(model_path)
    graph.reset_batch_size(1)

    has_input = False
    if input is not None or inp_dict is not None:
        has_input = True
        repl_dict = {}
        inp_vars = graph.input_vars
        if inp_dict is not None:
            assert len(inp_dict) == len(
                inp_vars
            ), "Inputs are not sufficient for calculation."
            for v in inp_vars:
                new_input = graph.make_const(inp_dict[v.name], name=v.name)
                repl_dict[v] = new_input
        else:
            assert len(inp_vars) == 1, "The graph needs more than one input."
            inp_var = inp_vars[0]
            repl_dict[inp_var] = graph.make_const(input, name=inp_var.name)
        graph.replace_vars(repl_dict=repl_dict)

    graph._compile()

    def process_name(name):
        # nodes that start with point or contain float const will lead to display bug
        if not re.match(r"^[+-]?\d*\.\d*", name):
            name = name.replace(".", "/")
        return name.encode(encoding="utf-8")

    summary = [["item", "value"]]
    node_list = []
    flops_list = []
    params_list = []
    activations_list = []
    total_stats = namedtuple("total_stats", ["param_size", "flops", "act_size"])
    stats_details = namedtuple("module_stats", ["params", "flops", "activations"])

    for node in tqdm(graph.all_oprs):
        if hasattr(node, "output_idx"):
            node_oup = node.outputs[node.output_idx]
        else:
            if len(node.outputs) != 1:
                logger.warning(
                    "OpNode {} has more than one output and not has 'output_idx' attr.".format(
                        node
                    )
                )
            node_oup = node.outputs[0]

        inp_list = [process_name(var.owner.name) for var in node.inputs]
        if log_path:
            # detail format see tensorboard/compat/proto/attr_value.proto
            attr = {
                "_output_shapes": AttrValue(
                    list=AttrValue.ListValue(
                        shape=[
                            TensorShapeProto(
                                dim=[
                                    TensorShapeProto.Dim(size=d) for d in node_oup.shape
                                ]
                            )
                        ]
                    )
                ),
                "params": AttrValue(s=str(node.params).encode(encoding="utf-8")),
                "dtype": AttrValue(s=str(node_oup.dtype).encode(encoding="utf-8")),
            }

        if cal_flops:
            flops_stats = get_op_stats(node, node.inputs, node.outputs)
            if flops_stats is not None:
                # add op flops attr
                if log_path and hasattr(flops_stats, "flops_num"):
                    attr["flops"] = AttrValue(
                        s=sizeof_fmt(flops_stats["flops"]).encode(encoding="utf-8")
                    )
                flops_stats["name"] = node.name
                flops_stats["class_name"] = node.type
                flops_list.append(flops_stats)

        if cal_activations:
            acts = get_activation_stats(node_oup.numpy(), has_input=has_input)
            acts["name"] = node.name
            acts["class_name"] = node.type
            activations_list.append(acts)

        if cal_params:
            if node.type == "ImmutableTensor":
                param_stats = get_param_stats(node.numpy())
                # add tensor size attr
                if log_path:
                    attr["size"] = AttrValue(
                        s=sizeof_fmt(param_stats["size"]).encode(encoding="utf-8")
                    )
                param_stats["name"] = node.name
                params_list.append(param_stats)

        if log_path:
            node_list.append(
                NodeDef(
                    name=process_name(node.name),
                    op=node.type,
                    input=inp_list,
                    attr=attr,
                )
            )
    # summary
    extra_info = {
        "#ops": len(graph.all_oprs),
        "#params": len(params_list),
    }

    (
        total_flops,
        total_param_dims,
        total_param_size,
        total_act_dims,
        total_act_size,
    ) = (0, 0, 0, 0, 0)

    if cal_params:
        total_param_dims, total_param_size, params_list = sum_param_stats(
            params_list, bar_length_max
        )
        extra_info["total_param_dims"] = sizeof_fmt(total_param_dims, suffix="")
        extra_info["total_param_size"] = sizeof_fmt(total_param_size)
        if logging_to_stdout:
            print_param_stats(params_list)

    if cal_flops:
        total_flops, flops_list = sum_op_stats(flops_list, bar_length_max)
        extra_info["total_flops"] = sizeof_fmt(total_flops, suffix="OPs")
        if logging_to_stdout:
            print_op_stats(flops_list)

    if cal_activations:
        total_act_dims, total_act_size, activations_list = sum_activations_stats(
            activations_list, bar_length_max
        )
        extra_info["total_act_dims"] = sizeof_fmt(total_act_dims, suffix="")
        extra_info["total_act_size"] = sizeof_fmt(total_act_size)
        if logging_to_stdout:
            print_activations_stats(activations_list, has_input=has_input)

    if cal_flops and cal_params:
        extra_info["flops/param_size"] = "{:3.3f}".format(
            total_flops / total_param_size
        )

    if log_path:
        graph_def = GraphDef(node=node_list, versions=VersionDef(producer=22))

        device = "/device:CPU:0"
        stepstats = RunMetadata(
            step_stats=StepStats(dev_stats=[DeviceStepStats(device=device)])
        )
        writer = SummaryWriter(log_path)
        writer._get_file_writer().add_graph((graph_def, stepstats))

    print_summary(**extra_info)

    return (
        total_stats(
            param_size=total_param_size, flops=total_flops, act_size=total_act_size,
        ),
        stats_details(
            params=params_list, flops=flops_list, activations=activations_list
        ),
    )