def run_collage(
    input_mod, targets, cost_estimator, expected_mod, tvm_max_depth=8, byoc_max_depth=8
):
    ctxt = {
        "relay.collage.tvm_max_depth": tvm_max_depth,
        "relay.collage.byoc_max_depth": byoc_max_depth,
    }
    expected_mod = InferType()(expected_mod)
    pass_ctxt = tvm.transform.PassContext(config=ctxt)
    with pass_ctxt:
        config = make_compilation_config(pass_ctxt, targets)
        actual_mod = InferType()(input_mod)
        # Capture indexes only to help debug failing tests
        actual_mod = CapturePostDfsIndexInSpans()(actual_mod)
        actual_mod = CollagePartition(config, cost_estimator)(actual_mod)

        if not tvm.ir.structural_equal(actual_mod, expected_mod, map_free_vars=True):
            # Print everything in full so we can see what's going on when things fail.
            print("Input module:")
            print(input_mod)
            print("Actual module:")
            print(actual_mod)
            print("Expected module:")
            print(expected_mod)
            # Assert again so as to see the actual disagreeing sub-expressions.
            tvm.ir.assert_structural_equal(actual_mod, expected_mod, map_free_vars=True)
Example #2
    def transform_module(self, mod, _):
        """Invokes the pass"""
        # TODO(@jroesch): Is there a way to do one shot initialization?
        # can we have def pass_init?
        mod.import_from_std("core.rly")
        mod = InferType()(mod)

        assert isinstance(self.targets, (dict, container.Map))
        if len(self.targets) > 1:
            pass_ctx = PassContext.current()
            if "relay.fallback_device_type" in pass_ctx.config:
                fallback_ctx = nd.context(
                    pass_ctx.config["relay.fallback_device_type"])
            else:
                fallback_ctx = cpu(0)
            ca = context_analysis(mod, TVMContext(fallback_ctx.device_type, 0))
        else:
            if isinstance(self.targets, dict):
                dev = list(self.targets.keys())[0]
            else:
                dev, _ = self.targets.items()[0]
            ca = context_analysis(mod, nd.context(dev.value))

        # The following code can be used for debugging the module after
        # annotation.
        # print(mod.astext(show_meta_data=False, annotate=mk_analysis_annotator(ca)))

        gv_funcs = mod.functions
        for gv, f in gv_funcs.items():
            ea = ManifestAllocPass(self.target_host, ca)
            f = ea.visit(f)
            mod.update_func(gv, f)
        return mod
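A minimal sketch of how the fallback device might be supplied to the pass above through the PassContext config it reads. Only the "relay.fallback_device_type" key is taken from the snippet; device type 1 corresponds to the CPU, and ManifestAlloc is a hypothetical name for whichever module pass wraps transform_module:

# ManifestAlloc is hypothetical; only the config key comes from the code above.
with tvm.transform.PassContext(config={"relay.fallback_device_type": 1}):
    mod = ManifestAlloc(target_host, targets)(mod)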
Example #3
    def __init__(self, mod=None):
        self.target_host = None
        self.build_func = None
        self.params = None
        self.target = None
        self.name = None
        self.dev = None
        self.idx = None
        self.mod = mod
        self.input_params = InferType()(mod)["main"].params
        self.output_type = InferType()(mod)["main"].checked_type.ret_type
        self.input_bindings = PipelineConfig.BindingList(self, "input")
        self.output_bindings = PipelineConfig.BindingList(self, "output")
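A small, self-contained illustration (the module below is made up) of why InferType is applied before "main" is read in the snippet above: the function's checked_type, and the checked types of its params, are only populated once type inference has run.

import tvm
from tvm import relay
from tvm.relay.transform import InferType

x = relay.var("x", shape=(1, 4), dtype="float32")
mod = tvm.IRModule.from_expr(relay.nn.relu(x))
mod = InferType()(mod)
print(mod["main"].params)                  # typed input parameters
print(mod["main"].checked_type.ret_type)   # Tensor[(1, 4), float32]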
Example #4
def test_batch_matmul_simple():
    """Batch matmul is a special case where we try to accumulate to fp16.

    This is because heterogeneous accumulation dtypes do not work on all
    platforms at the moment.
    """
    data = relay.var("data", shape=[1, 1, 20])
    weight = relay.var("weight", shape=[1, 20, 20])
    a = relay.nn.batch_matmul(data, weight)
    mod = tvm.IRModule.from_expr(a)
    mod_params = {
        "data": np.random.uniform(-1, 1, size=[1, 1, 20]).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=[1, 20, 20]).astype("float32"),
    }
    output_mod = verify_mixed_precision_output_close(mod,
                                                     mod_params,
                                                     atol=0.01,
                                                     rtol=0.01)
    # Create expected module
    data = relay.cast(relay.var("data", shape=[1, 1, 20]), "float16")
    weight = relay.cast(relay.var("weight", shape=[1, 20, 20]), "float16")
    a = relay.nn.batch_matmul(data, weight, out_dtype="float16")
    expected_mod = tvm.IRModule.from_expr(a)
    expected_mod = InferType()(expected_mod)
    assert tvm.ir.structural_equal(expected_mod, output_mod)
Example #5
def test_where_simple():
    data = relay.var("data", shape=[1, 20])
    weight = relay.var("weight", shape=[20, 20])
    a = relay.nn.dense(data, weight, units=20)
    b = relay.where(data, a, a)
    mod = tvm.IRModule.from_expr(b)
    mod_params = {
        "data": np.random.uniform(-1, 1, size=[1, 20]).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=[20, 20]).astype("float32"),
    }

    output_mod = verify_mixed_precision_output_close(mod,
                                                     mod_params,
                                                     atol=0.01,
                                                     rtol=0.01)

    # Create expected module
    data = relay.cast(relay.var("data", shape=[1, 20]), "float16")
    weight = relay.cast(relay.var("weight", shape=[20, 20]), "float16")
    a = relay.nn.dense(data, weight, units=20, out_dtype="float16")
    b = relay.where(data, a, a)
    expected_mod = tvm.IRModule.from_expr(b)
    expected_mod = InferType()(expected_mod)

    assert tvm.ir.structural_equal(expected_mod, output_mod)
Example #6
def test_convert_follow_node_with_integer_arguments():
    """Tests the conversion of a follow op with integer arguments + constant float args.

    The follow op should convert the floating point argument into fp16 as constants/vars
    will always be converted if safe to do so.
    """

    data = relay.var("data", shape=[1, 10], dtype="float32")

    # We use an addition to make sure the input indices are not a var
    # (which are always cast if safe)
    indices = relay.var("indices", shape=[1, 1], dtype="int32") + relay.const(
        0, dtype="int32")
    take = relay.take(data, indices, axis=0)
    mod = tvm.IRModule.from_expr(take)

    mod_params = {
        "data": np.random.uniform(-1, 1, size=[1, 10]).astype("float32"),
        "indices": np.array([[0]]).astype("int32"),
    }
    output_mod = verify_mixed_precision_output_close(mod,
                                                     mod_params,
                                                     atol=0.01,
                                                     rtol=0.01)

    # Create expected module
    data = relay.cast(relay.var("data", shape=[1, 10]), "float16")
    take = relay.take(data, indices, axis=0)
    expected_mod = tvm.IRModule.from_expr(take)
    expected_mod = InferType()(expected_mod)
    assert tvm.ir.structural_equal(expected_mod, output_mod)
Example #7
def verify_mixed_precision_output_close(
    mod: tvm.runtime.Module,
    mod_params: Dict[str, Any],
    mixed_precision_dtype="float16",
    rtol: float = 1e-3,
    atol: float = 0,
    keep_orig_output_dtype=False,
) -> tvm.runtime.Module:

    mod = InferType()(mod)
    result_fp32 = run_module(mod, mod_params)

    if not keep_orig_output_dtype:
        fp16_mod = ToMixedPrecision(mixed_precision_dtype)(mod)
        result_fp16 = run_module(fp16_mod, mod_params)
    else:
        with tvm.transform.PassContext(
            config={"relay.ToMixedPrecision.keep_orig_output_dtype": True}
        ):
            fp16_mod = ToMixedPrecision(mixed_precision_dtype)(mod)
            result_fp16 = run_module(fp16_mod, mod_params)

    # Ensure the results are close
    for fp32, fp16 in zip(result_fp32, result_fp16):
        np.testing.assert_allclose(fp32, fp16, rtol=rtol, atol=atol)

    if keep_orig_output_dtype:
        assert (np.array(result_fp16).dtype == np.array(result_fp32).dtype
                ), "output type and original type mismatch"

    return fp16_mod
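A short usage sketch for the helper above, assuming relay, np, and the helper itself are already in scope (the module is made up for illustration). With keep_orig_output_dtype=True the converted module should still return float32 outputs while running the dense in float16:

data = relay.var("data", shape=[1, 20])
weight = relay.var("weight", shape=[20, 20])
mod = tvm.IRModule.from_expr(relay.nn.dense(data, weight, units=20))
mod_params = {
    "data": np.random.uniform(-1, 1, size=[1, 20]).astype("float32"),
    "weight": np.random.uniform(-1, 1, size=[20, 20]).astype("float32"),
}
# The helper also asserts that the fp16 module's output dtype matches fp32's.
fp16_mod = verify_mixed_precision_output_close(
    mod, mod_params, atol=0.01, rtol=0.01, keep_orig_output_dtype=True
)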
Example #8
def test_green_red_not_use_extraneous_cast():
    """Conv. is a green listed operation, while softmax is red.

    Conv. also by default accumulates to fp32 but outputs fp16.

    We want to avoid a situation where we have extraneous casts.
    E.g. because softmax wants to operate on FP32 we might have

    conv (FP32) -> cast (FP16) -> cast (FP32) -> softmax (FP32)

    To get around this, whenever the pass inserts a cast it internally caches
    both the cast output node and the reverse mapping from the cast back to
    the original node. For example, casting the `conv (FP32)` node to FP16
    would produce

    `conv (FP32) -> cast (FP16)`

    as the outputs. Now, any time we try to cast the `conv (FP32)` node to
    FP16 again, the cached result is returned instead of a new cast node:

    `conv (FP32) -> cast (FP16)`

    Furthermore, if we try to cast the `cast (FP16)` node back to FP32, it
    simply returns

    `conv (FP32)`.

    This test makes sure this behavior occurs.
    """
    data_shape = (1, 3, 32, 32)
    weight_shape = (5, 3, 3, 3)
    data = relay.var("data", shape=data_shape, dtype="float32")
    weight = relay.var("weight", shape=weight_shape, dtype="float32")
    conv = relay.nn.conv2d(data, weight, strides=(1, 1), padding=(1, 1), out_dtype="float32")
    result = relay.nn.softmax(conv)
    mod = tvm.IRModule.from_expr(result)

    mod_params = {
        "data": np.random.uniform(-1, 1, size=data_shape).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=weight_shape).astype("float32"),
    }
    fp16_mod = verify_mixed_precision_output_close(mod, mod_params, atol=0.01, rtol=1e-3)

    # Construct expected structure
    conv = relay.cast(
        relay.nn.conv2d(
            relay.cast(data, "float16"),
            relay.cast(weight, "float16"),
            strides=(1, 1),
            padding=(1, 1),
            out_dtype="float16",
        ),
        "float32",
    )
    result = relay.nn.softmax(conv)
    expected_mod = tvm.IRModule.from_expr(result)
    expected_mod = InferType()(expected_mod)

    assert tvm.ir.structural_equal(expected_mod, fp16_mod)
Example #9
def test_let_statement_simple():
    """A 'simple' let statement example.

    Note the mutation of the bound variable types.
    """
    var1 = relay.var("var1", shape=[1, 20])
    var2 = relay.var("var2", shape=[1, 20])

    data = relay.var("data", shape=[1, 20])
    weight = relay.var("weight", shape=[20, 20])

    r1 = var1 + var1

    r2 = var2 + var2
    let2 = relay.Let(var2, relay.nn.dense(r1, weight, units=20), r2)
    let1 = relay.Let(var1, relay.nn.dense(data, weight, units=20), let2)

    mod = tvm.IRModule.from_expr(let1)
    mod_params = {
        "data": np.random.uniform(-1, 1, size=[1, 20]).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=[20, 20]).astype("float32"),
    }
    output_mod = verify_mixed_precision_output_close(mod,
                                                     mod_params,
                                                     atol=0.01,
                                                     rtol=0.01)

    # Construct expected structure
    var1 = relay.var("var1", shape=[1, 20], dtype="float16")
    var2 = relay.var("var2", shape=[1, 20], dtype="float16")
    data = relay.cast(relay.var("data", shape=[1, 20]), "float16")
    weight = relay.cast(relay.var("weight", shape=[20, 20]), "float16")
    r1 = var1 + var1
    r2 = var2 + var2
    let2 = relay.Let(
        var2,
        relay.cast(relay.nn.dense(r1, weight, units=20, out_dtype="float32"),
                   "float16"),
        r2,
    )
    let1 = relay.Let(
        var1,
        relay.cast(relay.nn.dense(data, weight, units=20, out_dtype="float32"),
                   "float16"),
        let2,
    )
    expected_mod = tvm.IRModule.from_expr(let1)
    expected_mod = InferType()(expected_mod)

    assert tvm.ir.structural_equal(expected_mod, output_mod)
Example #10
def test_conv2d_bwd():
    IC = 16
    OC = 8
    dshape = (16, IC, 32, 32)
    wshape = (OC, IC, 3, 3)
    padding = (0, 0)
    strides = (1, 1)

    conv = get_conv2d_nchw(
        dshape,
        wshape,
        padding,
        strides=strides,
        out_dtype="float32",
        data_dtype="float32",
        weight_dtype="float32",
    )
    fwd_mod = InferType()(tvm.IRModule.from_expr(conv))

    # Note: large difference between TVM and CUTLASS Wgrad results if fp16 is used.
    # Cutlass wgrad uses fp32 accumulation even if the output is fp16.
    use_fp16 = False
    verify_dgrad = False  # False to verify wgrad
    tol = 1e-5 if verify_dgrad else 1e-4  # Wgrad slightly less accurate

    if use_fp16:
        fwd_mod = ToMixedPrecision("float16")(fwd_mod)

    fwd_bwd_func = FirstOrderGradient()(fwd_mod)["main"]

    bwd_func = relay.Function(
        fwd_bwd_func.params,
        relay.TupleGetItem(relay.TupleGetItem(fwd_bwd_func.body, 1),
                           0 if verify_dgrad else 1),
    )

    verify_conv2d(
        bwd_func,
        bwd_func,
        dshape,
        wshape,
        sm=80,
        atol=1e-2 if use_fp16 else tol,
        rtol=1e-2 if use_fp16 else tol,
        use_cudnn_ref=False,
        data_dtype="float32",
        weight_dtype="float32",
        use_vm=True,
    )
Example #11
def verify_mixed_precision_output_close(
    mod: tvm.runtime.Module,
    mod_params: Dict[str, Any],
    mixed_precision_dtype="float16",
    rtol: float = 1e-3,
    atol: float = 0,
) -> tvm.runtime.Module:

    mod = InferType()(mod)
    result_fp32 = run_module(mod, mod_params)
    fp16_mod = ToMixedPrecision(mixed_precision_dtype)(mod)
    result_fp16 = run_module(fp16_mod, mod_params)
    # Ensure the results are close
    for fp32, fp16 in zip(result_fp32, result_fp16):
        np.testing.assert_allclose(fp32, fp16, rtol=rtol, atol=atol)

    return fp16_mod
Example #12
def test_unused_function():
    cond = relay.const(True)
    mod = tvm.IRModule()
    then_name = relay.GlobalVar("times_2")
    # define unused function
    else_name = relay.GlobalVar("times_3")
    t1 = relay.TensorType((2, 2), dtype="float32")
    x1 = relay.var("x1", t1, dtype="float32")
    x2 = relay.var("x2", t1, dtype="float32")
    f2 = relay.multiply(x1, relay.const(2.0))
    f3 = relay.multiply(x2, relay.const(3.0))
    mod[then_name] = relay.Function([x1], f2)
    mod[else_name] = relay.Function([x2], f3)
    mod = InferType()(mod)
    x3 = relay.var("x3", t1, dtype="float32")
    # put unused function in else branch
    f = relay.If(cond, then_name(x3), else_name(x3))
    mod["main"] = relay.Function([x3], f)
    x_data = np.random.rand(2, 2).astype("float32")
    y_data = x_data * 2

    check_result([x_data], y_data, mod=mod)
Example #13
    def _partition_call_operator(self, inputs, attr):
        """
        Convert TensorFlow partitioned call ops into Relay function calls, and
        convert the function definitions from the TensorFlow graph library
        attribute into Relay global functions.

        Parameters
        ----------
        inputs : List[tvm.relay.Expr]
            List of input symbols.

        attr : Dict[tvm.Attrs]
            Dict of operator attributes.

        Returns
        -------
        op : tvm.relay.Expr
            Converted relay expression.
        """

        try:
            from tensorflow.python.framework import function_def_to_graph
        except ImportError as e:
            raise ImportError(
                "Unable to import tensorflow which is required {}".format(e))

        main_graph_proto = self._main_graph_proto
        outer_graph_def = main_graph_proto._graph

        node_func_name = attr.get("f").name
        func = next(
            (f for f in outer_graph_def.library.function
             if f.signature.name == node_func_name),
            None,
        )
        if func:
            devices = set(node.device for node in func.node_def)
            if len(devices) > 1:
                raise Exception("Found inconsistent Device assignment in the "
                                "Stateful Partitioned SubGraph. Rejecting "
                                "the subgraph ")
            # Convert function definition to graph
            func_input_shapes = func.attr["_input_shapes"].list.shape
            subgraph, _ = function_def_to_graph.function_def_to_graph_def(
                func, func_input_shapes)

            # Computing subgraph's input shape dictionary
            subgraph_shape_dict, input_expr_dict = {}, {}
            for f_arg, input in zip(func.signature.input_arg, inputs):
                input_expr_dict[f_arg.name] = input
                subgraph_shape_dict[f_arg.name] = _infer_shape(
                    input, main_graph_proto._mod)

            func_name = "func_{}".format(func.signature.name)
            try:
                global_func = main_graph_proto._mod[func_name]
                sub_func = global_func
                sub_params = main_graph_proto._params
            except ValueError:
                # Construct relay nodes from the subgraph
                g1 = SubGraphProto(main_graph_proto)
                sub_func, sub_params = g1.from_tensorflow(
                    subgraph, shape=subgraph_shape_dict)
                main_graph_proto._params.update(sub_params)
                func_expr = _function.Function(sub_func.params, sub_func.body)
                global_func = tvm.relay.GlobalVar(func_name)
                main_graph_proto._mod[global_func] = func_expr
                main_graph_proto._mod = InferType()(main_graph_proto._mod)

            param_exprs = []
            for param_expr in sub_func.params:
                # sub_params is subset of sub_func.params
                param_name = param_expr.vid.name_hint
                if param_name in input_expr_dict.keys():
                    param_exprs.append(input_expr_dict[param_name])
                elif param_name in sub_params.keys():
                    param_exprs.append(param_expr)
                else:
                    raise Exception(
                        "Input parameter {} not found".format(param_name))

            sb = tvm.relay.scope_builder.ScopeBuilder()
            loop_ret = global_func(*param_exprs)
            sb.ret(loop_ret)
            ret = sb.get()
        else:
            raise Exception("Function not found - {}".format(node_func_name))
        return ret
Example #14
def _convert_function(
    module, graph, inputs, attr, node_func_name, prelude, gdef_lib, in_shapes=None
):
    """Convert given tf node to a relay function call

    Parameters
    ----------
    module : IRModule
        where converted function is stored

    graph: <class 'tensorflow.core.framework.graph_pb2.GraphDef'>
        top level tf graphdef

    inputs : List[tvm.relay.Expr]
        List of input symbols. Parameters for the function.

    attrs : Dict[tvm.Attrs]
        Dict of operator attributes.

    node_func_name : str
        Name of tf2 node to be converted

    Returns
    -------
    op : tvm.relay.Expr
        <class 'tvm.relay.expr.Call'>

    Examples
    --------
    A tf function "x+1" is implemented as a subgraph in the library section of the graph.
    This subgraph is converted to a relay function such as:
        fn (%x: float32) {
        add(%x, 1f) /* Identity */
        }

    The subgraph has a function name such as __inference_add_95.
    The tf function call operator is returned as a relay expression, such as:
        free_var %x: float32;
        @func___inference_add_95(%x)

    """
    func = next(
        (f for f in graph.library.function if f.signature.name == node_func_name),
        None,
    )
    if func is None:
        raise Exception("Function not found - {}".format(node_func_name))
    devices = set(node.device for node in func.node_def)
    if len(devices) > 1:
        raise Exception(
            "node_def in function {} contains > 1 types of devices {}".format(
                node_func_name, devices
            )
        )

    subgraph = gdef_lib[node_func_name]
    # preserve library functions in subgraphs to make them available to nested functions
    for fn in graph.library.function:
        subgraph.library.function.add().CopyFrom(fn)

    # Computing subgraph's input shape and type dictionaries
    input_expr_dict = {}
    input_types = {}
    for f_arg, input_ in zip(func.signature.input_arg, inputs):
        input_expr_dict[f_arg.name] = input_
        input_types[f_arg.name] = _infer_type_with_prelude(input_, prelude)

    func_name = "func_{}".format(func.signature.name)
    try:
        global_func = module.mod[func_name]
        sub_func = global_func
        sub_params = module.params
    except ValueError:
        # Construct relay nodes from the subgraph
        g1 = GraphProto(module)
        output_sig = [func.ret[f.name] for f in func.signature.output_arg]
        # TODO: unify prelude and main IRModules
        sub_func, sub_params = g1.from_tensorflow(
            subgraph, outputs=output_sig, input_types=input_types, gdef_lib=gdef_lib
        )
        module.params.update(sub_params)
        func_expr = _function.Function(sub_func.params, sub_func.body)
        global_func = tvm.relay.GlobalVar(func_name)
        module.mod[global_func] = func_expr
        module.mod = InferType()(module.mod)
        prelude.mod = module.mod

    param_exprs = []
    for param_expr in sub_func.params:
        # sub_params is subset of sub_func.params
        param_name = param_expr.vid.name_hint
        if param_name in input_expr_dict.keys():
            param_exprs.append(input_expr_dict[param_name])
        elif param_name in sub_params.keys():
            param_exprs.append(param_expr)
        else:
            raise Exception("Input parameter {} not found".format(param_name))

    sb = tvm.relay.scope_builder.ScopeBuilder()
    loop_ret = global_func(*param_exprs)
    sb.ret(loop_ret)
    ret = sb.get()
    return ret