Exemplo n.º 1
0
    def test_dnnlowp_depthwise_3x3_conv(
        self,
        stride,
        size,
        group,
        batch_size,
        prepack_weight,
        share_col_buffer,
        preserve_activation_sparsity,
        preserve_weight_sparsity,
        quantize_groupwise,
        relu,
        gc,
        dc,
    ):
        pad = 1
        kernel = 3
        dilation = 1
        input_channels_per_group = 1
        output_channels_per_group = 1
        order = "NHWC"

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
            groupwise_quantization=quantize_groupwise,
            preserve_activation_sparsity=preserve_activation_sparsity,
            preserve_weight_sparsity=preserve_weight_sparsity,
        )

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        if relu:
            op_engine_list = [
                ("Conv", ""),
                ("ConvRelu", "DNNLOWP"),
                ("Int8ConvRelu", "DNNLOWP"),
            ]
        else:
            op_engine_list = [
                ("Conv", ""),
                ("Conv", "DNNLOWP"),
                ("Int8Conv", "DNNLOWP"),
            ]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine
            do_dequantize = "DNNLOWP" in engine
            do_prepack_weight = engine == "DNNLOWP" and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize",
                    ["X"],
                    ["X_q"],
                    preserve_activation_sparsity=preserve_activation_sparsity,
                    engine=engine,
                    device_option=gc,
                )
                net.Proto().op.extend([quantize])

            if do_prepack_weight:
                X_min = 0 if X.size == 0 else X.min()
                X_max = 0 if X.size == 0 else X.max()
                x_q_param = dnnlowp_utils.choose_quantization_params(
                    X_min, X_max, preserve_activation_sparsity)
                inputs = ["W"]
                if do_dequantize:
                    inputs += ["b"]
                pack = core.CreateOperator(
                    "Int8ConvPackWeight",
                    inputs,
                    ["W_packed"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    preserve_weight_sparsity=preserve_weight_sparsity,
                    engine=engine,
                    group=group,
                    quantize_groupwise=quantize_groupwise,
                    in_scale=x_q_param.scale,
                )
                init_net.Proto().op.extend([pack])

            conv = core.CreateOperator(
                op_type,
                ["X_q" if do_quantize else "X", "W", "b"],
                ["Y_q" if do_dequantize else "Y"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                shared_buffer=(1 if share_col_buffer else 0),
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                group=group,
                quantize_groupwise=quantize_groupwise,
                device_option=gc,
            )
            if do_dequantize or do_prepack_weight:
                dnnlowp_utils.add_quantization_param_args(
                    conv, outputs[0][0], preserve_activation_sparsity)
            net.Proto().op.extend([conv])

            if do_dequantize:
                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])
            elif relu:
                relu_op = core.CreateOperator("Relu", ["Y"], ["Y"],
                                              engine=engine,
                                              device_option=gc)
                net.Proto().op.extend([relu_op])

            run_conv_or_fc(self, init_net, net, X, W, b, op_type, engine,
                           order, gc, outputs)

        check_quantized_results_close(outputs,
                                      symmetric=preserve_activation_sparsity)
    def test_groupwise_dnnlowp_conv_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        prepack_weight,
        preserve_activation_sparsity,
        preserve_weight_sparsity,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)
        assume((not prepack_weight) or order == "NHWC")

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
            groupwise_quantization=True,
            preserve_activation_sparsity=preserve_activation_sparsity,
            preserve_weight_sparsity=preserve_weight_sparsity,
        )

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("Conv", "DNNLOWP"),
            ("Conv", "DNNLOWP_16"),
            ("Int8Conv", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine
            do_dequantize = "DNNLOWP" in engine
            do_prepack_weight = engine == "DNNLOWP" and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize",
                    ["X"],
                    ["X_q"],
                    preserve_activation_sparsity=preserve_activation_sparsity,
                    engine=engine,
                    device_option=gc,
                )
                net.Proto().op.extend([quantize])

            if do_prepack_weight:
                X_min = 0 if X.size == 0 else X.min()
                X_max = 0 if X.size == 0 else X.max()
                x_q_param = hardcode_scale_zp.choose_quantization_params(
                    X_min, X_max)
                inputs = ["W"]
                if do_dequantize:
                    inputs += ["b"]
                pack = core.CreateOperator(
                    "Int8ConvPackWeight",
                    inputs,
                    ["W_packed"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    preserve_weight_sparsity=preserve_weight_sparsity,
                    engine=engine,
                    group=group,
                    quantize_groupwise=1,
                    in_scale=x_q_param.scale,
                )
                init_net.Proto().op.extend([pack])

            conv = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed" if do_prepack_weight else "W",
                    "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                group=group,
                quantize_groupwise=1,
                device_option=gc,
            )
            if do_dequantize or do_prepack_weight:
                # groupwise quantization only works with static quantization
                # so we need to set quantization parameters
                dnnlowp_utils.add_quantization_param_args(
                    conv, outputs[0][0], preserve_activation_sparsity)
            net.Proto().op.extend([conv])

            if do_dequantize:
                dequantize = core.CreateOperator(
                    "Dequantize",
                    ["Y_q"],
                    ["Y"],
                    preserve_activation_sparsity=preserve_activation_sparsity,
                    engine=engine,
                    device_option=gc,
                )
                net.Proto().op.extend([dequantize])

            run_conv_or_fc(self, init_net, net, X, W, b, op_type, engine,
                           order, gc, outputs)

        check_quantized_results_close(outputs,
                                      symmetric=preserve_activation_sparsity)
Exemplo n.º 3
0
    def test_dnnlowp_conv_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        weight_quantized,
        prepack_weight,
        share_col_buffer,
        preserve_activation_sparsity,
        preserve_weight_sparsity,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)
        assume((not prepack_weight) or order == "NHWC")

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
            preserve_activation_sparsity=preserve_activation_sparsity,
            preserve_weight_sparsity=preserve_weight_sparsity,
        )

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("Conv", "DNNLOWP"),
            ("Conv", "DNNLOWP_16"),
            ("Int8Conv", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine
            do_dequantize = "DNNLOWP" in engine
            # If output scale/zp aren't set, it gets computed from ref fp32 op
            # in DNNLOWP, which isn't possible when we quantize input weights.
            # Make sure atleast one output is collected to compute output
            # scale/zp.
            do_quantize_weight = (engine == "DNNLOWP" and weight_quantized
                                  and len(outputs) > 0)
            do_prepack_weight = engine == "DNNLOWP" and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize",
                    ["X"],
                    ["X_q"],
                    preserve_activation_sparsity=preserve_activation_sparsity,
                    engine=engine,
                    device_option=gc,
                )
                net.Proto().op.extend([quantize])

            X_min = 0 if X.size == 0 else X.min()
            X_max = 0 if X.size == 0 else X.max()
            x_q_param = dnnlowp_utils.choose_quantization_params(
                X_min, X_max, preserve_activation_sparsity)
            if do_quantize_weight:
                int8_given_tensor_fill, w_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
                    W, "W_q", preserve_weight_sparsity)
                init_net.Proto().op.extend([int8_given_tensor_fill])

                # Bias
                int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                    b, "b_q", x_q_param, w_q_param)
                init_net.Proto().op.extend([int8_bias_tensor_fill])

            if do_prepack_weight:
                inputs = ["W_q" if do_quantize_weight else "W"]
                if do_dequantize:
                    inputs += ["b_q" if do_quantize_weight else "b"]
                pack = core.CreateOperator(
                    "Int8ConvPackWeight",
                    inputs,
                    ["W_packed"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    preserve_weight_sparsity=preserve_weight_sparsity,
                    engine=engine,
                    group=group,
                    in_scale=x_q_param.scale,
                )
                init_net.Proto().op.extend([pack])

            conv = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed" if do_prepack_weight else
                    ("W_q" if do_quantize_weight else "W"),
                    "b_q" if do_quantize_weight else "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                shared_buffer=(1 if share_col_buffer else 0),
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                group=group,
                device_option=gc,
            )
            if do_quantize_weight or do_prepack_weight:
                # When quantized weight is provided, we can't rescale the
                # output dynamically by looking at the range of output of each
                # batch, so here we provide the range of output observed from
                # fp32 reference implementation
                dnnlowp_utils.add_quantization_param_args(
                    conv, outputs[0][0], preserve_activation_sparsity)
            net.Proto().op.extend([conv])

            if do_dequantize:
                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])

            run_conv_or_fc(self, init_net, net, X, W, b, op_type, engine,
                           order, gc, outputs)

        check_quantized_results_close(outputs,
                                      symmetric=preserve_activation_sparsity)
    def test_groupwise_dnnlowp_conv_relu_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
            True,  # group-wise
        )

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("ConvRelu", "DNNLOWP"),
            ("ConvRelu", "DNNLOWP_16"),
            ("Int8ConvRelu", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            net = core.Net("test_net")

            if "DNNLOWP" in engine:
                quantize = core.CreateOperator("Quantize", ["X"], ["X_q"],
                                               engine=engine,
                                               device_option=gc)
                net.Proto().op.extend([quantize])

                conv = core.CreateOperator(
                    op_type,
                    ["X_q", "W", "b"],
                    ["Y_q"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    engine=engine,
                    group=group,
                    quantize_groupwise=1,
                    device_option=gc,
                )
                # groupwise quantization only works with static quantization
                # so we need to set quantization parameters
                dnnlowp_utils.add_quantization_param_args(conv, outputs[0][0])
                net.Proto().op.extend([conv])

                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])
            else:
                conv = core.CreateOperator(
                    op_type,
                    ["X", "W", "b"],
                    ["Y"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    engine=engine,
                    group=group,
                    device_option=gc,
                )
                net.Proto().op.extend([conv])

                relu = core.CreateOperator("Relu", ["Y"], ["Y"],
                                           engine=engine,
                                           device_option=gc)
                net.Proto().op.extend([relu])

            run_conv_or_fc(self, None, net, X, W, b, op_type, engine, order,
                           gc, outputs)

        check_quantized_results_close(outputs)
Exemplo n.º 5
0
    def test_dnnlowp_conv_relu_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        share_col_buffer,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
        )

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("ConvRelu", "DNNLOWP"),
            ("ConvRelu", "DNNLOWP_16"),
            ("Int8ConvRelu", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            net = core.Net("test_net")

            if "DNNLOWP" in engine:
                quantize = core.CreateOperator("Quantize", ["X"], ["X_q"],
                                               engine=engine,
                                               device_option=gc)
                net.Proto().op.extend([quantize])

                conv = core.CreateOperator(
                    op_type,
                    ["X_q", "W", "b"],
                    ["Y_q"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    engine=engine,
                    shared_buffer=(1 if share_col_buffer else 0),
                    group=group,
                    device_option=gc,
                )
                net.Proto().op.extend([conv])

                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])
            else:
                conv = core.CreateOperator(
                    op_type,
                    ["X", "W", "b"],
                    ["Y"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    shared_buffer=(1 if share_col_buffer else 0),
                    engine=engine,
                    group=group,
                    device_option=gc,
                )
                net.Proto().op.extend([conv])

                relu = core.CreateOperator("Relu", ["Y"], ["Y"],
                                           engine=engine,
                                           device_option=gc)
                net.Proto().op.extend([relu])

            run_conv_or_fc(self, None, net, X, W, b, op_type, engine, order,
                           gc, outputs)

        check_quantized_results_close(outputs)