Example #1
    def test_add_consistency2(self) -> None:
        """Test code for 'Add', which fails."""
        a = Constant(
            'const1',
            Float32(),
            np.zeros([1, 3, 3])
        )
        b = Constant(
            'const2',
            Float32(),
            np.zeros([2])
        )
        input_ops = {'A': cast(Operator, a), 'B': cast(Operator, b)}
        # [2] does not broadcast against [1, 3, 3], so constructing 'Add'
        # must raise an AssertionError.
        try:
            Add(
                'add1',
                [1, 3, 3],
                Float32(),
                input_ops
            )
        except AssertionError:
            print("Consistency test for 'Add' #2 passed!")
        else:
            self.fail("Consistency test for 'Add' #2 failed.")
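
The same expectation reads more compactly with unittest's assertRaises context manager; an equivalent sketch:

        with self.assertRaises(AssertionError):
            Add('add1', [1, 3, 3], Float32(), input_ops)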
Example #2
    def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph:
        graph = Graph()

        # input
        x = Input('placeholder', [1, 5, 5, 3], Float32())

        # Conv1
        w1 = Constant('weight1', Float32(), data1)
        conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2])

        # activation quantizer
        s1 = Constant('aq_const1', Float32(), np.array(1))
        s2 = Constant('aq_const2', Float32(), np.array(2))
        aq = LinearMidTreadHalfQuantizer('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2})

        # Conv2
        w2 = Constant('weight2', Float32(), data2)
        kq = BinaryMeanScalingQuantizer('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2})
        conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq, 'W': kq}, kernel_shape=[2, 2])
        conv2.a_quantizer = [aq]
        conv2.quantizer = kq

        # One output
        y = Output('output', [1, 3, 3, 3], Float32(), {'input': conv2})

        # add ops to the graph
        graph.add_op_and_inputs(y)

        return graph
Example #3
    def test_dynamic_create_binary(self) -> None:
        """Test code for binary operators."""
        x = Constant('const1', Float32(), np.zeros([1, 3, 3, 3]))

        w = Constant('const2', Float32(), np.zeros([1, 2, 2, 3]))

        binary_ops = ['Conv', 'Add']

        name = 'test'
        dtype = Float32()

        module = importlib.import_module(
            'blueoil.converter.core.operators')
        for op in binary_ops:
            try:
                op_def = getattr(module, op)
                shape = [1, 2, 2, 3] if op == 'Conv' else [1, 3, 3, 3]
                input_ops = {n: opw for n, opw in zip(op_def.input_names, [x, w])} \
                    if op == 'Conv' else {n: x for n in op_def.input_names}
                args = [name, shape, dtype, input_ops]
                obj = op_def(*args)
                self.assertEqual(obj.name, name)
            except Exception as e:
                print(f'failed in testing {op}.')
                raise e

        print("Dynamic binary operator load test passed!")
Example #4
def pass_constant_folding(graph: Graph) -> None:
    """Fold constant subgraphs.

    Given a node N whose inputs are all known at compile time, execute N at
    compile time and replace N and its inputs with a Constant node holding
    the computed output of N.

    Args:
        graph (Graph): The input graph. It will be modified in-place.

    """

    done = False
    processed_nodes = []
    while not done:
        exec_list = sort_graph(graph)
        processed_before_precompute = len(processed_nodes)
        to_be_removed = []

        for m in exec_list:
            if m in processed_nodes:
                continue

            # We want operators with inputs
            if not m.input_nodes:
                continue

            precomputable = all(
                input_node.op_type == 'Constant'
                for input_node in m.input_nodes
            )

            if not precomputable:
                continue

            processed_nodes += m.input_nodes
            processed_nodes.append(m)

            data = m.run_forward()

            new_constant = Constant(m.name + '_new',
                                    m.dtype,
                                    data,
                                    dimension_format=m.dimension)
            graph.add_op(new_constant)

            # get nodes to be removed after being disconnected
            get_nodes_in_branch(m, None, to_be_removed)

            new_constant.add_outputs({'output': m.output_op_list})
            for output_name, consumer_list in m.output_ops.items():
                for consumer_node in consumer_list:
                    for input_name, input_node in consumer_node.input_ops.items():
                        if input_node == m:
                            consumer_node.add_input(input_name, new_constant)
                            break

        for op in to_be_removed:
            graph.remove_op(op)

        # Fixed point reached: the last sweep folded nothing new.
        done = len(processed_nodes) == processed_before_precompute
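
A usage sketch (hypothetical): create_sample_graph from Example #10 builds an Add ('potatoes') whose inputs are both Constant, so one sweep of the pass should fold it into a precomputed Constant:

    graph = create_sample_graph()
    pass_constant_folding(graph)
    # No Add with all-Constant inputs should survive the pass.
    for op in sort_graph(graph):
        assert not (op.op_type == 'Add'
                    and all(i.op_type == 'Constant' for i in op.input_nodes))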
Example #5
    def test_add_consistency1(self) -> None:
        """Test code for 'Add', which succeeds."""
        a = Constant('const1', Float32(), np.zeros([1, 3, 3]))
        b = Constant('const2', Float32(), np.zeros([3]))
        input_ops = {'A': cast(Operator, a), 'B': cast(Operator, b)}
        # Must not raise: [3] broadcasts against [1, 3, 3].
        add = Add('add1', [1, 3, 3], Float32(), input_ops)

        print("Consistency test for 'Add' #1 passed!")
Example #6
    def make_simple_model(self) -> Graph:
        graph = Graph()

        # two inputs
        x = Input(
            'input',
            [1, 5, 5, 3],
            Float32(),
        )

        w = Constant(
            'weight',
            Float32(),
            np.zeros([1, 2, 2, 3]),
            dimension_format='NHWC',
        )

        # Conv
        conv = Conv('conv', [1, 4, 4, 1],
                    Float32(), {
                        'X': x,
                        'W': w
                    },
                    kernel_shape=[2, 2])

        # One output
        y = Output('output', [1, 4, 4, 1], Float32(), {'input': conv})

        # add ops to the graph
        graph.add_op_and_inputs(y)
        return graph
Example #7
    def test_dynamic_create_unary(self) -> None:
        """Test code for unary operators."""
        unary_ops = [
            'Identity', 'BinaryMeanScalingQuantizer', 'Transpose',
            'LinearMidTreadHalfQuantizer', 'MaxPool', 'AveragePool', 'Reshape',
            'Softmax'
        ]

        # unary input
        shape = [1, 3, 3, 3]
        x = Constant('const', Float32(), np.zeros(shape))

        name = 'test'
        dtype = Float32()

        module = importlib.import_module(
            'blueoil.converter.core.operators')
        for op in unary_ops:
            try:
                op_def = getattr(module, op)
                input_ops = {n: x for n in op_def.input_names}
                shape = self.reverse_shape([1, 3, 3, 3]) if op == 'Transpose' \
                    else [1, 2, 2, 3] if op in ('MaxPool', 'AveragePool') \
                    else [1, 3, 3, 3]
                args = [name, shape, dtype, input_ops]
                obj = op_def(*args)
                self.assertEqual(obj.name, name)
            except Exception as e:
                print(f'failed in testing {op}.')
                raise e

        print("Dynamic unary operator load test passed!")
Example #8
    def test_dynamic_create_batchnorm(self) -> None:
        """Test code for n-ary operators (BatchNormalization)."""
        x = Constant(
            'const',
            Float32(),
            np.zeros([1, 3, 3, 3])
        )

        nary_ops = [
            'BatchNormalization'
        ]

        name = 'test'
        shape = [1, 3, 3, 3]
        dtype = Float32()

        module = importlib.import_module('blueoil.converter.core.operators')
        for op in nary_ops:
            try:
                op_def = getattr(module, op)
                input_ops = {n: x for n in op_def.input_names}
                args = [name, shape, dtype, input_ops]
                obj = op_def(*args)
                self.assertEqual(obj.name, name)
            except Exception as e:
                print(f'failed in testing {op}.')
                raise e

        print("Dynamic batchnorm operator load test passed!")
Example #9
    def create_expected_graph(data: np.ndarray) -> Graph:
        graph = Graph()

        # input
        x = Input('placeholder', [1, 5, 5, 3], Float32())

        # constant and internal nodes
        w = Constant('weight', Float32(), data)
        q = BinaryMeanScalingQuantizer('qtz1', [1, 2, 2, 3], Float32(),
                                       {'input': w})

        # Conv
        conv = Conv('conv', [1, 4, 4, 3],
                    Float32(), {
                        'X': x,
                        'W': q
                    },
                    kernel_shape=[2, 2])

        # One output
        rs = Reshape('reshape', [1, 48], Float32(), {'data': conv})
        y = Output(
            'output',
            [1, 48],
            Float32(),
            {'input': rs},
        )

        # add ops to the graph
        graph.add_op_and_inputs(y)

        return graph
Example #10
    def create_sample_graph() -> Graph:
        graph = Graph()

        x = Input('placeholder', [2], Float32())

        s1 = Constant('potato_1', Float32(), np.array([1, 2]))
        s2 = Constant('potato_2', Float32(), np.array([1, 3]))
        add1 = Add('potatoes', [2], Float32(), {'A': s1, 'B': s2})
        add2 = Add('more_potatoes', [2], Float32(), {'A': x, 'B': add1})

        # One output
        y = Output('output', [2], Float32(), {'input': add2})

        # add ops to the graph
        graph.add_op_and_inputs(y)

        return graph
Example #11
    def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph:
        graph = Graph()

        # input
        x = Input('placeholder', [1, 5, 5, 3], Float32())

        # Conv1
        w1 = Constant('weight1', Float32(), data1)
        conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2])

        # activation quantizer
        s1 = Constant('aq_const1', Int32(), np.array([2], dtype=np.int32))
        s2 = Constant('aq_const2', Float32(), np.array([2.0], dtype=np.float32))
        aq1 = LinearMidTreadHalfQuantizer('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2})

        # Conv2
        w2 = Constant('weight2', Float32(), data2)
        kq = BinaryMeanScalingQuantizer('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2})
        conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq1, 'W': kq}, kernel_shape=[2, 2])
        conv2.a_quantizer = [aq1]
        conv2.quantizer = kq
        conv2.is_quantized = True

        sc = Constant('bn_scale', Float32(), np.random.rand(3))
        be = Constant('bn_b', Float32(), np.random.rand(3))
        mu = Constant('bn_mu', Float32(), np.random.rand(3))
        va = Constant('bn_var', Float32(), np.random.rand(3))
        bn = BatchNormalization('bn', [1, 3, 3, 3], Float32(), {'X': conv2,
                                                                'scale': sc,
                                                                'B': be,
                                                                'mean': mu,
                                                                'var': va})

        # activation quantizer
        s3 = Constant('aq_const3', Int32(), np.array([2], dtype=np.int32))
        s4 = Constant('aq_const4', Float32(), np.array([2.0], dtype=np.float32))
        aq2 = LinearMidTreadHalfQuantizer('aqtz2', [1, 3, 3, 3], Float32(), {'X': bn, 'Y': s3, 'Z': s4})

        # One output
        y = Output('output', [1, 3, 3, 3], Float32(), {'input': aq2})

        # add ops to the graph
        graph.add_op_and_inputs(y)

        return graph
Example #12
    def create_sample_graph_2(data1: np.ndarray) -> Graph:
        graph = Graph()

        # input
        x = Input('placeholder', [1, 5, 5, 3], Float32())

        # Conv1
        w1 = Constant('weight1', Float32(), data1)
        conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2])

        s1 = Constant('const1', Float32(), np.zeros([1, 4, 4, 3]))
        add1 = Add('add', [1, 4, 4, 3], Float32(), {'A': conv1, 'B': s1})

        y = Output('output', [1, 4, 4, 3], Float32(), {'input': add1})

        # add ops to the graph
        graph.add_op_and_inputs(y)

        return graph
Example #13
    def test_pool_consistency(self) -> None:
        """Test code for Pool."""
        x = Constant('const1', Float32(), np.zeros([1, 3, 3, 3]))
        input_ops = {'X': cast(Operator, x)}

        pool = MaxPool('max_pool1', [1, 2, 2, 3],
                       Float32(),
                       input_ops,
                       kernel_shape=[3, 3],
                       pads=[1, 1, 1, 1],
                       strides=[2, 2])

        print("Consistency test for pooling operator passed!")
Example #14
    def test_maxpool(self) -> None:
        """Test code for MaxPool."""
        # get MaxPool's input names
        i_names = MaxPool.input_names
        self.assertEqual(i_names, ['X'])

        # set x to MaxPool m's input
        x = Constant('const', Float32(), np.zeros([1, 3, 3, 3]))
        inputs: Dict[str, Operator] = {i_names[0]: x}
        m = MaxPool("MaxPool", [1, 2, 2, 3],
                    Float32(),
                    inputs,
                    kernel_shape=[2, 2])

        print("MaxPool test passed!")
Example #15
    def test_conv_consistency(self) -> None:
        """Test code for Conv."""
        x = Input(
            'const1',
            [1, 3, 3, 3],
            Float32(),
        )
        w = Constant('weight', Float32(), np.zeros([1, 2, 2, 3]))
        input_ops = {'X': cast(Operator, x), 'W': cast(Operator, w)}

        conv = Conv('conv_under_test', [1, 3, 3, 3],
                    Float32(),
                    input_ops,
                    pads=[1, 1, 2, 2],
                    strides=[2, 2])

        print("Consistency test for conv operator passed!")
Example #16
    def create_sample_graph(data1: np.ndarray) -> Graph:
        graph = Graph()

        # input
        x = Input('placeholder', [1, 5, 5, 3], Float32())

        # Conv1
        w1 = Constant('weight1', Float32(), data1)
        conv1 = Conv('conv1', [1, 4, 4, 3], QUANTIZED_PACKED(), {'X': x, 'W': w1}, kernel_shape=[2, 2])

        s2d = SpaceToDepth('s2d', [1, 2, 2, 12], Float32(), {'input': conv1})

        # One output
        y = Output('output', [1, 2, 2, 12], Float32(), {'input': s2d})

        # add ops to the graph
        graph.add_op_and_inputs(y)

        return graph
Example #17
    def test_graph_conv(self) -> None:
        """Test code for making a simple graph with Conv."""
        graph = Graph()

        # two inputs
        x = Input(
            'input',
            [1, 5, 5, 3],
            Float32(),
        )

        w = Constant(
            'weight',
            Float32(),
            np.zeros([1, 2, 2, 3])
        )

        # Conv
        conv = Conv(
            'conv',
            [1, 4, 4, 3],
            Float32(),
            {'X': x, 'W': w},  # you can get these keys by 'Conv.input_names'
            kernel_shape=[2, 2]
        )

        # One output
        y = Output(
            'output',
            [1, 4, 4, 3],
            Float32(),
            {'input': conv}  # you can get this key by 'Output.input_names'
        )

        # add ops to the graph
        graph.add_op(x)
        graph.add_op(w)
        graph.add_op(conv)
        graph.add_op(y)

        self.assertTrue(graph.check_nodes(), "All inputs of operators must match their outputs.")
        print("Graph test passed!")
Example #18
    def create_expected_graph(data: np.ndarray) -> Graph:
        graph = Graph()

        data = data.transpose([3, 2, 1, 0])

        # input
        x = Input('placeholder', [1, 5, 5, 3],
                  Float32(),
                  dimension_format='NHWC')

        # constant and internal nodes
        w = Constant('weight', Float32(), data, dimension_format='NHWC')
        i1 = Identity('identity1', [1, 2, 2, 3],
                      Float32(), {'input': w},
                      dimension_format='NHWC')
        q = BinaryMeanScalingQuantizer('qtz1', [1, 2, 2, 3],
                                       Float32(), {'input': i1},
                                       dimension_format='NHWC')

        # Conv
        conv = Conv('conv', [1, 4, 4, 3],
                    Float32(), {
                        'X': x,
                        'W': q
                    },
                    kernel_shape=[2, 2],
                    dimension_format='NHWC')

        # One output
        rs = Reshape('reshape', [1, 48], Float32(), {'data': conv})
        y = Output(
            'output',
            [1, 48],
            Float32(),
            {'input': rs},
        )

        # add ops to the graph
        graph.add_op_and_inputs(y)

        return graph
Example #19
    def test_conv(self) -> None:
        """Test code for Conv."""
        # get Conv's input names
        i_names = Conv.input_names
        self.assertTrue({'X', 'W'}.issubset(set(i_names)))

        # set x and w as Conv c's inputs
        x = Input(
            'input',
            [1, 3, 3, 3],
            Float32(),
        )
        w = Constant('weight', Float32(), np.zeros([1, 2, 2, 5]))
        inputs: Dict[str, Operator] = {i_names[0]: x, i_names[1]: w}
        c = Conv("conv1", [1, 2, 2, 3], Float32(), inputs, kernel_shape=[2, 2])

        self.assertEqual(c.batchsize, 1)
        self.assertEqual(c.height, 2)
        self.assertEqual(c.width, 2)
        self.assertEqual(c.channel, 3)
        self.assertEqual(c.kernel_height, 2)
        self.assertEqual(c.kernel_width, 2)

        print("Conv test passed!")
Example #20
def pass_simplify_batchnorm(graph: Graph) -> None:
    """Simplify each BatchNormalization operator into a single affine transform.

    Folds y = scale * (x - mean) / sqrt(var + epsilon) + B into y = x * s + b,
    with s = scale / sqrt(var + epsilon) and b = B - s * mean, and emits a
    BatchNormalizationOptimized node with the precomputed s and b.
    """

    exec_list = [
        x for x in sort_graph(graph) if x.op_type == 'BatchNormalization'
    ]

    to_be_removed = []

    for node in exec_list:
        scale = node.input_ops['scale']
        B = node.input_ops['B']
        mean = node.input_ops['mean']
        var = node.input_ops['var']
        for input_name, input_op in [('scale', scale), ('B', B),
                                     ('mean', mean), ('var', var)]:
            if input_op.op_type != 'Constant':
                raise RuntimeError(
                    f'{input_name} for BatchNormalization must be Constant')

        new_name = node.name + '_optimized'
        new_scale_data = scale.data / np.sqrt(var.data + node.epsilon)
        new_scale = Constant(new_name + '_scale',
                             scale.dtype,
                             new_scale_data,
                             dimension_format=scale.dimension)
        new_bias_data = B.data - new_scale_data * mean.data
        new_bias = Constant(new_name + '_bias',
                            B.dtype,
                            new_bias_data,
                            dimension_format=B.dimension)
        new_op = BatchNormalizationOptimized(new_name,
                                             node.shape,
                                             node.dtype, {
                                                 'X': node.input_ops['X'],
                                                 'scale': new_scale,
                                                 'bias': new_bias
                                             },
                                             dimension_format=node.dimension)
        new_scale.add_output('output', new_op)
        new_bias.add_output('output', new_op)

        input_op = node.input_ops['X']
        update_key = None
        new_outputs = [new_op]
        for key, inout_ops in input_op.output_ops.items():
            if node in inout_ops:
                update_key = key
                for op in inout_ops:
                    if op != node:
                        new_outputs.append(op)
        if update_key is not None:
            input_op.remove_output(update_key)
            input_op.add_outputs({update_key: new_outputs})

        out_ops = node.output_op_list
        for op in out_ops:
            update_key = None
            for key, outin_op in op.input_ops.items():
                if outin_op == node:
                    update_key = key
            if update_key is not None:
                op.add_input(update_key, new_op)
            new_op.add_output('Y', op)

        graph.add_op(new_scale)
        graph.add_op(new_bias)
        graph.add_op(new_op)

        to_be_removed += [node, scale, B, mean, var]

    for node in to_be_removed:
        graph.remove_op(node)
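
The identity behind the fold is plain algebra and easy to verify with NumPy; a minimal self-contained check (epsilon chosen arbitrarily):

    import numpy as np

    x = np.random.rand(1, 3, 3, 3)
    scale, B, mean, var, eps = 2.0, 0.5, 0.1, 4.0, 1e-5
    s = scale / np.sqrt(var + eps)
    b = B - s * mean
    assert np.allclose(scale * (x - mean) / np.sqrt(var + eps) + B, x * s + b)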
Example #21
def pass_lookup(graph: Graph) -> None:
    """Replace a quantized table-lookup chain with a single Lookup operator.

    If a quantizer is fed through Reshape <- Reshape <- Gather <- Gather from a
    256-entry table, precompute the quantized table, pack it into LSB and MSB
    word constants, and rewire the graph input to a Lookup node.

    Args:
        graph (Graph): The input graph. It will be modified in-place.

    """
    quantization_types = [
        'BinaryMeanScalingQuantizer', 'LinearMidTreadHalfQuantizer',
        'BinaryChannelWiseMeanScalingQuantizer'
    ]

    to_be_removed = []
    exec_list = [
        n for n in sort_graph(graph) if n.op_type in quantization_types
    ]
    placeholder = [n for n in sort_graph(graph) if n.op_type == 'Input']

    for m in exec_list:
        quantizer = m

        p1 = quantizer.input_nodes[0]
        if p1.op_type != 'Reshape':
            continue
        p2 = p1.input_nodes[0]
        if p2.op_type != 'Reshape':
            continue
        p3 = p2.input_nodes[0]
        if p3.op_type != 'Gather':
            continue
        p4 = p3.input_nodes[0]
        if p4.op_type != 'Gather':
            continue
        gather_params = p4.input_nodes[0]
        if gather_params.rank != 2 or gather_params.shape[0] != 256:
            continue

        params = gather_params.data
        data = {'data': params}
        qtz_data = quantizer.run(**data)['data']

        word_size = 32
        lu_bitwidth = quantizer.nbit
        packer = Packer(lu_bitwidth, word_size)

        lsb = np.zeros((256, ), np.uint32)
        msb = np.zeros((256, ), np.uint32)

        for idx, p in enumerate(qtz_data):
            data = packer.run(p.astype(np.float32), p.shape).flatten()
            lsb[idx] = data[0]
            msb[idx] = data[1]

        pe_lsb = Constant('pe_lsb_new',
                          QUANTIZED_PACKED_KERNEL(),
                          lsb,
                          dimension_format='TC',
                          packed=True,
                          actual_shape=[256, word_size])
        pe_msb = Constant('pe_msb_new',
                          QUANTIZED_PACKED_KERNEL(),
                          msb,
                          dimension_format='TC',
                          packed=True,
                          actual_shape=[256, word_size])

        n, h, w, c = quantizer.shape
        shape = [1, h, w, 2, word_size]
        pe = Lookup('Lookup',
                    shape,
                    QUANTIZED_PACKED(), {
                        'input': placeholder[0],
                        'lsb': pe_lsb,
                        'msb': pe_msb
                    },
                    dimension_format='ChHWBCl')

        get_nodes_in_branch(quantizer, placeholder[0], to_be_removed)

        reserved_placeholder_ops = [
            out_op for out_op in placeholder[0].output_op_list
            if out_op not in to_be_removed
        ]
        placeholder[0].remove_output('output')
        placeholder[0].add_outputs({'output': reserved_placeholder_ops})
        pe.add_outputs(quantizer.output_ops)

        output_op = quantizer.output_op_list[0]

        target_input_name = 'X'
        for input_name in output_op._input_names:
            if quantizer.equals(output_op._input_ops[input_name]):
                target_input_name = input_name
                break

        output_op.add_input(target_input_name, pe)

        graph.add_op(pe_lsb)
        graph.add_op(pe_msb)
        graph.add_op(pe)

    for op in to_be_removed:
        graph.remove_op(op)
Example #22
def pass_pack_weights(graph: Graph) -> None:
    """Given a Quantized convolution node C, it will pack the weights of C into 32 bit words.
       If the node Q that apply quantization to the weights of C quantizes, for example, into 1 bit values
       then one 32 bit word will contain 32 weights.

    Args:
        graph (Graph): The input graph. It will be modified in-place.

    """
    exec_list = [n for n in sort_graph(graph) if n.op_type == 'Conv']
    quantization_types = [
        'BinaryMeanScalingQuantizer', 'LinearMidTreadHalfQuantizer',
        'BinaryChannelWiseMeanScalingQuantizer'
    ]

    word_size = 32
    weight_bitwidth = 1
    packer = Packer(weight_bitwidth, word_size)
    to_be_removed = []
    b = 32  # block size of the transposed (tca_*) weight layout

    for m in exec_list:
        conv_node = m

        # check if this is a quantized convolution
        if not conv_node.quantizer or not conv_node.a_quantizer:
            continue

        # Check if we support this kind of quantizer
        weight_quantizer = conv_node.quantizer
        if weight_quantizer.op_type not in quantization_types:
            continue

        # Quantize the weights
        weight_quantizer.run_forward()

        def pad_to_multiple_of_b(tensor, axis, b):
            # Return a zero tensor that pads `axis` up to the next multiple
            # of b, or None if no padding is needed.
            shape = list(tensor.shape)
            pad = (((shape[axis] + b - 1) // b) * b) - shape[axis]
            shape[axis] = pad
            return np.zeros(shape) if pad else None

        padded_data = np.copy(weight_quantizer.data)

        for axis in [0, 3]:
            pad_tensor = pad_to_multiple_of_b(padded_data, axis, b)
            if pad_tensor is not None:
                padded_data = np.append(padded_data, pad_tensor, axis=axis)

        tca_output = np.copy(padded_data)
        oc, kh, kw, kd = padded_data.shape
        padded_data = padded_data.flatten()
        tca_output = tca_output.flatten()

        out_index = 0
        for g in range(oc // b):
            for p in range(kd // b):
                for h in range(kh):
                    for w in range(kw):
                        for o in range(b):
                            for d in range(b):
                                idx = g * (kw * kh * kd * b) + p * b + h * (
                                    kw * kd) + w * kd + o * (kw * kh * kd) + d
                                tca_output[out_index] = padded_data[idx]
                                out_index += 1

        kn2row_output = np.zeros(oc * kh * kw * kd)
        out_index = 0
        for h in range(kh):
            for w in range(kw):
                for o in range(oc):
                    for i in range(kd):
                        idx = o * kh * kw * kd + h * kw * kd + w * kd + i
                        kn2row_output[out_index] = padded_data[idx]
                        out_index += 1

        op_data = weight_quantizer.binarizer(padded_data)
        data = packer.run(op_data.astype(np.float32),
                          weight_quantizer.dimension)

        tca_binarized_data = weight_quantizer.binarizer(tca_output)
        tca_packed_data = packer.run(tca_binarized_data.astype(np.float32),
                                     weight_quantizer.dimension)

        kn2row_binarized_data = weight_quantizer.binarizer(kn2row_output)
        kn2row_data = packer.run(kn2row_binarized_data.astype(np.float32),
                                 weight_quantizer.dimension)

        shape = [oc, kh, kw, kd]
        tca_shape = [oc // b, kd // b, kh, kw, b, b]
        kn2row_shape = [kh, kw, oc, kd]

        # Create the new constant with the quantized weights
        quantized_constant = Constant(
            weight_quantizer.name + '_new',
            PackedUint32(),
            data=np.vectorize(lambda k: (~k) & ((0x1 << 32) - 1))(data),
            dimension_format="OHWI",
            transposed_dimension_format="OhIhHWOlIl",
            packed=True,
            actual_shape=shape,
            transposed_shape=tca_shape,
            transposed_data=[(~k) & ((0x1 << 32) - 1)
                             for k in tca_packed_data.flatten()],
            kn2row_data=[k for k in kn2row_data.flatten()],
            kn2row_shape=kn2row_shape,
            kn2row_dimension_format="HWOI")

        # get nodes to be removed after being disconnected
        get_nodes_in_branch(weight_quantizer, None, to_be_removed)

        # Add the constant to the graph and connect the new constant
        graph.add_op(quantized_constant)
        quantized_constant.add_outputs(weight_quantizer.output_ops)
        for output_name, consumer_list in weight_quantizer.output_ops.items():
            for consumer_node in consumer_list:
                for input_name, input_node in consumer_node.input_ops.items():
                    if input_node == weight_quantizer:
                        consumer_node.add_input(input_name, quantized_constant)
                        break

    for op in to_be_removed:
        graph.remove_op(op)
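
For intuition, the packing step collapses 32 binary weights into one 32-bit word; a standalone sketch of the idea (hypothetical, not the actual Packer API; LSB-first bit order assumed):

    import numpy as np

    bits = np.random.randint(0, 2, 64).astype(np.uint32)  # 64 binary weights
    words = [int(np.bitwise_or.reduce(chunk << np.arange(32, dtype=np.uint32)))
             for chunk in bits.reshape(-1, 32)]  # two packed 32-bit words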