Example #1
def _make_dense_layer(bits, bits_a, growth_rate, bn_size, dropout):
    new_features = nn.HybridSequential(prefix='')
    if bn_size == 0:
        # no bottleneck
        new_features.add(nn.QActivation(bits=bits_a))
        new_features.add(
            nn.QConv2D(growth_rate, bits=bits, kernel_size=3, padding=1))
        if dropout:
            new_features.add(nn.Dropout(dropout))
        new_features.add(nn.BatchNorm())
    else:
        # bottleneck design
        new_features.add(nn.BatchNorm())
        new_features.add(nn.QActivation(bits=bits_a))
        new_features.add(
            nn.QConv2D(bn_size * growth_rate, bits=bits, kernel_size=1))
        if dropout:
            new_features.add(nn.Dropout(dropout))
        new_features.add(nn.BatchNorm())
        new_features.add(nn.QActivation(bits=bits_a))
        new_features.add(
            nn.QConv2D(growth_rate, bits=bits, kernel_size=3, padding=1))
        if dropout:
            new_features.add(nn.Dropout(dropout))

    out = HybridConcurrent(axis=1, prefix='')
    out.add(Identity())
    out.add(new_features)

    return out
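In the DenseNet constructor in Example #8 these layers are stacked into stages via a `_make_dense_block` helper that is not part of this snippet. A minimal sketch of what it presumably looks like, matching the argument order of that call (the prefix naming is an assumption):

def _make_dense_block(bits, bits_a, num_layers, bn_size, growth_rate, dropout,
                      stage_index):
    # sketch of the assumed helper: stack num_layers dense layers into one stage
    out = nn.HybridSequential(prefix='stage%d_' % stage_index)  # prefix assumed
    with out.name_scope():
        for _ in range(num_layers):
            out.add(_make_dense_layer(bits, bits_a, growth_rate, bn_size,
                                      dropout))
    return out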
Example #2
def test_qactivation(input_shape, threshold):
    in_npy = np.random.uniform(-2, 2, input_shape)
    in_data = mx.nd.array(in_npy)
    in_data.attach_grad()

    binary_layer = nn.QActivation(gradient_cancel_threshold=threshold)
    with autograd.record():
        result1 = binary_layer.forward(in_data)
    result1.backward()
    gradients1 = in_data.grad

    with autograd.record():
        cancelled = mx.ndarray.contrib.gradcancel(in_data, threshold=threshold)
        result2 = cancelled.det_sign()
    result2.backward()
    gradients2 = in_data.grad

    # check that correct functions are used
    assert_almost_equal(result1.asnumpy(), result2.asnumpy())
    assert_almost_equal(gradients1.asnumpy(), gradients2.asnumpy())

    # explicitly model cancelling
    grads_let_through = np.abs(np.sign(gradients2.asnumpy()))
    expected_let_through = np.zeros_like(in_npy)
    expected_let_through[np.abs(in_npy) <= threshold] = 1
    assert_almost_equal(grads_let_through, expected_let_through)

    # shape should be unchanged
    assert result1.shape == in_data.shape
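`input_shape` and `threshold` are parameters of this test, presumably supplied by the test runner. A hedged sketch of how it could be driven with pytest (the imports, the import location of the quantized `nn` module, and the concrete parameter values below are assumptions, not taken from the original):

import mxnet as mx
import numpy as np
import pytest
from mxnet import autograd
from mxnet.gluon import nn                      # assumed home of QActivation
from mxnet.test_utils import assert_almost_equal

# hypothetical parameter grid
@pytest.mark.parametrize("threshold", [0.5, 1.0, 2.0])
@pytest.mark.parametrize("input_shape", [(1, 2, 5, 5), (10, 1000)])
def test_qactivation(input_shape, threshold):
    ...  # body as in Example #2 above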
Example #3
def _make_transition(bits, bits_a, num_output_features):
    out = nn.HybridSequential(prefix='')
    out.add(nn.QActivation(bits=bits_a))
    out.add(nn.QConv2D(num_output_features, bits=bits, kernel_size=1))
    out.add(nn.AvgPool2D(pool_size=2, strides=2))
    out.add(nn.BatchNorm())
    return out
Example #4
def test_binary_layer_config_qact(grad_cancel,
                                  bits_a,
                                  activation,
                                  input_shape=(1, 2, 4, 4)):
    d = np.random.uniform(-1, 1, input_shape)
    in_data = mx.nd.array(d)
    in_data.attach_grad()

    qact = nn.QActivation(bits=bits_a,
                          gradient_cancel_threshold=grad_cancel,
                          method=activation)
    with nn.set_binary_layer_config(grad_cancel=grad_cancel,
                                    bits_a=bits_a,
                                    activation=activation):
        qact_config = nn.QActivation()

    grad, y = forward(in_data, qact)
    grad_, y_ = forward(in_data, qact_config)

    np.testing.assert_almost_equal(y, y_)
    np.testing.assert_almost_equal(grad, grad_)
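The `forward` helper used above is not part of the snippet. Judging from how its return values are unpacked (gradient first, output second), it presumably runs the layer under `autograd.record`, backpropagates, and returns both as NumPy arrays; a minimal sketch under that assumption:

def forward(x, *layers):
    # assumed helper: run x through the given layers, backpropagate,
    # and return (input gradient, output) as NumPy arrays
    x = x.copy()
    x.attach_grad()
    with autograd.record():
        y = x
        for layer in layers:
            y = layer(y)
    y.backward()
    return x.grad.asnumpy(), y.asnumpy()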
Example #5
def test_binary_inference_conv():
    bits_binary_word = 32
    input_dim = 32
    output_dim = 1
    batch_size = 10
    kernel_dim = 1
    input_data = mx.nd.random.normal(-1,
                                     1,
                                     shape=(batch_size, input_dim, kernel_dim,
                                            kernel_dim))
    weight = mx.nd.random.normal(-1,
                                 1,
                                 shape=(output_dim, input_dim, kernel_dim,
                                        kernel_dim))

    # weights concatenation
    size_binary_row = int(weight.size / bits_binary_word)
    weight_concatenated = np.zeros((size_binary_row), dtype='uint32')
    weight_concatenated = mx.nd.array(get_binary_row(weight.reshape(-1),
                                                     weight_concatenated,
                                                     weight.size,
                                                     bits_binary_word),
                                      dtype='float64')
    weight_concatenated = weight_concatenated.reshape(
        (weight.shape[0], -1, weight.shape[2], weight.shape[3]))
    # create binary inference conv layer
    binary_infer_result = mx.ndarray.BinaryInferenceConvolution(
        data=input_data,
        weight=weight_concatenated,
        kernel=(kernel_dim, kernel_dim),
        num_filter=output_dim)

    # create qconv2d layer, assign weights and set input_data.
    qconv_layer = nn.QConv2D(output_dim,
                             kernel_dim,
                             bits=1,
                             use_bias=False,
                             in_channels=input_dim,
                             apply_scaling=False,
                             no_offset=False)
    qact = nn.QActivation(bits=1)
    qact_result = qact.forward(input_data)
    qconv_result = qconv_layer.hybrid_forward(F, x=qact_result, weight=weight)

    # the packed binary inference op should match the QConv2D reference result
    np.testing.assert_almost_equal(binary_infer_result.asnumpy(),
                                   qconv_result.asnumpy())
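`get_binary_row` (and `get_binary_col` used in Example #7) is a packing helper that is not shown here: it packs 32 consecutive sign bits into one uint32 word so the pre-binarized weights can be fed to the binary inference operator (`F` in the snippet is likewise assumed to be the `mx.ndarray` namespace). A rough sketch of the row-wise packing; the exact bit order has to match the C++ operator and is an assumption here:

def get_binary_row(row, binary_row, nd_size, bits_per_binary_word):
    # pack each group of 32 consecutive values into one uint32 word:
    # bit j of word i is set iff row[i * 32 + j] is non-negative
    values = row.asnumpy()  # row is an mx.nd.NDArray in the test above
    for i in range(nd_size // bits_per_binary_word):
        word = 0
        for j in range(bits_per_binary_word):
            if values[i * bits_per_binary_word + j] >= 0:
                word |= (1 << j)
        binary_row[i] = word
    return binary_row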
Example #6
    def __init__(self,
                 bits,
                 bits_a,
                 channels,
                 stride,
                 downsample=False,
                 in_channels=0,
                 clip_threshold=1.0,
                 **kwargs):
        super(BasicBlockV1, self).__init__(**kwargs)
        self.layer1 = nn.HybridSequential(prefix='')
        # Difference to ResNet: each layer is sign + conv + batchnorm, with shortcuts around every layer
        self.layer1.add(
            nn.QActivation(bits=bits_a,
                           gradient_cancel_threshold=clip_threshold))
        self.layer1.add(_conv3x3(bits, channels, stride, in_channels))
        self.layer1.add(nn.BatchNorm())

        self.layer2 = nn.HybridSequential(prefix='')
        self.layer2.add(
            nn.QActivation(bits=bits_a,
                           gradient_cancel_threshold=clip_threshold))
        self.layer2.add(_conv3x3(bits, channels, 1, channels))
        self.layer2.add(nn.BatchNorm())

        if downsample:
            self.downsample = nn.HybridSequential(prefix='')
            self.downsample.add(nn.AvgPool2D(pool_size=2, strides=2,
                                             padding=0))
            self.downsample.add(
                nn.QConv2D(channels,
                           kernel_size=1,
                           strides=1,
                           in_channels=in_channels,
                           prefix="sc_qconv_"))
        else:
            self.downsample = None
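`_conv3x3` is not included in the snippet. Given how it is called, it is presumably a thin wrapper around `nn.QConv2D` with a 3x3 kernel; a sketch under that assumption:

def _conv3x3(bits, channels, stride, in_channels):
    # assumed helper: quantized 3x3 convolution, padding 1, no bias
    return nn.QConv2D(channels, kernel_size=3, strides=stride, padding=1,
                      bits=bits, in_channels=in_channels, use_bias=False)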
Example #7
def test_binary_inference_fc():
    # setup data
    batch_size = 1
    bits_binary_word = 32
    num_hidden_fc = 10
    num_input_features = 1024
    input_data = mx.nd.random.normal(-1,
                                     1,
                                     shape=(batch_size, num_input_features))
    weight = mx.nd.random.normal(-1,
                                 1,
                                 shape=(num_hidden_fc, num_input_features))

    # input_npy = (np.sign(input_data.asnumpy()).flatten() + 1) / 2
    # weight_npy = (np.sign(weight.asnumpy()).flatten() + 1) / 2
    # result = 0
    # for i in range(len(weight_npy)):
    #     result += 0 if (input_npy[i] + weight_npy[i]) == 1 else 1

    # weights concatenation
    weight_T = weight.T
    size_binary_col = int(weight_T.size / bits_binary_word)
    weight_concatenated = np.zeros((size_binary_col), dtype='uint32')
    weight_concatenated = mx.nd.array(
        get_binary_col(weight_T.reshape((-1)), weight_concatenated,
                       weight_T.shape[0], weight_T.shape[1],
                       bits_binary_word),
        dtype='float64')
    weight_concatenated = weight_concatenated.reshape((weight_T.shape[1], -1))
    assert weight_concatenated.shape[0] == num_hidden_fc
    assert weight_concatenated.shape[1] == num_input_features // bits_binary_word
    # create binary inference fc layer
    binary_infer_result = mx.ndarray.BinaryInferenceFullyConnected(
        data=input_data, weight=weight_concatenated, num_hidden=num_hidden_fc)

    # create qdense layer, assign weights and set input_data.
    qdense_layer = nn.QDense(num_hidden_fc)
    qact = nn.QActivation(bits=1)
    qact_result = qact.forward(input_data)
    qdense_result = qdense_layer.hybrid_forward(F,
                                                x=qact_result,
                                                weight=weight)

    # the packed binary inference op should match the QDense reference result
    np.testing.assert_almost_equal(binary_infer_result.asnumpy(),
                                   qdense_result.asnumpy())
Example #8
    def __init__(self,
                 bits,
                 bits_a,
                 num_init_features,
                 growth_rate,
                 block_config,
                 reduction,
                 bn_size,
                 modifier=[],
                 thumbnail=False,
                 dropout=0,
                 classes=1000,
                 **kwargs):
        assert len(modifier) == 0

        super(DenseNetX, self).__init__(**kwargs)
        with self.name_scope():
            self.fp_features = nn.HybridSequential(prefix='')
            if thumbnail:
                self.fp_features.add(
                    nn.Conv2D(num_init_features,
                              kernel_size=3,
                              strides=1,
                              padding=1,
                              in_channels=0,
                              use_bias=False))
            else:
                self.fp_features.add(
                    nn.Conv2D(num_init_features,
                              kernel_size=7,
                              strides=2,
                              padding=3,
                              use_bias=False))
                self.fp_features.add(nn.BatchNorm())
                self.fp_features.add(nn.Activation('relu'))
                self.fp_features.add(
                    nn.MaxPool2D(pool_size=3, strides=2, padding=1))
            # Add dense blocks
            num_features = num_init_features

            self.features1 = nn.HybridSequential(prefix='')
            self.features2 = nn.HybridSequential(prefix='')
            add_to = self.features1
            for i, num_layers in enumerate(block_config):
                add_to.add(
                    _make_dense_block(bits, bits_a, num_layers, bn_size,
                                      growth_rate, dropout, i + 1))
                num_features = num_features + num_layers * growth_rate
                if i != len(block_config) - 1:
                    features_after_transition = num_features // reduction[i]
                    # make it to be multiples of 32
                    features_after_transition = int(
                        round(features_after_transition / 32)) * 32
                    if i == 0:
                        add_to.add(nn.BatchNorm())
                        add_to.add(nn.QActivation(bits=bits_a))
                        add_to.add(
                            nn.QConv2D(features_after_transition,
                                       bits=bits,
                                       kernel_size=1))
                        add_to = self.features2
                        add_to.add(nn.AvgPool2D(pool_size=2, strides=2))
                    else:
                        add_to.add(nn.BatchNorm())
                        add_to.add(nn.QActivation(bits=bits_a))
                        add_to.add(
                            nn.QConv2D(features_after_transition,
                                       bits=bits,
                                       kernel_size=1))
                        add_to.add(nn.AvgPool2D(pool_size=2, strides=2))
                    num_features = features_after_transition
            add_to.add(nn.BatchNorm())
            add_to.add(nn.Activation('relu'))
            add_to.add(nn.AvgPool2D(pool_size=4 if thumbnail else 7))
            add_to.add(nn.Flatten())

            self.output = nn.Dense(classes)
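The constructor above only assembles the sub-blocks; the forward pass is not part of the snippet. Presumably `hybrid_forward` chains them in order (full-precision stem, the two feature stages, then the classifier); a sketch under that assumption:

    def hybrid_forward(self, F, x):
        # assumed forward pass: full-precision stem, two quantized feature
        # stages, then the dense classifier head
        x = self.fp_features(x)
        x = self.features1(x)
        x = self.features2(x)
        return self.output(x)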