def test_binary_layer_config_qconv(bits, weight_quantization, input_shape=(1, 3, 7, 7), channel=2):
    d = np.random.uniform(-1, 1, input_shape)
    in_data = mx.nd.array(d)
    in_data.attach_grad()
    qconv = nn.QConv2D(channel, 3, bits=bits, quantization=weight_quantization,
                       in_channels=input_shape[1])
    qconv.initialize(mx.init.Xavier(magnitude=2))
    with nn.set_binary_layer_config(bits=bits, weight_quantization=weight_quantization):
        qconv_config = nn.QConv2D(channel, 3, params=qconv.collect_params(),
                                  in_channels=input_shape[1])
    grad, y = forward(in_data, qconv)
    grad_, y_ = forward(in_data, qconv_config)
    np.testing.assert_almost_equal(y, y_)
    np.testing.assert_almost_equal(grad, grad_)
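# The tests in this file rely on a `forward` helper that is defined
# elsewhere in the test suite. A minimal sketch of what it presumably
# does, assuming it records gradients w.r.t. the input and returns the
# input gradient together with the network output as numpy arrays:
def forward(x, *blocks):
    with mx.autograd.record():
        y = x
        for block in blocks:
            y = block(y)
    y.backward()
    return x.grad.asnumpy(), y.asnumpy()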
def __init__(self, classes=1000, **kwargs):
    super(AlexNet, self).__init__(**kwargs)
    with self.name_scope():
        # Binary AlexNet: the first conv and the output layer stay
        # full-precision; the inner conv and dense layers are quantized.
        self.features = nn.HybridSequential(prefix='')
        with self.features.name_scope():
            self.features.add(nn.Conv2D(64, kernel_size=11, strides=4,
                                        padding=2, activation='relu'))
            self.features.add(nn.MaxPool2D(pool_size=3, strides=2))
            self.features.add(nn.QConv2D(192, kernel_size=5, padding=2))
            self.features.add(nn.MaxPool2D(pool_size=3, strides=2))
            self.features.add(nn.QConv2D(384, kernel_size=3, padding=1))
            self.features.add(nn.QConv2D(256, kernel_size=3, padding=1))
            self.features.add(nn.QConv2D(256, kernel_size=3, padding=1))
            self.features.add(nn.MaxPool2D(pool_size=3, strides=2))
            self.features.add(nn.Flatten())
            self.features.add(nn.QDense(4096))
            self.features.add(nn.Dropout(0.5))
            self.features.add(nn.QDense(4096))
            self.features.add(nn.Dropout(0.5))
        self.output = nn.Dense(classes)
def _make_dense_layer(bits, bits_a, growth_rate, bn_size, dropout):
    new_features = nn.HybridSequential(prefix='')
    if bn_size == 0:
        # no bottleneck
        new_features.add(nn.QActivation(bits=bits_a))
        new_features.add(nn.QConv2D(growth_rate, bits=bits, kernel_size=3, padding=1))
        if dropout:
            new_features.add(nn.Dropout(dropout))
        new_features.add(nn.BatchNorm())
    else:
        # bottleneck design
        new_features.add(nn.BatchNorm())
        new_features.add(nn.QActivation(bits=bits_a))
        new_features.add(nn.QConv2D(bn_size * growth_rate, bits=bits, kernel_size=1))
        if dropout:
            new_features.add(nn.Dropout(dropout))
        new_features.add(nn.BatchNorm())
        new_features.add(nn.QActivation(bits=bits_a))
        new_features.add(nn.QConv2D(growth_rate, bits=bits, kernel_size=3, padding=1))
        if dropout:
            new_features.add(nn.Dropout(dropout))

    out = HybridConcurrent(axis=1, prefix='')
    out.add(Identity())
    out.add(new_features)
    return out
def __init__(self, channels, stride, downsample=False, in_channels=0, **kwargs):
    super(BottleneckV1, self).__init__(**kwargs)
    self.body = nn.HybridSequential(prefix='')
    self.body.add(nn.QConv2D(channels // 4, kernel_size=1, strides=stride))
    self.body.add(nn.BatchNorm())
    self.body.add(nn.Activation('relu'))
    self.body.add(_conv3x3(channels // 4, 1, channels // 4))
    self.body.add(nn.BatchNorm())
    self.body.add(nn.Activation('relu'))
    self.body.add(nn.QConv2D(channels, kernel_size=1, strides=1))
    self.body.add(nn.BatchNorm())
    if downsample:
        self.downsample = nn.HybridSequential(prefix='')
        self.downsample.add(nn.QConv2D(channels, kernel_size=1, strides=stride,
                                       use_bias=False, in_channels=in_channels))
        self.downsample.add(nn.BatchNorm())
    else:
        self.downsample = None
def __init__(self, channels, stride, downsample=False, in_channels=0, **kwargs):
    super(BottleneckV2, self).__init__(**kwargs)
    self.bn1 = nn.BatchNorm()
    self.conv1 = nn.QConv2D(channels // 4, kernel_size=1, strides=1, use_bias=False)
    self.bn2 = nn.BatchNorm()
    self.conv2 = _conv3x3(channels // 4, stride, channels // 4)
    self.bn3 = nn.BatchNorm()
    self.conv3 = nn.QConv2D(channels, kernel_size=1, strides=1, use_bias=False)
    if downsample:
        self.downsample = nn.QConv2D(channels, 1, stride, use_bias=False,
                                     in_channels=in_channels)
    else:
        self.downsample = None
def _conv3x3(bits, channels, stride, in_channels):
    return nn.QConv2D(channels, bits=bits, kernel_size=3, strides=stride,
                      padding=1, in_channels=in_channels)
def _make_transition(bits, bits_a, num_output_features):
    out = nn.HybridSequential(prefix='')
    out.add(nn.QActivation(bits=bits_a))
    out.add(nn.QConv2D(num_output_features, bits=bits, kernel_size=1))
    out.add(nn.AvgPool2D(pool_size=2, strides=2))
    out.add(nn.BatchNorm())
    return out
def test_binary_inference_conv():
    bits_binary_word = 32
    input_dim = 32
    output_dim = 1
    batch_size = 10
    kernel_dim = 1
    input_data = mx.nd.random.normal(-1, 1,
                                     shape=(batch_size, input_dim, kernel_dim, kernel_dim))
    weight = mx.nd.random.normal(-1, 1,
                                 shape=(output_dim, input_dim, kernel_dim, kernel_dim))

    # weights concatenation: pack the float weights into 32-bit binary words
    size_binary_row = int(weight.size / bits_binary_word)
    weight_concatenated = np.zeros(size_binary_row, dtype='uint32')
    weight_concatenated = mx.nd.array(
        get_binary_row(weight.reshape(-1), weight_concatenated, weight.size, bits_binary_word),
        dtype='float64')
    weight_concatenated = weight_concatenated.reshape(
        (weight.shape[0], -1, weight.shape[2], weight.shape[3]))

    # create binary inference conv layer
    binary_infer_result = mx.ndarray.BinaryInferenceConvolution(
        data=input_data, weight=weight_concatenated,
        kernel=(kernel_dim, kernel_dim), num_filter=output_dim)
    binary_infer_result2 = mx.ndarray.BinaryInferenceConvolution(
        data=input_data, weight=weight_concatenated,
        kernel=(kernel_dim, kernel_dim), num_filter=output_dim)

    # create qconv2d layer, assign weights and set input_data
    qconv_layer = nn.QConv2D(output_dim, kernel_dim, bits=1, use_bias=False,
                             in_channels=input_dim, apply_scaling=False, no_offset=False)
    qact = nn.QActivation(bits=1)
    qact_result = qact.forward(input_data)
    qconv_result = qconv_layer.hybrid_forward(F, x=qact_result, weight=weight)

    # note: only the two binary-inference results are asserted equal here;
    # the QConv2D path is exercised but not compared
    np.testing.assert_equal(binary_infer_result.asnumpy(), binary_infer_result2.asnumpy())
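# `get_binary_row` comes from the BMXNet test utilities and is not shown
# in this file. A rough, hypothetical sketch of the bit-packing it
# performs, assuming bit j of word i encodes the sign of the
# corresponding float (set iff the value is non-negative):
def get_binary_row(row, binary_row, size, bits_per_word):
    values = row.asnumpy().ravel() if hasattr(row, 'asnumpy') else np.ravel(row)
    for i in range(size // bits_per_word):
        word = 0
        for j in range(bits_per_word):
            if values[i * bits_per_word + j] >= 0:
                word |= 1 << j
        binary_row[i] = word
    return binary_row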
def test_qconvolution_scaling(input_shape, bits, channel=16, kernel=(3, 3)):
    d = np.random.uniform(-1, 1, input_shape)
    in_data = mx.nd.array(d)
    in_data.attach_grad()
    qconv_scaled = nn.QConv2D(channel, kernel, bits, use_bias=False,
                              no_offset=True, apply_scaling=True)
    qconv_scaled.initialize(mx.init.Xavier(magnitude=2))
    qconv_std = nn.QConv2D(channel, kernel, bits, use_bias=False, no_offset=True,
                           apply_scaling=False, params=qconv_scaled.collect_params())
    conv = nn.Conv2D(channel, kernel, use_bias=False,
                     params=qconv_scaled.collect_params())

    grad_scaled, result_scaled = forward(in_data, qconv_scaled)
    grad_std, result_std = forward(in_data, qconv_std)
    grad, result = forward(in_data, conv)

    def mse(a, b):
        return ((a - b) ** 2).mean()

    def sign_match(a, b):
        return np.mean(np.sign(a) * np.sign(b))

    # scaling should bring the binary output closer to the full-precision
    # result while largely preserving its signs
    assert mse(result, result_scaled) < mse(result, result_std)
    assert sign_match(result_std, result_scaled) > 0.95
def __init__(self, version, classes=1000, **kwargs):
    super(SqueezeNet, self).__init__(**kwargs)
    assert version in ['1.0', '1.1'], \
        "Unsupported SqueezeNet version {version}: 1.0 or 1.1 expected".format(version=version)
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        if version == '1.0':
            self.features.add(nn.Conv2D(96, kernel_size=7, strides=2))
            self.features.add(nn.Activation('relu'))
            self.features.add(nn.MaxPool2D(pool_size=3, strides=2, ceil_mode=True))
            self.features.add(_make_fire(16, 64, 64))
            self.features.add(_make_fire(16, 64, 64))
            self.features.add(_make_fire(32, 128, 128))
            self.features.add(nn.MaxPool2D(pool_size=3, strides=2, ceil_mode=True))
            self.features.add(_make_fire(32, 128, 128))
            self.features.add(_make_fire(48, 192, 192))
            self.features.add(_make_fire(48, 192, 192))
            self.features.add(_make_fire(64, 256, 256))
            self.features.add(nn.MaxPool2D(pool_size=3, strides=2, ceil_mode=True))
            self.features.add(_make_fire(64, 256, 256))
        else:
            self.features.add(nn.Conv2D(64, kernel_size=3, strides=2))
            self.features.add(nn.Activation('relu'))
            self.features.add(nn.MaxPool2D(pool_size=3, strides=2, ceil_mode=True))
            self.features.add(_make_fire(16, 64, 64))
            self.features.add(_make_fire(16, 64, 64))
            self.features.add(nn.MaxPool2D(pool_size=3, strides=2, ceil_mode=True))
            self.features.add(_make_fire(32, 128, 128))
            self.features.add(_make_fire(32, 128, 128))
            self.features.add(nn.MaxPool2D(pool_size=3, strides=2, ceil_mode=True))
            self.features.add(_make_fire(48, 192, 192))
            self.features.add(_make_fire(48, 192, 192))
            self.features.add(_make_fire(64, 256, 256))
            self.features.add(_make_fire(64, 256, 256))
        self.features.add(nn.Dropout(0.5))

        self.output = nn.HybridSequential(prefix='')
        self.output.add(nn.QConv2D(classes, kernel_size=1))
        self.output.add(nn.Activation('relu'))
        self.output.add(nn.AvgPool2D(13))
        self.output.add(nn.Flatten())
def _add_conv(out, channels=1, kernel=1, stride=1, pad=0,
              num_group=1, active=True, relu6=False):
    out.add(nn.QConv2D(channels, kernel, stride, pad,
                       groups=num_group, use_bias=False))
    out.add(nn.BatchNorm(scale=True))
    if active:
        out.add(RELU6() if relu6 else nn.Activation('relu'))
def _make_features(self, layers, filters, batch_norm, step):
    featurizer = nn.HybridSequential(prefix='')
    count = 0
    for i, num in enumerate(layers):
        for _ in range(num):
            # layers listed in step_spec[step] stay full-precision,
            # all others are binarized
            if count not in step_spec[step]:
                conv_layer = nn.QConv2D(filters[i], kernel_size=3, padding=1,
                                        weight_initializer=Xavier(rnd_type='gaussian',
                                                                  factor_type='out',
                                                                  magnitude=2),
                                        bias_initializer='zeros',
                                        bits=1, apply_scaling=True)
            else:
                conv_layer = nn.Conv2D(filters[i], kernel_size=3, padding=1,
                                       weight_initializer=Xavier(rnd_type='gaussian',
                                                                 factor_type='out',
                                                                 magnitude=2),
                                       bias_initializer='zeros')
            featurizer.add(conv_layer)
            featurizer.add(nn.Dropout(rate=0.25))
            featurizer.add(nn.Activation('relu'))
            count += 1
        if batch_norm:
            featurizer.add(nn.BatchNorm())
        featurizer.add(nn.MaxPool2D(strides=2))
    return featurizer
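# `step_spec` is a module-level table that is not shown in this file.
# From its use above it maps a training step to the indices of the conv
# layers that remain full-precision; a purely hypothetical example:
step_spec = {
    0: set(),    # step 0: every conv layer is binarized
    1: {0},      # step 1: keep the first conv full-precision
    2: {0, 1},   # step 2: keep the first two convs full-precision
}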
def __init__(self, bits, bits_a, channels, stride, downsample=False,
             in_channels=0, clip_threshold=1.0, **kwargs):
    super(BasicBlockV1, self).__init__(**kwargs)
    # Difference to ResNet: one layer is sign + conv + batchnorm,
    # and there are shortcuts around all layers.
    self.layer1 = nn.HybridSequential(prefix='')
    self.layer1.add(nn.QActivation(bits=bits_a, gradient_cancel_threshold=clip_threshold))
    self.layer1.add(_conv3x3(bits, channels, stride, in_channels))
    self.layer1.add(nn.BatchNorm())

    self.layer2 = nn.HybridSequential(prefix='')
    self.layer2.add(nn.QActivation(bits=bits_a, gradient_cancel_threshold=clip_threshold))
    self.layer2.add(_conv3x3(bits, channels, 1, channels))
    self.layer2.add(nn.BatchNorm())

    if downsample:
        self.downsample = nn.HybridSequential(prefix='')
        self.downsample.add(nn.AvgPool2D(pool_size=2, strides=2, padding=0))
        self.downsample.add(nn.QConv2D(channels, kernel_size=1, strides=1,
                                       in_channels=in_channels, prefix="sc_qconv_"))
    else:
        self.downsample = None
def _make_basic_conv(**kwargs):
    out = nn.HybridSequential(prefix='')
    out.add(nn.QConv2D(use_bias=False, **kwargs))
    out.add(nn.BatchNorm(epsilon=0.001))
    out.add(nn.Activation('relu'))
    return out
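# Usage sketch (the channel count and kernel size below are
# illustrative, not taken from the source): all keyword arguments are
# forwarded to nn.QConv2D, e.g.
#   block = _make_basic_conv(channels=32, kernel_size=3, padding=1)
# yields a binary conv -> BatchNorm -> ReLU unit.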
def residual_unit(data, num_filter, stride, dim_match, name, isBin=False):
    # eps, use_global_stats and workspace are module-level settings
    bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=eps,
                           use_global_stats=use_global_stats, name=name + '_bn1')
    act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
    if isBin:
        gluon_layer1 = nn.QConv2D(channels=int(num_filter * 0.25), kernel_size=(1, 1),
                                  strides=(1, 1), padding=(0, 0), bits=1,
                                  prefix=name + '_conv1_', apply_scaling=True)
        gluon_layer1.hybridize()
        conv1 = gluon_layer1(act1)
    else:
        conv1 = mx.sym.Convolution(data=act1, num_filter=int(num_filter * 0.25),
                                   kernel=(1, 1), stride=(1, 1), pad=(0, 0),
                                   no_bias=True, workspace=workspace,
                                   name=name + '_conv1')
    bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=eps,
                           use_global_stats=use_global_stats, name=name + '_bn2')
    act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
    if isBin:
        gluon_layer2 = nn.QConv2D(channels=int(num_filter * 0.25), kernel_size=3,
                                  strides=stride, padding=(1, 1), bits=1,
                                  prefix=name + '_conv2_', apply_scaling=True)
        gluon_layer2.hybridize()
        conv2 = gluon_layer2(act2)
    else:
        conv2 = mx.sym.Convolution(data=act2, num_filter=int(num_filter * 0.25),
                                   kernel=(3, 3), stride=stride, pad=(1, 1),
                                   no_bias=True, workspace=workspace,
                                   name=name + '_conv2')
    bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=eps,
                           use_global_stats=use_global_stats, name=name + '_bn3')
    act3 = mx.sym.Activation(data=bn3, act_type='relu', name=name + '_relu3')
    if isBin:
        gluon_layer3 = nn.QConv2D(channels=num_filter, kernel_size=(1, 1),
                                  strides=(1, 1), padding=(0, 0), bits=1,
                                  prefix=name + '_conv3_', apply_scaling=True)
        gluon_layer3.hybridize()
        conv3 = gluon_layer3(act3)
    else:
        conv3 = mx.sym.Convolution(data=act3, num_filter=num_filter, kernel=(1, 1),
                                   stride=(1, 1), pad=(0, 0), no_bias=True,
                                   workspace=workspace, name=name + '_conv3')
    if dim_match:
        shortcut = data
    else:
        shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1, 1),
                                      stride=stride, no_bias=True, workspace=workspace,
                                      name=name + '_sc')
    return mx.sym.ElementWiseSum(*[conv3, shortcut], name=name + '_plus')
def __init__(self, num_filter, stride, dim_match, isBin=False, prefix='', **kwargs):
    super(ResidualUnit, self).__init__(**kwargs)
    self.dim_match = dim_match
    self.features = nn.HybridSequential()
    self.bn1 = nn.BatchNorm(use_global_stats=use_global_stats, prefix=prefix + '_nb1_')
    self.act1 = nn.Activation('relu')
    self.scale = nn.Conv2D(channels=num_filter, kernel_size=(1, 1), strides=stride,
                           use_bias=False, prefix=prefix + '_sc_')
    if isBin:
        self.features.add(nn.QConv2D(channels=int(num_filter * 0.25), kernel_size=(1, 1),
                                     strides=(1, 1), padding=(0, 0), use_bias=False,
                                     apply_scaling=True, prefix=prefix + '_conv1_'))
        self.features.add(nn.BatchNorm(use_global_stats=use_global_stats,
                                       prefix=prefix + '_nb2_'))
        self.features.add(nn.Activation('relu'))
        self.features.add(nn.QConv2D(channels=int(num_filter * 0.25), kernel_size=(3, 3),
                                     strides=stride, padding=(1, 1), use_bias=False,
                                     apply_scaling=True, prefix=prefix + '_conv2_'))
        self.features.add(nn.BatchNorm(use_global_stats=use_global_stats,
                                       prefix=prefix + '_nb3_'))
        self.features.add(nn.Activation('relu'))
        self.features.add(nn.QConv2D(channels=num_filter, kernel_size=(1, 1),
                                     strides=(1, 1), padding=(0, 0), use_bias=False,
                                     apply_scaling=True, prefix=prefix + '_conv3_'))
    else:
        self.features.add(nn.Conv2D(channels=int(num_filter * 0.25), kernel_size=(1, 1),
                                    strides=(1, 1), padding=(0, 0), use_bias=False,
                                    prefix=prefix + '_conv1_'))
        self.features.add(nn.BatchNorm(use_global_stats=use_global_stats,
                                       prefix=prefix + '_nb2_'))
        self.features.add(nn.Activation('relu'))
        self.features.add(nn.Conv2D(channels=int(num_filter * 0.25), kernel_size=(3, 3),
                                    strides=stride, padding=(1, 1), use_bias=False,
                                    prefix=prefix + '_conv2_'))
        self.features.add(nn.BatchNorm(use_global_stats=use_global_stats,
                                       prefix=prefix + '_nb3_'))
        self.features.add(nn.Activation('relu'))
        self.features.add(nn.Conv2D(channels=num_filter, kernel_size=(1, 1),
                                    strides=(1, 1), padding=(0, 0), use_bias=False,
                                    prefix=prefix + '_conv3_'))
def _make_fire_conv(channels, kernel_size, padding=0):
    out = nn.HybridSequential(prefix='')
    out.add(nn.QConv2D(channels, kernel_size, padding=padding))
    out.add(nn.Activation('relu'))
    return out
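# `_make_fire` (used by the SqueezeNet constructor above) is not shown
# here. A sketch of the fire module it presumably builds, assuming it
# mirrors the Gluon model zoo version: a squeeze conv followed by
# channel-concatenated 1x1 and 3x3 expand convs:
def _make_fire(squeeze_channels, expand1x1_channels, expand3x3_channels):
    out = nn.HybridSequential(prefix='')
    out.add(_make_fire_conv(squeeze_channels, 1))
    paths = HybridConcurrent(axis=1, prefix='')
    paths.add(_make_fire_conv(expand1x1_channels, 1))
    paths.add(_make_fire_conv(expand3x3_channels, 3, 1))
    out.add(paths)
    return out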
def __init__(self, bits, bits_a, num_init_features, growth_rate, block_config,
             reduction, bn_size, modifier=[], thumbnail=False, dropout=0,
             classes=1000, **kwargs):
    assert len(modifier) == 0
    super(DenseNetX, self).__init__(**kwargs)
    with self.name_scope():
        self.fp_features = nn.HybridSequential(prefix='')
        if thumbnail:
            self.fp_features.add(nn.Conv2D(num_init_features, kernel_size=3,
                                           strides=1, padding=1, in_channels=0,
                                           use_bias=False))
        else:
            self.fp_features.add(nn.Conv2D(num_init_features, kernel_size=7,
                                           strides=2, padding=3, use_bias=False))
            self.fp_features.add(nn.BatchNorm())
            self.fp_features.add(nn.Activation('relu'))
            self.fp_features.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))
        # Add dense blocks
        num_features = num_init_features
        self.features1 = nn.HybridSequential(prefix='')
        self.features2 = nn.HybridSequential(prefix='')
        add_to = self.features1
        for i, num_layers in enumerate(block_config):
            add_to.add(_make_dense_block(bits, bits_a, num_layers, bn_size,
                                         growth_rate, dropout, i + 1))
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                features_after_transition = num_features // reduction[i]
                # make it a multiple of 32
                features_after_transition = int(round(features_after_transition / 32)) * 32
                if i == 0:
                    add_to.add(nn.BatchNorm())
                    add_to.add(nn.QActivation(bits=bits_a))
                    add_to.add(nn.QConv2D(features_after_transition, bits=bits,
                                          kernel_size=1))
                    add_to = self.features2
                    add_to.add(nn.AvgPool2D(pool_size=2, strides=2))
                else:
                    add_to.add(nn.BatchNorm())
                    add_to.add(nn.QActivation(bits=bits_a))
                    add_to.add(nn.QConv2D(features_after_transition, bits=bits,
                                          kernel_size=1))
                    add_to.add(nn.AvgPool2D(pool_size=2, strides=2))
                num_features = features_after_transition
        add_to.add(nn.BatchNorm())
        add_to.add(nn.Activation('relu'))
        add_to.add(nn.AvgPool2D(pool_size=4 if thumbnail else 7))
        add_to.add(nn.Flatten())
        self.output = nn.Dense(classes)