def __init__(self, masks):
    """Build the quarter-width, masked, quantized classifier.

    Layer widths are 16 -> 16 -> 8 -> 8 -> 5, i.e. the <16,64,32,32,5>
    model with every hidden width scaled by 1/4.  All linear layers use
    INT weights at ``self.weight_precision`` (8) bits.

    Args:
        masks: Mapping indexed with the keys ``'fc1'``, ``'fc2'`` and
            ``'fc3'``; the values are stored unchanged as ``self.m1``,
            ``self.m2`` and ``self.m3``.  No mask is read for ``fc4``.
            (presumably per-layer pruning masks -- how they are applied
            is not visible in this method)

    Raises:
        KeyError: If one of the three mask keys is missing.
    """
    # Initialise the base class before touching ``self``.  Assuming the base
    # is torch.nn.Module, assigning a Parameter or Module attribute before
    # Module.__init__() raises AttributeError, so the call must come first.
    super(three_layer_model_bv_masked_quarter, self).__init__()

    self.m1 = masks['fc1']
    self.m2 = masks['fc2']
    self.m3 = masks['fc3']

    self.input_shape = int(16)  # (16,)
    self.quantized_model = True  # informs some plotting functions that this model is quantized
    self.weight_precision = 8

    self.fc1 = qnn.QuantLinear(self.input_shape, int(16), bias=True,
                               weight_quant_type=QuantType.INT,
                               weight_bit_width=self.weight_precision)
    self.fc2 = qnn.QuantLinear(16, 8, bias=True,
                               weight_quant_type=QuantType.INT,
                               weight_bit_width=self.weight_precision)
    self.fc3 = qnn.QuantLinear(8, 8, bias=True,
                               weight_quant_type=QuantType.INT,
                               weight_bit_width=self.weight_precision)
    self.fc4 = qnn.QuantLinear(8, 5, bias=True,
                               weight_quant_type=QuantType.INT,
                               weight_bit_width=self.weight_precision)

    # TODO: check/change max_val away from 6 -- does a max value have to be set here at all?
    self.act1 = qnn.QuantReLU(quant_type=QuantType.INT,
                              bit_width=self.weight_precision, max_val=6)
    self.act2 = qnn.QuantReLU(quant_type=QuantType.INT,
                              bit_width=self.weight_precision, max_val=6)
    self.act3 = qnn.QuantReLU(quant_type=QuantType.INT,
                              bit_width=self.weight_precision, max_val=6)

    # NOTE(review): softmax is taken over dim 0 -- confirm this matches the
    # tensor layout used in forward().
    self.softmax = nn.Softmax(0)
def __init__(self):
    """Build the quantized classifier with layer widths <16,64,32,32,5>.

    Creates four ``QuantLinear`` layers with INT weights at
    ``self.weight_precision`` (4) bits, one ``QuantReLU`` stored as
    ``self.act`` and an ``nn.Softmax`` over dim 0.
    """
    super(three_layer_model_bv, self).__init__()
    self.input_shape = 16  # (16,)
    self.weight_precision = 4

    # Keyword arguments common to every fully connected layer.
    fc_options = dict(
        bias=True,
        weight_quant_type=QuantType.INT,
        weight_bit_width=self.weight_precision,
    )
    self.fc1 = qnn.QuantLinear(self.input_shape, 64, **fc_options)
    self.fc2 = qnn.QuantLinear(64, 32, **fc_options)
    self.fc3 = qnn.QuantLinear(32, 32, **fc_options)
    self.fc4 = qnn.QuantLinear(32, 5, **fc_options)

    # TODO: check/change max_val away from 6 -- does a max value have to be set here at all?
    self.act = qnn.QuantReLU(
        quant_type=QuantType.INT,
        bit_width=self.weight_precision,
        max_val=6,
    )
    self.softmax = nn.Softmax(0)
def __init__(self, masks, dims=[38, 11, 22], precision=8, bn_affine=True, bn_stats=True):
    """Build the masked, quantized classifier with tunable hidden widths.

    Layer widths are 16 -> dims[0] -> dims[1] -> dims[2] -> 5.  Each hidden
    width also gets a ``BatchNorm1d`` and a ``QuantReLU``.

    Args:
        masks: Mapping indexed with the keys ``'fc1'`` .. ``'fc4'``; the
            values are stored unchanged as ``self.m1`` .. ``self.m4``.
            (presumably per-layer pruning masks -- their use is not
            visible in this method)
        dims: Three hidden-layer widths.  The sequence is copied, so the
            shared default list is never aliased by an instance.
        precision: Bit width used for the weights of every ``QuantLinear``
            and for every ``QuantReLU``.
        bn_affine: Forwarded as ``affine`` to each ``BatchNorm1d``.
        bn_stats: Forwarded as ``track_running_stats`` to each
            ``BatchNorm1d``.

    Raises:
        KeyError: If one of the four mask keys is missing.
    """
    # Initialise the base class before touching ``self``.  Assuming the base
    # is torch.nn.Module, assigning a Parameter or Module attribute before
    # Module.__init__() raises AttributeError, so the call must come first.
    super(three_layer_model_bv_tunable, self).__init__()

    self.m1 = masks['fc1']
    self.m2 = masks['fc2']
    self.m3 = masks['fc3']
    self.m4 = masks['fc4']

    # Copy: the default argument is a single list object shared by all calls;
    # storing it directly would let one instance's mutation leak into others.
    self.dims = list(dims)
    self.weight_precision = precision

    # Model with variable behavior
    self.input_shape = int(16)  # (16,)
    self.quantized_model = True  # informs some plotting functions that this model is quantized

    self.fc1 = qnn.QuantLinear(self.input_shape, self.dims[0], bias=True,
                               weight_quant_type=QuantType.INT,
                               weight_bit_width=self.weight_precision)
    self.fc2 = qnn.QuantLinear(self.dims[0], self.dims[1], bias=True,
                               weight_quant_type=QuantType.INT,
                               weight_bit_width=self.weight_precision)
    self.fc3 = qnn.QuantLinear(self.dims[1], self.dims[2], bias=True,
                               weight_quant_type=QuantType.INT,
                               weight_bit_width=self.weight_precision)
    self.fc4 = qnn.QuantLinear(self.dims[2], 5, bias=True,
                               weight_quant_type=QuantType.INT,
                               weight_bit_width=self.weight_precision)

    # TODO: check/change max_val away from 6 -- does a max value have to be set here at all?
    self.act1 = qnn.QuantReLU(quant_type=QuantType.INT,
                              bit_width=self.weight_precision, max_val=6)
    self.act2 = qnn.QuantReLU(quant_type=QuantType.INT,
                              bit_width=self.weight_precision, max_val=6)
    self.act3 = qnn.QuantReLU(quant_type=QuantType.INT,
                              bit_width=self.weight_precision, max_val=6)

    self.bn1 = nn.BatchNorm1d(self.dims[0], affine=bn_affine, track_running_stats=bn_stats)
    self.bn2 = nn.BatchNorm1d(self.dims[1], affine=bn_affine, track_running_stats=bn_stats)
    self.bn3 = nn.BatchNorm1d(self.dims[2], affine=bn_affine, track_running_stats=bn_stats)

    # NOTE(review): softmax is taken over dim 0 -- confirm this matches the
    # tensor layout used in forward().
    self.softmax = nn.Softmax(0)
def __init__(self):
    """Build the binary-weight network: 8 -> 128 -> 3.

    Each linear layer is preceded by a 2-bit ``QuantHardTanh`` (range
    [-1, 1]) and a ``Dropout`` with probability ``DROPOUT``.  Both linear
    layers use 1-bit BINARY weights without bias; they differ only in
    whether weight scaling is per output channel.
    """
    super(QNN_HARnn, self).__init__()

    # Settings shared by both hard-tanh activations.
    hardtanh_options = dict(
        quant_type=QuantType.INT,
        bit_width=2,
        narrow_range=True,
        bit_width_impl_type=BitWidthImplType.CONST,
        min_val=-1.0,
        max_val=1.0,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_per_channel=False,
        scaling_impl_type=ScalingImplType.PARAMETER,
    )
    # Settings shared by both binary linear layers.
    binary_weight_options = dict(
        bias=False,
        weight_quant_type=QuantType.BINARY,
        weight_bit_width=1,
        weight_scaling_stats_op=StatsOp.AVE,
        weight_scaling_stats_sigma=0.001,
        weight_narrow_range=True,
        weight_bit_width_impl_type=BitWidthImplType.CONST,
    )

    self.hardtanh0 = qnn.QuantHardTanh(**hardtanh_options)
    self.dropout0 = torch.nn.Dropout(p=DROPOUT)
    self.linear1 = qnn.QuantLinear(
        8, 128,
        weight_scaling_per_output_channel=True,
        **binary_weight_options
    )

    self.hardtanh1 = qnn.QuantHardTanh(**hardtanh_options)
    self.dropout1 = torch.nn.Dropout(p=DROPOUT)
    self.linear2 = qnn.QuantLinear(
        128, 3,
        weight_scaling_per_output_channel=False,
        **binary_weight_options
    )
def __init__(self):
    """Build the 8-bit quantized MLP: 560 -> 200 -> 100 -> 6.

    The first two linear layers are each followed by an 8-bit
    ``QuantReLU`` with ``max_val=6``; the last linear layer has no
    activation defined here.
    """
    super(QNN_HARnn, self).__init__()

    int8_weights = dict(bias=True, weight_quant_type=QuantType.INT, weight_bit_width=8)
    int8_relu = dict(quant_type=QuantType.INT, bit_width=8, max_val=6)

    self.linear1 = qnn.QuantLinear(560, 200, **int8_weights)
    self.relu1 = qnn.QuantReLU(**int8_relu)
    self.linear2 = qnn.QuantLinear(200, 100, **int8_weights)
    self.relu2 = qnn.QuantReLU(**int8_relu)
    self.linear3 = qnn.QuantLinear(100, 6, **int8_weights)
def __init__(self):
    """Build a single 784 -> 10 quantized linear layer (8-bit INT weights, with bias)."""
    super(Net, self).__init__()
    self.layer = qnn.QuantLinear(
        784,
        10,
        bias=True,
        weight_quant_type=QuantType.INT,
        weight_bit_width=8,
    )
def make_quant_linear(
        in_channels,
        out_channels,
        bias,
        bit_width,
        enable_bias_quant=ENABLE_BIAS_QUANT,
        weight_quant_type=QUANT_TYPE,
        weight_scaling_impl_type=WEIGHT_SCALING_IMPL_TYPE,
        weight_scaling_stats_op=WEIGHT_SCALING_STATS_OP,
        weight_scaling_per_output_channel=WEIGHT_SCALING_PER_OUTPUT_CHANNEL,
        weight_restrict_scaling_type=WEIGHT_RESTRICT_SCALING_TYPE,
        weight_narrow_range=WEIGHT_NARROW_RANGE,
        weight_scaling_min_val=SCALING_MIN_VAL):
    """Create a ``qnn.QuantLinear`` configured from the module-level defaults.

    Args:
        in_channels: Input feature count, passed positionally.
        out_channels: Output feature count, passed positionally.
        bias: Forwarded as ``bias``; also gates output scale/bit-width
            computation together with ``enable_bias_quant``.
        bit_width: Forwarded as ``weight_bit_width``.
        enable_bias_quant: When truthy the bias quant type is the
            module-level ``QUANT_TYPE``; otherwise ``QuantType.FP``.
        weight_quant_type, weight_scaling_impl_type,
        weight_scaling_stats_op, weight_scaling_per_output_channel,
        weight_restrict_scaling_type, weight_narrow_range,
        weight_scaling_min_val: Forwarded unchanged under the same names.

    Returns:
        The constructed ``qnn.QuantLinear``.
    """
    # NOTE(review): the bias quant type follows the global QUANT_TYPE, not the
    # ``weight_quant_type`` argument -- confirm that is intended.
    if enable_bias_quant:
        bias_quant_type = QUANT_TYPE
    else:
        bias_quant_type = QuantType.FP

    # Output scale and bit width are only computed for a quantized bias.
    needs_output_stats = bias and enable_bias_quant

    return qnn.QuantLinear(
        in_channels,
        out_channels,
        bias=bias,
        bias_quant_type=bias_quant_type,
        compute_output_bit_width=needs_output_stats,
        compute_output_scale=needs_output_stats,
        weight_bit_width=bit_width,
        weight_quant_type=weight_quant_type,
        weight_scaling_impl_type=weight_scaling_impl_type,
        weight_scaling_stats_op=weight_scaling_stats_op,
        weight_scaling_per_output_channel=weight_scaling_per_output_channel,
        weight_restrict_scaling_type=weight_restrict_scaling_type,
        weight_narrow_range=weight_narrow_range,
        weight_scaling_min_val=weight_scaling_min_val)
def make_quant_linear(
        bit_width,
        in_channels,
        out_channels,
        bias=ENABLE_BIAS,
        enable_bias_quant=ENABLE_BIAS_QUANT,
        weight_scaling_impl_type=WEIGHT_SCALING_IMPL_TYPE,
        weight_scaling_stats_op=WEIGHT_SCALING_STATS_OP,
        weight_scaling_per_output_channel=WEIGHT_SCALING_PER_OUTPUT_CHANNEL,
        weight_restrict_scaling_type=WEIGHT_RESTRICT_SCALING_TYPE,
        weight_narrow_range=WEIGHT_NARROW_RANGE,
        weight_scaling_min_val=SCALING_MIN_VAL,
        return_quant_tensor=WEIGHT_RETURN_QUANT_TENSOR):
    '''Helper for Linear (Fully Connected) layers.

    The weight quant type is derived from ``bit_width`` via
    ``get_quant_type``; the bias uses the same quant type when
    ``enable_bias_quant`` is truthy and ``QuantType.FP`` otherwise.

    Args:
        bit_width: Weight bit width; also selects the quant type.
        in_channels: Input feature count, passed positionally.
        out_channels: Output feature count, passed positionally.
        bias: Forwarded as ``bias``; together with ``enable_bias_quant``
            it gates output scale/bit-width computation.
        enable_bias_quant: Whether the bias is quantized.
        weight_scaling_impl_type, weight_scaling_stats_op,
        weight_scaling_per_output_channel, weight_restrict_scaling_type,
        weight_narrow_range, weight_scaling_min_val: Forwarded unchanged
            under the same names.
        return_quant_tensor: Accepted but not used by this function.

    Returns:
        The constructed ``qnn.QuantLinear``.
    '''
    # NOTE(review): ``return_quant_tensor`` is never forwarded to QuantLinear --
    # confirm whether it should be.
    weight_quant_type = get_quant_type(bit_width)
    if enable_bias_quant:
        bias_quant_type = weight_quant_type
    else:
        bias_quant_type = QuantType.FP

    # Output scale and bit width are only computed for a quantized bias.
    needs_output_stats = bias and enable_bias_quant

    return qnn.QuantLinear(
        in_channels,
        out_channels,
        bias=bias,
        bias_quant_type=bias_quant_type,
        compute_output_bit_width=needs_output_stats,
        compute_output_scale=needs_output_stats,
        weight_bit_width=bit_width,
        weight_quant_type=weight_quant_type,
        weight_scaling_impl_type=weight_scaling_impl_type,
        weight_scaling_stats_op=weight_scaling_stats_op,
        weight_scaling_per_output_channel=weight_scaling_per_output_channel,
        weight_restrict_scaling_type=weight_restrict_scaling_type,
        weight_narrow_range=weight_narrow_range,
        weight_scaling_min_val=weight_scaling_min_val)
def __init__(self, input_size, output_size):
    """Build the four-head MLP with 2-bit weights.

    Four first-stage layers (``fc1a`` .. ``fc1d``) each map
    ``int(input_size / 4)`` features to 128, followed by
    512 -> 128 -> 64 -> ``output_size``.

    Args:
        input_size: Total input width; a quarter of it (truncated to int)
            is stored as ``self.input_size`` and used per head.
        output_size: Width of the bias-free output layer.
    """
    super(MultiHead4MLP, self).__init__()
    self.input_size = int(input_size / 4)
    self.relu = qnn.QuantReLU(bit_width=2, max_val=4)

    head_width = 128
    # One head per input quarter; created in a/b/c/d order.
    self.fc1a = qnn.QuantLinear(self.input_size, head_width, bias=True, weight_bit_width=2)
    self.fc1b = qnn.QuantLinear(self.input_size, head_width, bias=True, weight_bit_width=2)
    self.fc1c = qnn.QuantLinear(self.input_size, head_width, bias=True, weight_bit_width=2)
    self.fc1d = qnn.QuantLinear(self.input_size, head_width, bias=True, weight_bit_width=2)

    # 512 = 4 * 128 -- presumably the concatenated head outputs; confirm in forward().
    self.fc2 = qnn.QuantLinear(512, 128, bias=True, weight_bit_width=2)
    self.fc3 = qnn.QuantLinear(128, 64, bias=True, weight_bit_width=2)
    self.fc_out = qnn.QuantLinear(64, output_size, bias=False, weight_bit_width=2)
def __init__(self):
    """Build the 2-bit network: 2 -> 2 -> 1, each linear layer followed by a QuantReLU.

    Both linear layers are bias-free with 2-bit INT weights; both
    activations are 2-bit with ``max_val=6``.
    """
    super(SimpleNN, self).__init__()

    weight_cfg = dict(bias=False, weight_quant_type=QuantType.INT, weight_bit_width=2)
    act_cfg = dict(quant_type=QuantType.INT, bit_width=2, max_val=6)

    self.fc1 = qnn.QuantLinear(2, 2, **weight_cfg)
    self.relu1 = qnn.QuantReLU(**act_cfg)
    self.fc2 = qnn.QuantLinear(2, 1, **weight_cfg)
    self.relu2 = qnn.QuantReLU(**act_cfg)
def __init__(self, bit_width=8, weight_bit_width=8):
    """Build a quantized LeNet: two 5x5 convolutions and three linear layers.

    Args:
        bit_width: Bit width of every ``QuantReLU``.
        weight_bit_width: Bit width of every convolution/linear weight.
    """
    # Brevitas is imported lazily, inside the constructor.
    import brevitas.nn as qnn
    from brevitas.core.quant import QuantType

    super(QuantLeNet, self).__init__()

    def make_relu():
        # All activations share the same INT configuration.
        return qnn.QuantReLU(quant_type=QuantType.INT, bit_width=bit_width, max_val=6)

    self.conv1 = qnn.QuantConv2d(
        1, 6, 5,
        weight_quant_type=QuantType.INT,
        weight_bit_width=weight_bit_width,
        padding=2)
    self.relu1 = make_relu()
    self.conv2 = qnn.QuantConv2d(
        6, 16, 5,
        weight_quant_type=QuantType.INT,
        weight_bit_width=weight_bit_width,
        padding=2)
    self.relu2 = make_relu()
    self.fc1 = qnn.QuantLinear(
        16 * 7 * 7, 120,
        bias=True,
        weight_quant_type=QuantType.INT,
        weight_bit_width=weight_bit_width)
    self.relu3 = make_relu()
    self.fc2 = qnn.QuantLinear(
        120, 84,
        bias=True,
        weight_quant_type=QuantType.INT,
        weight_bit_width=weight_bit_width)
    self.relu4 = make_relu()
    self.fc3 = qnn.QuantLinear(
        84, 10,
        bias=False,
        weight_quant_type=QuantType.INT,
        weight_bit_width=weight_bit_width)
def __init__(self):
    """Build a 2-bit quantized LeNet.

    Two bias-free 5x5 convolutions (the first with ``padding=2``) are
    followed by linear layers 400 -> 120 -> 84 -> 10; every weight and
    activation uses 2-bit INT quantization.
    """
    super(QuantLeNet, self).__init__()

    int2 = dict(weight_quant_type=QuantType.INT, weight_bit_width=2)
    relu2bit = dict(quant_type=QuantType.INT, bit_width=2, max_val=6)

    self.conv1 = qnn.QuantConv2d(1, 6, 5, padding=2, bias=False, **int2)
    self.relu1 = qnn.QuantReLU(**relu2bit)
    self.conv2 = qnn.QuantConv2d(6, 16, 5, bias=False, **int2)
    self.relu2 = qnn.QuantReLU(**relu2bit)
    self.fc1 = qnn.QuantLinear(16 * 5 * 5, 120, bias=True, **int2)
    self.relu3 = qnn.QuantReLU(**relu2bit)
    self.fc2 = qnn.QuantLinear(120, 84, bias=True, **int2)
    self.relu4 = qnn.QuantReLU(**relu2bit)
    self.fc3 = qnn.QuantLinear(84, 10, bias=False, **int2)
def __init__(self):
    """Build ErNET: a strided stem conv, six ACFF blocks, a 1x1 conv and a linear head.

    Every quantized layer uses 16-bit INT weights with a constant
    power-of-two scale of 1.0.
    """
    super(ErNET, self).__init__()

    # 16-bit fixed-point weight configuration shared by all quantized layers.
    fixed16 = dict(
        weight_quant_type=QuantType.INT,
        weight_bit_width=16,
        weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
        weight_scaling_impl_type=ScalingImplType.CONST,
        weight_scaling_const=1.0,
    )

    self.conv1 = qnn.QuantConv2d(
        in_channels=3, out_channels=16, kernel_size=3,
        padding=0, bias=False, stride=2, **fixed16)

    self.acff1 = ACFF(16, 64)
    self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.acff2 = ACFF(64, 96)
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.acff3 = ACFF(96, 128)
    self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.acff4 = ACFF(128, 128)
    self.acff5 = ACFF(128, 128)
    self.acff6 = ACFF(128, 256)

    self.conv2 = qnn.QuantConv2d(
        in_channels=256, out_channels=5, kernel_size=1,
        padding=0, stride=1, bias=False, **fixed16)
    self.globalpool = nn.AvgPool2d(kernel_size=5, stride=1, padding=0)
    self.fc = qnn.QuantLinear(3 * 3 * 5, 5, bias=True, **fixed16)
    self.soft = nn.Softmax(dim=1)
def __init__(self):
    """Build the quantized XOR network: 2 -> 2 -> 1.

    Both linear layers have an INT-quantized bias and compute output
    scale and bit width; no ``weight_bit_width`` is passed, so the
    library default applies.  The two ``QuantReLU`` layers use bit width 4
    and ``max_val=8``.
    """
    super(QuantXORNet, self).__init__()

    # Options common to both linear layers.
    linear_cfg = dict(
        bias_quant_type=QuantType.INT,
        bias=True,
        compute_output_scale=True,
        compute_output_bit_width=True,
        weight_quant_type=QuantType.INT,
    )

    # The float literal 4.0 is kept exactly as in the original definition.
    self.relu0 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=4.0, max_val=8)
    # (disabled option kept for reference: input_bit_width=32)
    self.linear1 = qnn.QuantLinear(in_features=2, out_features=2, **linear_cfg)
    self.relu1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=4, max_val=8)
    # (disabled option kept for reference: bit_width=4)
    self.linear2 = qnn.QuantLinear(in_features=2, out_features=1, **linear_cfg)
def make_quant_linear(
        in_channels,
        out_channels,
        bias,
        bit_width,
        scaling_per_output_channel=WEIGHT_SCALING_PER_OUTPUT_CHANNEL):
    """Create a ``qnn.QuantLinear`` using the module-level weight settings.

    Args:
        in_channels: Input feature count, passed positionally.
        out_channels: Output feature count, passed positionally.
        bias: Forwarded as ``bias``.
        bit_width: Forwarded as ``weight_bit_width``.
        scaling_per_output_channel: Forwarded as
            ``weight_scaling_per_output_channel``.

    Returns:
        The constructed ``qnn.QuantLinear``.
    """
    # Everything not exposed as a parameter comes from module constants.
    weight_settings = dict(
        weight_bit_width=bit_width,
        weight_quant_type=QUANT_TYPE,
        weight_scaling_impl_type=WEIGHT_SCALING_IMPL_TYPE,
        weight_scaling_stats_op=WEIGHT_SCALING_STATS_OP,
        weight_scaling_per_output_channel=scaling_per_output_channel,
        weight_restrict_scaling_type=WEIGHT_RESTRICT_SCALING_TYPE,
        weight_narrow_range=WEIGHT_NARROW_RANGE,
        weight_scaling_min_val=SCALING_MIN_VAL,
    )
    return qnn.QuantLinear(in_channels, out_channels, bias=bias, **weight_settings)
def make_quant_linear(in_features,
                      out_features,
                      bit_width,
                      bias,
                      weight_quant=WEIGHT_QUANTIZER,
                      bias_quant=BIAS_QUANTIZER,
                      return_quant_tensor=LINEAR_RETURN_QUANT_TENSOR,
                      output_quant=ACT_QUANTIZER,
                      enable_bias_quant=ENABLE_BIAS_QUANT,
                      weight_scaling_min_val=SCALING_MIN_VAL):
    """Create a ``qnn.QuantLinear`` with weight, bias and output quantizers.

    Args:
        in_features: Input feature count, passed positionally.
        out_features: Output feature count, passed positionally.
        bit_width: Used for both ``output_bit_width`` and
            ``weight_bit_width``.
        bias: Forwarded as ``bias``; together with ``enable_bias_quant``
            it gates output scale/bit-width computation.
        weight_quant, bias_quant, return_quant_tensor, output_quant,
        weight_scaling_min_val: Forwarded unchanged under the same names.
        enable_bias_quant: When truthy the bias quant type is the
            module-level ``QUANT_TYPE``; otherwise ``QuantType.FP``.

    Returns:
        The constructed ``qnn.QuantLinear``.
    """
    if enable_bias_quant:
        bias_quant_type = QUANT_TYPE
    else:
        bias_quant_type = QuantType.FP

    # Output scale and bit width are only computed for a quantized bias.
    needs_output_stats = bias and enable_bias_quant

    return qnn.QuantLinear(
        in_features,
        out_features,
        weight_quant=weight_quant,
        bias_quant=bias_quant,
        return_quant_tensor=return_quant_tensor,
        output_quant=output_quant,
        output_bit_width=bit_width,
        bias=bias,
        bias_quant_type=bias_quant_type,
        compute_output_bit_width=needs_output_stats,
        compute_output_scale=needs_output_stats,
        weight_bit_width=bit_width,
        weight_scaling_min_val=weight_scaling_min_val)
def __init__(self):
    """Build a 2-bit quantized CNN: two 3x3 convolutions and one linear layer.

    Both convolutions keep 4 output channels with stride 1 and padding 1;
    the classifier maps ``4 * 7 * 7`` features to 10 outputs.  No layer
    has a bias.
    """
    super(Net, self).__init__()

    # Shared settings for the two 3x3 convolution layers.
    conv_cfg = dict(
        kernel_size=3,
        stride=1,
        padding=1,
        weight_quant_type=QuantType.INT,
        weight_bit_width=2,
        bias=False,
    )
    relu_cfg = dict(quant_type=QuantType.INT, bit_width=2, max_val=6)

    # First 2D convolution block
    self.conv1 = qnn.QuantConv2d(in_channels=1, out_channels=4, **conv_cfg)
    self.relu1 = qnn.QuantReLU(**relu_cfg)

    # Second 2D convolution block
    self.conv2 = qnn.QuantConv2d(in_channels=4, out_channels=4, **conv_cfg)
    self.relu2 = qnn.QuantReLU(**relu_cfg)

    self.fc1 = qnn.QuantLinear(
        in_features=4 * 7 * 7,
        out_features=10,
        bias=False,
        weight_quant_type=QuantType.INT,
        weight_bit_width=2,
    )
def __init__(self, is_train=False, train_landmarks=False):
    """Build the 8-bit quantized ONet feature extractor and its three heads.

    ``self.features`` is a ``Sequential`` of four conv/ReLU stages (the
    first three followed by max pooling), a flatten, a 512 -> 256 linear
    layer and dropout.  Three bias-free linear heads map the 256 features
    to 2, 4 and 10 outputs (presumably classification, box regression and
    landmarks -- confirm in forward()).

    The spatial-size comments below assume a 48x48 input.

    Args:
        is_train: Stored as ``self.is_train``.
        train_landmarks: Stored as ``self.train_landmarks``.
    """
    super(ONet, self).__init__()
    self.is_train = is_train
    self.train_landmarks = train_landmarks

    self.features = nn.Sequential(
        OrderedDict([
            ('conv1', qnn.QuantConv2d(3, 32, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),  # 48 - 2 = 46
            ('prelu1', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
            ('pool1', nn.MaxPool2d(3, 2, ceil_mode=False)),  # (46-3)/2 + 1 = 22
            ('conv2', qnn.QuantConv2d(32, 64, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),  # 22 - 2 = 20
            ('prelu2', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
            ('pool2', nn.MaxPool2d(3, 2, ceil_mode=False)),  # (20-3)/2 + 1 = 9
            # conv3/conv4 previously received the QuantReLU keywords
            # (bit_width=8, max_val=6); a convolution takes weight_bit_width,
            # exactly like conv1 and conv2.
            ('conv3', qnn.QuantConv2d(64, 64, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),  # 9 - 2 = 7
            ('prelu3', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
            ('pool3', nn.MaxPool2d(2, 2, ceil_mode=False)),  # (7-2)/2 + 1 = 3
            ('conv4', qnn.QuantConv2d(64, 128, 2, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),  # 3 - 1 = 2
            ('prelu4', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
            ('flatten', Flatten()),
            # Named 'conv5' although it is a linear layer; the key is part of
            # the state dict, so it is left unchanged.
            ('conv5', qnn.QuantLinear(128 * 2 * 2, 256, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)),
            ('prelu5', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
            ('dropout', nn.Dropout(0.2)),
        ]))

    self.conv6_1 = qnn.QuantLinear(256, 2, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)
    self.conv6_2 = qnn.QuantLinear(256, 4, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)
    self.conv6_3 = qnn.QuantLinear(256, 10, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)
def __init__(self, is_train=False):
    """Build the 8-bit quantized RNet feature extractor and its two heads.

    ``self.features`` holds three conv/ReLU stages (the first two
    followed by max pooling), a flatten and a 256 -> 128 linear layer
    with ReLU.  Two bias-free linear heads map the 128 features to 2 and
    4 outputs.

    The spatial-size comments below assume a 24x24 input.

    Args:
        is_train: Stored as ``self.is_train``.
    """
    super(RNet, self).__init__()
    self.is_train = is_train

    def conv(in_ch, out_ch, kernel):
        # 8-bit INT convolution with stride 1.
        return qnn.QuantConv2d(in_ch, out_ch, kernel, 1,
                               weight_quant_type=QuantType.INT, weight_bit_width=8)

    def relu():
        return qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)

    def linear(in_feat, out_feat):
        # 8-bit INT linear layer without bias.
        return qnn.QuantLinear(in_feat, out_feat,
                               weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)

    stages = [
        ('conv1', conv(3, 28, 3)),                        # 24 - 2 = 22
        ('prelu1', relu()),
        ('pool1', nn.MaxPool2d(3, 2, ceil_mode=False)),   # (22-3)/2 + 1 = 10
        ('conv2', conv(28, 48, 3)),                       # 10 - 2 = 8
        ('prelu2', relu()),
        ('pool2', nn.MaxPool2d(3, 2, ceil_mode=False)),   # (8-3)/2 + 1 = 3
        ('conv3', conv(48, 64, 2)),                       # 3 - 1 = 2
        ('prelu3', relu()),
        ('flatten', Flatten()),
        ('conv4', linear(64 * 2 * 2, 128)),
        ('prelu4', relu()),
        # a ('dropout', nn.Dropout(0.2)) stage is intentionally disabled here
    ]
    self.features = nn.Sequential(OrderedDict(stages))

    self.conv5_1 = linear(128, 2)
    self.conv5_2 = linear(128, 4)
def __init__(self, masks, precision=8):
    """Build the masked, quantized autoencoder (640 -> 128 x3 -> 8 -> 128 x4 -> 640).

    Every encoder/decoder stage is a ``QuantLinear`` followed by a
    ``BatchNorm1d`` and a ``QuantReLU``; the output layer ``dout`` is a
    bare ``QuantLinear``.

    Args:
        masks: Mapping indexed with the keys ``'enc1'`` .. ``'enc4'``,
            ``'dec1'`` .. ``'dec4'`` and ``'dout'``; the values are stored
            unchanged as ``self.e1`` .. ``self.e4``, ``self.d1`` ..
            ``self.d4`` and ``self.do``.  (presumably per-layer pruning
            masks -- their use is not visible in this method)
        precision: Bit width used for every linear weight and every
            ``QuantReLU``.

    Raises:
        KeyError: If one of the nine mask keys is missing.
    """
    # Initialise the base class before touching ``self``.  Assuming the base
    # is torch.nn.Module, assigning a Parameter or Module attribute before
    # Module.__init__() raises AttributeError, so the call must come first.
    super(t2_autoencoder_masked_bv, self).__init__()

    self.weight_precision = precision
    self.quantized_model = True

    self.e1 = masks['enc1']
    self.e2 = masks['enc2']
    self.e3 = masks['enc3']
    self.e4 = masks['enc4']
    self.d1 = masks['dec1']
    self.d2 = masks['dec2']
    self.d3 = masks['dec3']
    self.d4 = masks['dec4']
    self.do = masks['dout']

    self.input_size = 640
    self.bottleneck_size = 8
    self.hidden_size = 128

    # Keyword arguments shared by all linear layers / all activations.
    linear_kwargs = dict(bias=True,
                         weight_quant_type=QuantType.INT,
                         weight_bit_width=self.weight_precision)
    relu_kwargs = dict(quant_type=QuantType.INT,
                       bit_width=self.weight_precision,
                       max_val=6)

    # Encoder
    self.enc1 = qnn.QuantLinear(self.input_size, self.hidden_size, **linear_kwargs)
    self.ebn1 = nn.BatchNorm1d(self.hidden_size)
    self.eact1 = qnn.QuantReLU(**relu_kwargs)
    self.enc2 = qnn.QuantLinear(self.hidden_size, self.hidden_size, **linear_kwargs)
    self.ebn2 = nn.BatchNorm1d(self.hidden_size)
    self.eact2 = qnn.QuantReLU(**relu_kwargs)
    self.enc3 = qnn.QuantLinear(self.hidden_size, self.hidden_size, **linear_kwargs)
    self.ebn3 = nn.BatchNorm1d(self.hidden_size)
    self.eact3 = qnn.QuantReLU(**relu_kwargs)
    self.enc4 = qnn.QuantLinear(self.hidden_size, self.bottleneck_size, **linear_kwargs)
    self.ebn4 = nn.BatchNorm1d(self.bottleneck_size)
    self.eact4 = qnn.QuantReLU(**relu_kwargs)

    # Decoder
    self.dec1 = qnn.QuantLinear(self.bottleneck_size, self.hidden_size, **linear_kwargs)
    self.dbn1 = nn.BatchNorm1d(self.hidden_size)
    self.dact1 = qnn.QuantReLU(**relu_kwargs)
    self.dec2 = qnn.QuantLinear(self.hidden_size, self.hidden_size, **linear_kwargs)
    self.dbn2 = nn.BatchNorm1d(self.hidden_size)
    self.dact2 = qnn.QuantReLU(**relu_kwargs)
    self.dec3 = qnn.QuantLinear(self.hidden_size, self.hidden_size, **linear_kwargs)
    self.dbn3 = nn.BatchNorm1d(self.hidden_size)
    self.dact3 = qnn.QuantReLU(**relu_kwargs)
    self.dec4 = qnn.QuantLinear(self.hidden_size, self.hidden_size, **linear_kwargs)
    self.dbn4 = nn.BatchNorm1d(self.hidden_size)
    self.dact4 = qnn.QuantReLU(**relu_kwargs)

    # Output
    self.dout = qnn.QuantLinear(self.hidden_size, self.input_size, **linear_kwargs)
def __init__(self, VGG_type='A', batch_norm=False, bit_width=8, num_classes=1000, pretrained_model=None):
    """Build the quantized VGG and initialise its weights.

    Args:
        VGG_type: Key into ``cfgs`` selecting the feature-layer layout.
        batch_norm: Forwarded to ``make_layers`` and to ``VGG_net`` when a
            checkpoint is loaded.
        bit_width: Bit width for the input quantizer, the feature layers,
            the classifier weights and the classifier activations.
        num_classes: Output width of the last classifier layer.
        pretrained_model: ``None`` to use ``self._initialize_weights()``;
            the string ``'pytorch'`` to start from torchvision's pretrained
            ``vgg16``; otherwise a checkpoint path handed to ``torch.load``
            whose ``'state_dict'`` entry is loaded into a ``VGG_net``.
    """
    super(QuantVGG, self).__init__()

    # Use a per-rank logger name when running distributed.
    if dist.is_initialized():
        logger_name = "{}{}".format(__name__, dist.get_rank())
    else:
        logger_name = __name__
    self.logger = get_logger(name=logger_name)

    self.inp_quant = qnn.QuantIdentity(bit_width=bit_width,
                                       act_quant=INPUT_QUANTIZER,
                                       return_quant_tensor=RETURN_QUANT_TENSOR)
    self.features = make_layers(cfgs[VGG_type], batch_norm, bit_width)
    self.avgpool = qnn.QuantAdaptiveAvgPool2d((7, 7))

    # NOTE(review): the first dropout is qnn.QuantDropout, the second is
    # nn.Dropout -- confirm the mix is intentional.
    self.classifier = nn.Sequential(
        qnn.QuantLinear(512 * 7 * 7, 4096,
                        bias=True,
                        bias_quant=BIAS_QUANTIZER,
                        weight_quant=WEIGHT_QUANTIZER,
                        weight_bit_width=bit_width,
                        weight_scaling_min_val=SCALING_MIN_VAL,
                        return_quant_tensor=RETURN_QUANT_TENSOR),
        qnn.QuantReLU(bit_width=bit_width,
                      act_quant=ACT_QUANTIZER,
                      return_quant_tensor=RETURN_QUANT_TENSOR),
        qnn.QuantDropout(),
        qnn.QuantLinear(4096, 4096,
                        bias=True,
                        bias_quant=BIAS_QUANTIZER,
                        weight_quant=WEIGHT_QUANTIZER,
                        weight_bit_width=bit_width,
                        weight_scaling_min_val=SCALING_MIN_VAL,
                        return_quant_tensor=RETURN_QUANT_TENSOR),
        qnn.QuantReLU(bit_width=bit_width,
                      act_quant=ACT_QUANTIZER,
                      return_quant_tensor=RETURN_QUANT_TENSOR),
        nn.Dropout(),
        qnn.QuantLinear(4096, num_classes,
                        bias=False,
                        weight_quant=WEIGHT_QUANTIZER,
                        weight_scaling_min_val=SCALING_MIN_VAL,
                        weight_bit_width=bit_width,
                        return_quant_tensor=False),
    )
    # Indices 0, 3 and 6 are the three QuantLinear layers of the classifier.
    self.classifier[0].cache_inference_quant_bias = True
    self.classifier[3].cache_inference_quant_bias = True
    self.classifier[6].cache_inference_quant_bias = True

    if is_master():
        print_config(self.logger)

    if pretrained_model is None:
        self._initialize_weights()
    else:
        if pretrained_model == 'pytorch':
            self.logger.info(
                "Initializing with pretrained model from PyTorch")
            # Use PyTorch's pretrained model.
            # NOTE(review): always vgg16, independent of VGG_type/batch_norm --
            # confirm this is intended.
            pre_model = models.vgg16(pretrained=True)
        else:
            pre_model = VGG_net(VGG_type=VGG_type, batch_norm=batch_norm, num_classes=num_classes)
            # NOTE: torch.load unpickles the file; only load trusted checkpoints.
            loaded_model = torch.load(pretrained_model)['state_dict']
            # Check whether the model was trained using DataParallel.  The
            # keys view does not support indexing, so peek at the first key
            # through an iterator ('' when the state dict is empty).
            first_key = next(iter(loaded_model), '')
            if first_key.startswith('module'):
                # A model trained with DataParallel is wrapped under
                # ``module``: load through a wrapper, then copy the unwrapped
                # weights into a fresh plain model.
                pre_model = torch.nn.DataParallel(pre_model)
                pre_model.load_state_dict(loaded_model)
                unwrapped_sd = pre_model.module.state_dict()
                pre_model = VGG_net(VGG_type=VGG_type, batch_norm=batch_norm, num_classes=num_classes)
                pre_model.load_state_dict(unwrapped_sd)
            else:
                pre_model.load_state_dict(loaded_model)
        self._initialize_custom_weights(pre_model)

    self.logger.info("Initialization Done")
def __init__(self):
    """Build the fixed-point VGG: eight 3x3 conv layers and four linear layers.

    All quantized weights use a constant power-of-two scale of 1.0.  The
    convolutions and ``fc2`` .. ``fc4`` use 8-bit weights, ``fc1`` uses
    32-bit weights.  Every ``QuantReLU`` is 8-bit with
    ``max_val = 1 - 1/128``.
    """
    super(VGG, self).__init__()

    def conv3x3(in_ch, out_ch):
        # Bias-free 3x3 convolution, padding 1, 8-bit fixed-point weights.
        return qnn.QuantConv2d(
            in_channels=in_ch,
            out_channels=out_ch,
            kernel_size=3,
            padding=1,
            bias=False,
            weight_quant_type=QuantType.INT,
            weight_bit_width=8,
            weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
            weight_scaling_impl_type=ScalingImplType.CONST,
            weight_scaling_const=1.0)

    def relu():
        return qnn.QuantReLU(
            quant_type=QuantType.INT,
            bit_width=8,
            max_val=1 - 1 / 128.0,
            restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
            scaling_impl_type=ScalingImplType.CONST)

    def pool():
        return nn.MaxPool2d(kernel_size=2, stride=2)

    def linear(in_feat, out_feat, weight_bits):
        return qnn.QuantLinear(
            in_feat,
            out_feat,
            bias=True,
            weight_quant_type=QuantType.INT,
            weight_bit_width=weight_bits,
            weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
            weight_scaling_impl_type=ScalingImplType.CONST,
            weight_scaling_const=1.0)

    self.conv1 = conv3x3(3, 64)
    self.relu1 = relu()
    self.pool1 = pool()

    self.conv2 = conv3x3(64, 128)
    self.relu2 = relu()
    self.pool2 = pool()

    self.conv3 = conv3x3(128, 256)
    self.relu3 = relu()
    self.conv4 = conv3x3(256, 256)
    self.relu4 = relu()
    self.pool3 = pool()

    self.conv5 = conv3x3(256, 512)
    self.relu5 = relu()
    self.conv6 = conv3x3(512, 512)
    self.relu6 = relu()
    self.pool4 = pool()

    # NOTE(review): conv7 takes 256 input channels although conv6 emits 512 --
    # looks like a copy-paste slip; confirm against forward() before changing.
    self.conv7 = conv3x3(256, 512)
    self.relu7 = relu()
    self.conv8 = conv3x3(512, 512)
    self.relu8 = relu()
    self.pool5 = pool()

    # A full-precision head (nn.Linear 4*4*256 -> 1024 -> 512 -> 10 with
    # nn.ReLU) was used here previously; the quantized head below replaces it.
    self.fc1 = linear(8192, 4096, 32)
    self.relufc1 = relu()
    self.fc2 = linear(4096, 1024, 8)
    self.relufc2 = relu()
    self.fc3 = linear(1024, 1024, 8)
    self.relufc3 = relu()
    self.fc4 = linear(1024, 10, 8)
def __init__(self):
    """Build the fixed-point LeNet5: two 3x3 conv layers and two linear layers.

    All quantized weights use a constant power-of-two scale of 1.0.
    ``conv1`` takes its weight bit width from the module-level
    ``total_bits``, ``conv2`` and ``fc2`` use 8 bits and ``fc1`` uses 32
    bits.  Every ``QuantReLU`` is 8-bit with ``max_val = 1 - 1/128``.
    """
    super(LeNet5, self).__init__()

    # Constant power-of-two weight scaling shared by every quantized layer.
    weight_scaling = dict(
        weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
        weight_scaling_impl_type=ScalingImplType.CONST,
        weight_scaling_const=1.0,
    )

    def conv3x3(in_ch, out_ch, weight_bits):
        return qnn.QuantConv2d(
            in_channels=in_ch,
            out_channels=out_ch,
            kernel_size=3,
            padding=1,
            bias=False,
            weight_quant_type=QuantType.INT,
            weight_bit_width=weight_bits,
            **weight_scaling)

    def relu():
        return qnn.QuantReLU(
            quant_type=QuantType.INT,
            bit_width=8,
            max_val=1 - 1 / 128.0,
            restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
            scaling_impl_type=ScalingImplType.CONST)

    def linear(in_feat, out_feat, weight_bits):
        return qnn.QuantLinear(
            in_feat,
            out_feat,
            bias=True,
            weight_quant_type=QuantType.INT,
            weight_bit_width=weight_bits,
            **weight_scaling)

    self.conv1 = conv3x3(1, 20, total_bits)
    self.relu1 = relu()
    self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

    self.conv2 = conv3x3(20, 50, 8)
    self.relu2 = relu()
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

    # A 32-bit floating-point head (nn.Linear 7*7*50 -> 500 -> 10 with
    # nn.ReLU) was used here previously; the fixed-point head below replaces it.
    self.fc1 = linear(7 * 7 * 50, 500, 32)
    self.relu3 = relu()
    self.fc2 = linear(500, 10, 8)