def __init__(self, masks):
    self.m1 = masks['fc1']
    self.m2 = masks['fc2']
    self.m3 = masks['fc3']
    # Model with <16,64,32,32,5> x 1/4 behavior
    super(three_layer_model_bv_masked_quarter, self).__init__()
    self.input_shape = int(16)  # (16,)
    self.quantized_model = True  # variable to inform some of our plotting functions this is quantized
    self.weight_precision = 8
    self.fc1 = qnn.QuantLinear(self.input_shape, int(16), bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.fc2 = qnn.QuantLinear(16, 8, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.fc3 = qnn.QuantLinear(8, 8, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.fc4 = qnn.QuantLinear(8, 5, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    # TODO: Check/change this away from 6; do we have to set a max value here? Can we not?
    self.act1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.act2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.act3 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.softmax = nn.Softmax(0)
def __init__(self, masks, dims=[38, 11, 22], precision=8, bn_affine=True, bn_stats=True):
    self.m1 = masks['fc1']
    self.m2 = masks['fc2']
    self.m3 = masks['fc3']
    self.m4 = masks['fc4']
    self.dims = dims
    self.weight_precision = precision
    # Model with variable behavior
    super(three_layer_model_bv_tunable, self).__init__()
    self.input_shape = int(16)  # (16,)
    self.quantized_model = True  # variable to inform some of our plotting functions this is quantized
    self.fc1 = qnn.QuantLinear(self.input_shape, self.dims[0], bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.fc2 = qnn.QuantLinear(self.dims[0], self.dims[1], bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.fc3 = qnn.QuantLinear(self.dims[1], self.dims[2], bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.fc4 = qnn.QuantLinear(self.dims[2], 5, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    # TODO: Check/change this away from 6; do we have to set a max value here? Can we not?
    self.act1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.act2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.act3 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.bn1 = nn.BatchNorm1d(self.dims[0], affine=bn_affine, track_running_stats=bn_stats)
    self.bn2 = nn.BatchNorm1d(self.dims[1], affine=bn_affine, track_running_stats=bn_stats)
    self.bn3 = nn.BatchNorm1d(self.dims[2], affine=bn_affine, track_running_stats=bn_stats)
    self.softmax = nn.Softmax(0)
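# The two masked models above only store their pruning masks in __init__. The helper below
# is a minimal, hypothetical sketch of how such per-layer masks are commonly applied; the
# name `apply_masks` and the assumption that each mask is a torch tensor shaped like the
# corresponding weight are illustrations, not part of the original code.
import torch

def apply_masks(model, masks):
    # Zero out pruned connections by element-wise multiplication of weights and masks.
    with torch.no_grad():
        model.fc1.weight.mul_(masks['fc1'])
        model.fc2.weight.mul_(masks['fc2'])
        model.fc3.weight.mul_(masks['fc3'])
        if 'fc4' in masks:  # the tunable model also carries a mask for fc4
            model.fc4.weight.mul_(masks['fc4'])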
def __init__(self):
    super(QNN_HARnn, self).__init__()
    self.linear1 = qnn.QuantLinear(560, 200, bias=True,
                                   weight_quant_type=QuantType.INT, weight_bit_width=8)
    self.relu1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)
    self.linear2 = qnn.QuantLinear(200, 100, bias=True,
                                   weight_quant_type=QuantType.INT, weight_bit_width=8)
    self.relu2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)
    self.linear3 = qnn.QuantLinear(100, 6, bias=True,
                                   weight_quant_type=QuantType.INT, weight_bit_width=8)
def __init__(self, input_size, output_size):
    super(MultiHead4MLP, self).__init__()
    self.input_size = int(input_size / 4)  # each of the four heads sees a quarter of the input
    self.relu = qnn.QuantReLU(bit_width=2, max_val=4)
    self.fc1a = qnn.QuantLinear(self.input_size, 128, bias=True, weight_bit_width=2)
    self.fc1b = qnn.QuantLinear(self.input_size, 128, bias=True, weight_bit_width=2)
    self.fc1c = qnn.QuantLinear(self.input_size, 128, bias=True, weight_bit_width=2)
    self.fc1d = qnn.QuantLinear(self.input_size, 128, bias=True, weight_bit_width=2)
    self.fc2 = qnn.QuantLinear(512, 128, bias=True, weight_bit_width=2)  # 4 x 128 concatenated head outputs
    self.fc3 = qnn.QuantLinear(128, 64, bias=True, weight_bit_width=2)
    self.fc_out = qnn.QuantLinear(64, output_size, bias=False, weight_bit_width=2)
def __init__(self):
    # Model with <16,64,32,32,5> behavior
    super(three_layer_model_bv, self).__init__()
    self.input_shape = int(16)  # (16,)
    self.weight_precision = 4
    self.fc1 = qnn.QuantLinear(self.input_shape, int(64), bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.fc2 = qnn.QuantLinear(64, 32, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.fc3 = qnn.QuantLinear(32, 32, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.fc4 = qnn.QuantLinear(32, 5, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    # TODO: Check/change this away from 6; do we have to set a max value here? Can we not?
    self.act = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.softmax = nn.Softmax(0)
def make_layers(cfg, batch_norm, bit_width):
    layers = []
    in_channels = 3
    assert not (batch_norm and RETURN_QUANT_TENSOR), "nn.BatchNorm2d does not accept a quant tensor"
    for v in cfg:
        if v == 'M':
            layers += [qnn.QuantMaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = qnn.QuantConv2d(in_channels, v, kernel_size=3, stride=1, padding=1, groups=1,
                                     bias_quant=BIAS_QUANTIZER,
                                     weight_bit_width=bit_width,
                                     weight_quant=WEIGHT_QUANTIZER,
                                     weight_scaling_min_val=SCALING_MIN_VAL,
                                     weight_scaling_per_output_channel=WEIGHT_SCALING_PER_OUTPUT_CHANNEL,
                                     return_quant_tensor=RETURN_QUANT_TENSOR)
            conv2d.cache_inference_quant_out = True
            conv2d.cache_inference_quant_bias = True
            act = qnn.QuantReLU(bit_width=bit_width, act_quant=ACT_QUANTIZER,
                                return_quant_tensor=RETURN_QUANT_TENSOR)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), act]
            else:
                layers += [conv2d, act]
            in_channels = v
    return nn.Sequential(*layers)
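# Hypothetical usage sketch (not from the original file): build a VGG-11 style feature
# extractor at 4-bit precision. The configuration list below assumes the usual
# torchvision-style VGG cfg format, and the module-level quantizer constants
# (BIAS_QUANTIZER, WEIGHT_QUANTIZER, etc.) are assumed to be defined. Note that
# batch_norm=True is only valid when RETURN_QUANT_TENSOR is False, as enforced by the
# assert in make_layers.
vgg11_cfg = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
features = make_layers(vgg11_cfg, batch_norm=True, bit_width=4)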
def make_relu_activation(bit_width):
    return quant_nn.QuantReLU(bit_width=bit_width,
                              max_val=ACT_MAX_VAL,
                              quant_type=QUANT_TYPE,
                              scaling_impl_type=ACT_SCALING_IMPL_TYPE,
                              scaling_min_val=SCALING_MIN_VAL,
                              return_quant_tensor=False)
def make_quant_relu(bit_width):
    return qnn.QuantReLU(bit_width=bit_width,
                         quant_type=QUANT_TYPE,
                         scaling_impl_type=ACT_SCALING_IMPL_TYPE,
                         scaling_per_channel=ACT_SCALING_PER_CHANNEL,
                         restrict_scaling_type=ACT_SCALING_RESTRICT_SCALING_TYPE,
                         scaling_min_val=SCALING_MIN_VAL,
                         max_val=ACT_MAX_VAL)
def __init__(self):
    super(SimpleNN, self).__init__()
    self.fc1 = qnn.QuantLinear(2, 2, bias=False, weight_quant_type=QuantType.INT, weight_bit_width=2)
    self.relu1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=2, max_val=6)
    self.fc2 = qnn.QuantLinear(2, 1, bias=False, weight_quant_type=QuantType.INT, weight_bit_width=2)
    self.relu2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=2, max_val=6)
def __init__(self, bit_width=8, weight_bit_width=8):
    import brevitas.nn as qnn
    from brevitas.core.quant import QuantType
    super(QuantLeNet, self).__init__()
    self.conv1 = qnn.QuantConv2d(1, 6, 5, weight_quant_type=QuantType.INT,
                                 weight_bit_width=weight_bit_width, padding=2)
    self.relu1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=bit_width, max_val=6)
    self.conv2 = qnn.QuantConv2d(6, 16, 5, weight_quant_type=QuantType.INT,
                                 weight_bit_width=weight_bit_width, padding=2)
    self.relu2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=bit_width, max_val=6)
    self.fc1 = qnn.QuantLinear(16 * 7 * 7, 120, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=weight_bit_width)
    self.relu3 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=bit_width, max_val=6)
    self.fc2 = qnn.QuantLinear(120, 84, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=weight_bit_width)
    self.relu4 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=bit_width, max_val=6)
    self.fc3 = qnn.QuantLinear(84, 10, bias=False,
                               weight_quant_type=QuantType.INT, weight_bit_width=weight_bit_width)
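# The forward pass of this QuantLeNet is not shown in the snippet. Since fc1 expects
# 16 * 7 * 7 features and both convolutions use padding=2, a 28x28 MNIST input would need
# two 2x2 max-pools; the method below is a sketch under that assumption, not the original
# forward implementation.
import torch.nn.functional as F

def forward(self, x):
    x = F.max_pool2d(self.relu1(self.conv1(x)), 2)  # 28x28 -> 14x14
    x = F.max_pool2d(self.relu2(self.conv2(x)), 2)  # 14x14 -> 7x7
    x = x.view(x.size(0), -1)                       # flatten to 16 * 7 * 7
    x = self.relu3(self.fc1(x))
    x = self.relu4(self.fc2(x))
    return self.fc3(x)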
def __init__(self):
    super(QuantLeNet, self).__init__()
    self.conv1 = qnn.QuantConv2d(1, 6, 5, weight_quant_type=QuantType.INT, weight_bit_width=2,
                                 padding=2, bias=False)
    self.relu1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=2, max_val=6)
    self.conv2 = qnn.QuantConv2d(6, 16, 5, weight_quant_type=QuantType.INT, weight_bit_width=2, bias=False)
    self.relu2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=2, max_val=6)
    self.fc1 = qnn.QuantLinear(16 * 5 * 5, 120, bias=True, weight_quant_type=QuantType.INT, weight_bit_width=2)
    self.relu3 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=2, max_val=6)
    self.fc2 = qnn.QuantLinear(120, 84, bias=True, weight_quant_type=QuantType.INT, weight_bit_width=2)
    self.relu4 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=2, max_val=6)
    self.fc3 = qnn.QuantLinear(84, 10, bias=False, weight_quant_type=QuantType.INT, weight_bit_width=2)
def make_quant_relu(bit_width,
                    input_quant,
                    act_quant=ACT_QUANTIZER,
                    restrict_scaling_type=ACT_SCALING_RESTRICT_SCALING_TYPE,
                    scaling_min_val=SCALING_MIN_VAL,
                    max_val=ACT_MAX_VAL,
                    return_quant_tensor=ACT_RETURN_QUANT_TENSOR):
    return qnn.QuantReLU(bit_width=bit_width,
                         input_quant=input_quant,
                         act_quant=act_quant,
                         restrict_scaling_type=restrict_scaling_type,
                         scaling_min_val=scaling_min_val,
                         max_val=max_val,
                         return_quant_tensor=return_quant_tensor)
def __init__(self):
    super(Net, self).__init__()
    # Defining a 2D convolution layer
    self.conv1 = qnn.QuantConv2d(in_channels=1, out_channels=4, kernel_size=3, stride=1, padding=1,
                                 weight_quant_type=QuantType.INT, weight_bit_width=2, bias=False)
    self.relu1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=2, max_val=6)
    # Defining another 2D convolution layer
    self.conv2 = qnn.QuantConv2d(in_channels=4, out_channels=4, kernel_size=3, stride=1, padding=1,
                                 weight_quant_type=QuantType.INT, weight_bit_width=2, bias=False)
    self.relu2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=2, max_val=6)
    self.fc1 = qnn.QuantLinear(in_features=4 * 7 * 7, out_features=10, bias=False,
                               weight_quant_type=QuantType.INT, weight_bit_width=2)
def __init__(self):
    super(QuantXORNet, self).__init__()
    self.relu0 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=4, max_val=8)
    self.linear1 = qnn.QuantLinear(in_features=2, out_features=2,
                                   bias_quant_type=QuantType.INT, bias=True,
                                   compute_output_scale=True,
                                   compute_output_bit_width=True,
                                   # input_bit_width=32,
                                   weight_quant_type=QuantType.INT)
    self.relu1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=4, max_val=8)
    self.linear2 = qnn.QuantLinear(in_features=2, out_features=1,
                                   bias_quant_type=QuantType.INT, bias=True,
                                   compute_output_scale=True,
                                   compute_output_bit_width=True,
                                   # bit_width=4,
                                   weight_quant_type=QuantType.INT)
def make_quant_relu(bit_width,
                    quant_type=QUANT_TYPE,
                    scaling_impl_type=ACT_SCALING_IMPL_TYPE,
                    scaling_per_channel=ACT_SCALING_PER_CHANNEL,
                    restrict_scaling_type=ACT_SCALING_RESTRICT_SCALING_TYPE,
                    scaling_min_val=SCALING_MIN_VAL,
                    max_val=ACT_MAX_VAL,
                    return_quant_tensor=ACT_RETURN_QUANT_TENSOR,
                    per_channel_broadcastable_shape=ACT_PER_CHANNEL_BROADCASTABLE_SHAPE):
    return qnn.QuantReLU(bit_width=bit_width,
                         quant_type=quant_type,
                         scaling_impl_type=scaling_impl_type,
                         scaling_per_channel=scaling_per_channel,
                         restrict_scaling_type=restrict_scaling_type,
                         scaling_min_val=scaling_min_val,
                         max_val=max_val,
                         return_quant_tensor=return_quant_tensor,
                         per_channel_broadcastable_shape=per_channel_broadcastable_shape)
def make_quant_relu(bit_width,
                    scaling_impl_type=ACT_SCALING_IMPL_TYPE,
                    scaling_per_channel=ACT_SCALING_PER_CHANNEL,
                    restrict_scaling_type=ACT_SCALING_RESTRICT_SCALING_TYPE,
                    scaling_min_val=SCALING_MIN_VAL,
                    max_val=ACT_MAX_VAL,
                    min_overall_bit_width=MIN_OVERALL_BW,
                    max_overall_bit_width=MAX_OVERALL_BW,
                    return_quant_tensor=ACT_RETURN_QUANT_TENSOR,
                    per_channel_broadcastable_shape=ACT_PER_CHANNEL_BROADCASTABLE_SHAPE):
    '''Helper for ReLU activation layers.'''
    quant_type = get_quant_type(bit_width)
    return qnn.QuantReLU(bit_width=bit_width,
                         quant_type=quant_type,
                         scaling_impl_type=scaling_impl_type,
                         scaling_per_channel=scaling_per_channel,
                         restrict_scaling_type=restrict_scaling_type,
                         scaling_min_val=scaling_min_val,
                         max_val=max_val,
                         min_overall_bit_width=min_overall_bit_width,
                         max_overall_bit_width=max_overall_bit_width,
                         return_quant_tensor=return_quant_tensor,
                         per_channel_broadcastable_shape=per_channel_broadcastable_shape)
def __init__(self):
    super(VGG, self).__init__()
    self.conv1 = qnn.QuantConv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1, bias=False,
                                 weight_quant_type=QuantType.INT, weight_bit_width=8,
                                 weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 weight_scaling_impl_type=ScalingImplType.CONST,
                                 weight_scaling_const=1.0)
    self.relu1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv2 = qnn.QuantConv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1, bias=False,
                                 weight_quant_type=QuantType.INT, weight_bit_width=8,
                                 weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 weight_scaling_impl_type=ScalingImplType.CONST,
                                 weight_scaling_const=1.0)
    self.relu2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv3 = qnn.QuantConv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1, bias=False,
                                 weight_quant_type=QuantType.INT, weight_bit_width=8,
                                 weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 weight_scaling_impl_type=ScalingImplType.CONST,
                                 weight_scaling_const=1.0)
    self.relu3 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.conv4 = qnn.QuantConv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, bias=False,
                                 weight_quant_type=QuantType.INT, weight_bit_width=8,
                                 weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 weight_scaling_impl_type=ScalingImplType.CONST,
                                 weight_scaling_const=1.0)
    self.relu4 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv5 = qnn.QuantConv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1, bias=False,
                                 weight_quant_type=QuantType.INT, weight_bit_width=8,
                                 weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 weight_scaling_impl_type=ScalingImplType.CONST,
                                 weight_scaling_const=1.0)
    self.relu5 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.conv6 = qnn.QuantConv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, bias=False,
                                 weight_quant_type=QuantType.INT, weight_bit_width=8,
                                 weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 weight_scaling_impl_type=ScalingImplType.CONST,
                                 weight_scaling_const=1.0)
    self.relu6 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv7 = qnn.QuantConv2d(in_channels=512,  # matches conv6's 512 output channels
                                 out_channels=512, kernel_size=3, padding=1, bias=False,
                                 weight_quant_type=QuantType.INT, weight_bit_width=8,
                                 weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 weight_scaling_impl_type=ScalingImplType.CONST,
                                 weight_scaling_const=1.0)
    self.relu7 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.conv8 = qnn.QuantConv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, bias=False,
                                 weight_quant_type=QuantType.INT, weight_bit_width=8,
                                 weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 weight_scaling_impl_type=ScalingImplType.CONST,
                                 weight_scaling_const=1.0)
    self.relu8 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)
    """ full precision layers
    self.fc1 = nn.Linear(4*4*256, 1024)
    self.relufc1 = nn.ReLU()
    self.fc2 = nn.Linear(1024, 512)
    self.relufc2 = nn.ReLU()
    self.fc2 = nn.Linear(512, 10)
    """
    self.fc1 = qnn.QuantLinear(8192, 4096, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=32,
                               weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               weight_scaling_impl_type=ScalingImplType.CONST,
                               weight_scaling_const=1.0)
    self.relufc1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                                 restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 scaling_impl_type=ScalingImplType.CONST)
    self.fc2 = qnn.QuantLinear(4096, 1024, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=8,
                               weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               weight_scaling_impl_type=ScalingImplType.CONST,
                               weight_scaling_const=1.0)
    self.relufc2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                                 restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 scaling_impl_type=ScalingImplType.CONST)
    self.fc3 = qnn.QuantLinear(1024, 1024, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=8,
                               weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               weight_scaling_impl_type=ScalingImplType.CONST,
                               weight_scaling_const=1.0)
    self.relufc3 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                                 restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 scaling_impl_type=ScalingImplType.CONST)
    self.fc4 = qnn.QuantLinear(1024, 10, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=8,
                               weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               weight_scaling_impl_type=ScalingImplType.CONST,
                               weight_scaling_const=1.0)
def make_PACT_relu(bit_width=8):
    # Build a standard Brevitas QuantReLU, then swap its activation implementation for a
    # PACT-style clipped ReLU with a learnable clipping threshold.
    relu = qnn.QuantReLU(bit_width=bit_width)
    relu.act_impl = PACTReLU()
    return relu
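# PACTReLU is referenced above but not defined in this snippet. The class below is a
# minimal sketch, assuming the formulation from the PACT paper (Choi et al., 2018): a ReLU
# clipped at a learnable upper bound alpha, written so that gradients reach alpha. It is an
# illustration, not the original implementation.
import torch
import torch.nn as nn

class PACTReLU(nn.Module):
    def __init__(self, alpha=6.0):
        super().__init__()
        self.alpha = nn.Parameter(torch.tensor(alpha))  # learnable clipping threshold

    def forward(self, x):
        # Equivalent to clamp(x, 0, alpha), but differentiable with respect to alpha.
        return 0.5 * (torch.abs(x) - torch.abs(x - self.alpha) + self.alpha)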
def __init__(self, is_train=False):
    super(PNet, self).__init__()
    self.is_train = is_train
    '''
    conv1:   (H-2)*(W-2)*10
    prelu1:  (H-2)*(W-2)*10
    pool1:   ((H-2)/2)*((W-2)/2)*10
    conv2:   ((H-2)/2-2)*((W-2)/2-2)*16
    prelu2:  ((H-2)/2-2)*((W-2)/2-2)*16
    conv3:   ((H-2)/2-4)*((W-2)/2-4)*32
    prelu3:  ((H-2)/2-4)*((W-2)/2-4)*32
    conv4_1: ((H-2)/2-4)*((W-2)/2-4)*2
    conv4_2: ((H-2)/2-4)*((W-2)/2-4)*4

    The last feature map size is (H - 10)/2 = (H - 12)/2 + 1, so PNet is equivalent to
    sliding a 12*12 convolution window over the input with stride 2.
    '''
    self.features = nn.Sequential(OrderedDict([
        ('conv1', qnn.QuantConv2d(3, 10, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),
        ('prelu1', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        ('pool1', nn.MaxPool2d(2, 2, ceil_mode=False)),
        ('conv2', qnn.QuantConv2d(10, 16, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),
        ('prelu2', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        ('conv3', qnn.QuantConv2d(16, 32, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),
        ('prelu3', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
    ]))
    self.conv4_1 = qnn.QuantConv2d(32, 2, 1, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)
    self.conv4_2 = qnn.QuantConv2d(32, 4, 1, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)
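# Quick sanity check of the receptive-field arithmetic in the docstring above. This is an
# illustrative sketch, not part of the original file: a single 12x12 crop should reduce to
# a 1x1 feature map before the two 1x1 output heads.
import torch

if __name__ == "__main__":
    net = PNet()
    x = torch.randn(1, 3, 12, 12)
    feat = net.features(x)
    print(feat.shape)               # torch.Size([1, 32, 1, 1])
    print(net.conv4_1(feat).shape)  # torch.Size([1, 2, 1, 1])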
def __init__(self, is_train=False, train_landmarks=False):
    super(ONet, self).__init__()
    self.is_train = is_train
    self.train_landmarks = train_landmarks
    self.features = nn.Sequential(OrderedDict([
        ('conv1', qnn.QuantConv2d(3, 32, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),   # 48 - 2 = 46
        ('prelu1', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        ('pool1', nn.MaxPool2d(3, 2, ceil_mode=False)),                                                 # (46-3)/2 + 1 = 22
        ('conv2', qnn.QuantConv2d(32, 64, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),  # 22 - 2 = 20
        ('prelu2', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        ('pool2', nn.MaxPool2d(3, 2, ceil_mode=False)),                                                 # (20-3)/2 + 1 = 9
        ('conv3', qnn.QuantConv2d(64, 64, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),  # 9 - 2 = 7
        ('prelu3', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        ('pool3', nn.MaxPool2d(2, 2, ceil_mode=False)),                                                 # (7-2)/2 + 1 = 3
        ('conv4', qnn.QuantConv2d(64, 128, 2, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)), # 3 - 1 = 2
        ('prelu4', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        ('flatten', Flatten()),
        ('conv5', qnn.QuantLinear(128 * 2 * 2, 256, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)),
        ('prelu5', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        ('dropout', nn.Dropout(0.2)),
    ]))
    self.conv6_1 = qnn.QuantLinear(256, 2, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)
    self.conv6_2 = qnn.QuantLinear(256, 4, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)
    self.conv6_3 = qnn.QuantLinear(256, 10, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)
def __init__(self, is_train=False):
    super(RNet, self).__init__()
    self.is_train = is_train
    self.features = nn.Sequential(OrderedDict([
        ('conv1', qnn.QuantConv2d(3, 28, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),   # 24 - 2 = 22
        ('prelu1', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        ('pool1', nn.MaxPool2d(3, 2, ceil_mode=False)),                                                 # (22-3)/2 + 1 = 10
        ('conv2', qnn.QuantConv2d(28, 48, 3, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),  # 10 - 2 = 8
        ('prelu2', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        ('pool2', nn.MaxPool2d(3, 2, ceil_mode=False)),                                                 # (8-3)/2 + 1 = 3
        ('conv3', qnn.QuantConv2d(48, 64, 2, 1, weight_quant_type=QuantType.INT, weight_bit_width=8)),  # 3 - 1 = 2
        ('prelu3', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        ('flatten', Flatten()),
        ('conv4', qnn.QuantLinear(64 * 2 * 2, 128, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)),
        ('prelu4', qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=6)),
        # ('dropout', nn.Dropout(0.2)),
    ]))
    self.conv5_1 = qnn.QuantLinear(128, 2, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)
    self.conv5_2 = qnn.QuantLinear(128, 4, weight_quant_type=QuantType.INT, bias=False, weight_bit_width=8)
def __init__(self, masks, precision=8):
    self.weight_precision = precision
    self.quantized_model = True
    self.e1 = masks['enc1']
    self.e2 = masks['enc2']
    self.e3 = masks['enc3']
    self.e4 = masks['enc4']
    self.d1 = masks['dec1']
    self.d2 = masks['dec2']
    self.d3 = masks['dec3']
    self.d4 = masks['dec4']
    self.do = masks['dout']
    super(t2_autoencoder_masked_bv, self).__init__()
    self.input_size = 640
    self.bottleneck_size = 8
    self.hidden_size = 128
    # Encoder
    self.enc1 = qnn.QuantLinear(self.input_size, self.hidden_size, bias=True,
                                weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.ebn1 = nn.BatchNorm1d(self.hidden_size)
    self.eact1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.enc2 = qnn.QuantLinear(self.hidden_size, self.hidden_size, bias=True,
                                weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.ebn2 = nn.BatchNorm1d(self.hidden_size)
    self.eact2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.enc3 = qnn.QuantLinear(self.hidden_size, self.hidden_size, bias=True,
                                weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.ebn3 = nn.BatchNorm1d(self.hidden_size)
    self.eact3 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.enc4 = qnn.QuantLinear(self.hidden_size, self.bottleneck_size, bias=True,
                                weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.ebn4 = nn.BatchNorm1d(self.bottleneck_size)
    self.eact4 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    # Decoder
    self.dec1 = qnn.QuantLinear(self.bottleneck_size, self.hidden_size, bias=True,
                                weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.dbn1 = nn.BatchNorm1d(self.hidden_size)
    self.dact1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.dec2 = qnn.QuantLinear(self.hidden_size, self.hidden_size, bias=True,
                                weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.dbn2 = nn.BatchNorm1d(self.hidden_size)
    self.dact2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.dec3 = qnn.QuantLinear(self.hidden_size, self.hidden_size, bias=True,
                                weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.dbn3 = nn.BatchNorm1d(self.hidden_size)
    self.dact3 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    self.dec4 = qnn.QuantLinear(self.hidden_size, self.hidden_size, bias=True,
                                weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
    self.dbn4 = nn.BatchNorm1d(self.hidden_size)
    self.dact4 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=self.weight_precision, max_val=6)
    # Output
    self.dout = qnn.QuantLinear(self.hidden_size, self.input_size, bias=True,
                                weight_quant_type=QuantType.INT, weight_bit_width=self.weight_precision)
def __init__(self):
    super(LeNet5, self).__init__()
    self.conv1 = qnn.QuantConv2d(in_channels=1, out_channels=20, kernel_size=3, padding=1, bias=False,
                                 weight_quant_type=QuantType.INT, weight_bit_width=total_bits,
                                 weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 weight_scaling_impl_type=ScalingImplType.CONST,
                                 weight_scaling_const=1.0)
    self.relu1 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv2 = qnn.QuantConv2d(in_channels=20, out_channels=50, kernel_size=3, padding=1, bias=False,
                                 weight_quant_type=QuantType.INT, weight_bit_width=8,
                                 weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                                 weight_scaling_impl_type=ScalingImplType.CONST,
                                 weight_scaling_const=1.0)
    self.relu2 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
    """
    # for 32-bit precision FC layers
    self.fc1 = nn.Linear(7*7*50, 500)
    self.relu3 = nn.ReLU()
    self.fc2 = nn.Linear(500, 10)
    """
    # for fixed-point precision FC layers
    self.fc1 = qnn.QuantLinear(7 * 7 * 50, 500, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=32,
                               weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               weight_scaling_impl_type=ScalingImplType.CONST,
                               weight_scaling_const=1.0)
    self.relu3 = qnn.QuantReLU(quant_type=QuantType.INT, bit_width=8, max_val=1 - 1 / 128.0,
                               restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               scaling_impl_type=ScalingImplType.CONST)
    self.fc2 = qnn.QuantLinear(500, 10, bias=True,
                               weight_quant_type=QuantType.INT, weight_bit_width=8,
                               weight_restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                               weight_scaling_impl_type=ScalingImplType.CONST,
                               weight_scaling_const=1.0)
def __init__(self, VGG_type='A', batch_norm=False, bit_width=8, num_classes=1000, pretrained_model=None):
    super(QuantVGG, self).__init__()
    self.logger = get_logger(name=("{}{}".format(__name__, dist.get_rank())
                                   if dist.is_initialized() else __name__))
    self.inp_quant = qnn.QuantIdentity(bit_width=bit_width, act_quant=INPUT_QUANTIZER,
                                       return_quant_tensor=RETURN_QUANT_TENSOR)
    self.features = make_layers(cfgs[VGG_type], batch_norm, bit_width)
    self.avgpool = qnn.QuantAdaptiveAvgPool2d((7, 7))
    self.classifier = nn.Sequential(
        qnn.QuantLinear(512 * 7 * 7, 4096, bias=True, bias_quant=BIAS_QUANTIZER,
                        weight_quant=WEIGHT_QUANTIZER, weight_bit_width=bit_width,
                        weight_scaling_min_val=SCALING_MIN_VAL,
                        return_quant_tensor=RETURN_QUANT_TENSOR),
        qnn.QuantReLU(bit_width=bit_width, act_quant=ACT_QUANTIZER,
                      return_quant_tensor=RETURN_QUANT_TENSOR),
        qnn.QuantDropout(),
        qnn.QuantLinear(4096, 4096, bias=True, bias_quant=BIAS_QUANTIZER,
                        weight_quant=WEIGHT_QUANTIZER, weight_bit_width=bit_width,
                        weight_scaling_min_val=SCALING_MIN_VAL,
                        return_quant_tensor=RETURN_QUANT_TENSOR),
        qnn.QuantReLU(bit_width=bit_width, act_quant=ACT_QUANTIZER,
                      return_quant_tensor=RETURN_QUANT_TENSOR),
        nn.Dropout(),
        qnn.QuantLinear(4096, num_classes, bias=False, weight_quant=WEIGHT_QUANTIZER,
                        weight_scaling_min_val=SCALING_MIN_VAL, weight_bit_width=bit_width,
                        return_quant_tensor=False),
    )
    self.classifier[0].cache_inference_quant_bias = True
    self.classifier[3].cache_inference_quant_bias = True
    self.classifier[6].cache_inference_quant_bias = True
    if is_master():
        print_config(self.logger)
    if pretrained_model is None:
        self._initialize_weights()
    else:
        pre_model = None
        if pretrained_model == 'pytorch':
            self.logger.info("Initializing with pretrained model from PyTorch")
            # use PyTorch's pretrained model
            pre_model = models.vgg16(pretrained=True)
        else:
            pre_model = VGG_net(VGG_type=VGG_type, batch_norm=batch_norm, num_classes=num_classes)
            loaded_model = torch.load(pretrained_model)['state_dict']
            # Check whether the model was trained under DataParallel; keys() returns
            # 'odict_keys', which does not support indexing.
            if next(iter(loaded_model.keys())).startswith('module'):
                # a model trained with DataParallel is wrapped under 'module'
                pre_model = torch.nn.DataParallel(pre_model)
                pre_model.load_state_dict(loaded_model)
                unwrapped_sd = pre_model.module.state_dict()
                pre_model = VGG_net(VGG_type=VGG_type, batch_norm=batch_norm, num_classes=num_classes)
                pre_model.load_state_dict(unwrapped_sd)
            else:
                pre_model.load_state_dict(loaded_model)
        self._initialize_custom_weights(pre_model)
        self.logger.info("Initialization Done")
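# Hypothetical usage sketch (not part of the original file): build an 8-bit VGG16-style
# QuantVGG initialized from torchvision's pretrained floating-point weights, assuming 'D'
# is the VGG16 entry in the module's `cfgs` dictionary.
model = QuantVGG(VGG_type='D', batch_norm=False, bit_width=8,
                 num_classes=1000, pretrained_model='pytorch')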