def __init__(
        self,
        num_classes=100,
        num_channels=3,
        dimensions=(32, 32),  # pylint: disable=unused-argument
        bias=False,
        **kwargs
):
    """Build the 14-layer fused-convolution network.

    The stack interleaves plain fused Conv2d+ReLU layers with
    max-pool-fused variants; the final layer is a plain (wide) Conv2d
    producing `num_classes` output channels.

    :param num_classes: number of output channels of the last layer
    :param num_channels: number of input image channels
    :param dimensions: nominal input spatial size (unused here; kept for
        constructor-signature compatibility)
    :param bias: whether convolution layers carry a bias term
    :param kwargs: forwarded to every ai8x layer constructor
    """
    super().__init__()

    # Stage 1: full-resolution feature extraction.
    self.conv1 = ai8x.FusedConv2dReLU(num_channels, 16, 3, stride=1, padding=1,
                                      bias=bias, **kwargs)
    self.conv2 = ai8x.FusedConv2dReLU(16, 20, 3, stride=1, padding=1,
                                      bias=bias, **kwargs)
    self.conv3 = ai8x.FusedConv2dReLU(20, 20, 3, stride=1, padding=1,
                                      bias=bias, **kwargs)
    self.conv4 = ai8x.FusedConv2dReLU(20, 20, 3, stride=1, padding=1,
                                      bias=bias, **kwargs)

    # Stage 2: first 2x2 max-pool halves the spatial size.
    self.conv5 = ai8x.FusedMaxPoolConv2dReLU(20, 20, 3, pool_size=2, pool_stride=2,
                                             stride=1, padding=1, bias=bias, **kwargs)
    self.conv6 = ai8x.FusedConv2dReLU(20, 20, 3, stride=1, padding=1,
                                      bias=bias, **kwargs)
    self.conv7 = ai8x.FusedConv2dReLU(20, 44, 3, stride=1, padding=1,
                                      bias=bias, **kwargs)

    # Stage 3: progressively pool while widening the channel count.
    self.conv8 = ai8x.FusedMaxPoolConv2dReLU(44, 48, 3, pool_size=2, pool_stride=2,
                                             stride=1, padding=1, bias=bias, **kwargs)
    self.conv9 = ai8x.FusedConv2dReLU(48, 48, 3, stride=1, padding=1,
                                      bias=bias, **kwargs)
    self.conv10 = ai8x.FusedMaxPoolConv2dReLU(48, 96, 3, pool_size=2, pool_stride=2,
                                              stride=1, padding=1, bias=bias, **kwargs)

    # Stage 4: 1x1 bottleneck expansion/reduction, then a last pooled 3x3.
    self.conv11 = ai8x.FusedMaxPoolConv2dReLU(96, 512, 1, pool_size=2, pool_stride=2,
                                              padding=0, bias=bias, **kwargs)
    self.conv12 = ai8x.FusedConv2dReLU(512, 128, 1, stride=1, padding=0,
                                       bias=bias, **kwargs)
    self.conv13 = ai8x.FusedMaxPoolConv2dReLU(128, 128, 3, pool_size=2, pool_stride=2,
                                              stride=1, padding=1, bias=bias, **kwargs)

    # Classifier: wide output so downstream software sees full precision.
    self.conv14 = ai8x.Conv2d(128, num_classes, 1, stride=1, padding=0,
                              bias=bias, wide=True, **kwargs)
def quantize_layer(q_fp_layer, wide, activation, num_bits):
    """Quantizes layer.

    Builds an integer-weight ai8x.Conv2d that mirrors *q_fp_layer*:
    the weight scale and output shift are derived from the fp layer,
    and the clamped/quantized weights are scaled to integer range.

    :param q_fp_layer: source layer holding quantization-leveled fp32 weights
    :param wide: create the new layer with a wide output
    :param activation: activation for the new layer
    :param num_bits: weight bit width of the new layer
    :return: the integer-quantized layer
    """
    # Derive the layer geometry from the source weight tensor.
    src_weight = q_fp_layer.op.weight
    ch_out, ch_in = src_weight.shape[0], src_weight.shape[1]
    kernel = src_weight.shape[2:]

    # Hardware-simulation mode so the new layer is built with integer semantics.
    ai8x.set_device(device=85, simulate=True, round_avg=False, verbose=False)
    q_int_layer = ai8x.Conv2d(in_channels=ch_in,
                              out_channels=ch_out,
                              kernel_size=kernel,
                              bias=False,
                              wide=wide,
                              activation=activation,
                              weight_bits=num_bits,
                              bias_bits=8,
                              quantize_activation=True)

    # Output shift and the matching weight scale come from the fp layer.
    out_shift = q_fp_layer.calc_out_shift(src_weight.detach(),
                                          q_fp_layer.output_shift.detach())
    weight_scale = q_fp_layer.calc_weight_scale(out_shift)

    # Back to training-mode semantics for the weight clamp/quantize pass.
    ai8x.set_device(device=85, simulate=False, round_avg=False, verbose=False)
    clamped = q_fp_layer.clamp_weight(
        q_fp_layer.quantize_weight(weight_scale * src_weight))
    # Expand the normalized weights to the signed integer range.
    int_weight = (2**(num_bits-1)) * clamped

    q_int_layer.output_shift = torch.nn.Parameter(
        -torch.log2(weight_scale)  # pylint: disable=no-member
    )
    q_int_layer.op.weight = torch.nn.Parameter(int_weight)
    return q_int_layer
def quantize_fp_layer(fp_layer, wide, activation, num_bits):
    """Creates layer with quantized leveled fp32 weights from a fp32 weighted layer.

    The returned layer shares the geometry of *fp_layer* and carries a deep
    copy of its fp32 weights, but is configured for quantization-aware use.

    :param fp_layer: source layer whose fp32 weights are copied
    :param wide: create the new layer with a wide output
    :param activation: activation for the new layer
    :param num_bits: weight bit width configured on the new layer
    :return: the quantization-aware layer
    """
    ai8x.set_device(device=85, simulate=False, round_avg=False, verbose=False)

    # Geometry is inferred directly from the source weight tensor.
    src_weight = fp_layer.op.weight
    q_fp_layer = ai8x.Conv2d(in_channels=src_weight.shape[1],
                             out_channels=src_weight.shape[0],
                             kernel_size=src_weight.shape[2:],
                             bias=False,
                             wide=wide,
                             activation=activation,
                             weight_bits=num_bits,
                             bias_bits=8,
                             quantize_activation=True)
    # Deep copy so later mutation of either layer leaves the other intact.
    q_fp_layer.op.weight = copy.deepcopy(src_weight)
    return q_fp_layer
def create_conv2d_layer(in_channels, out_channels, kernel_size, wide, activation):
    """Creates randomly initialized layer.

    Weights are drawn uniformly from [-1.0, 1.0) via a scaled torch.rand.

    :param in_channels: number of input channels
    :param out_channels: number of output channels
    :param kernel_size: square kernel edge length
    :param wide: create the layer with a wide output
    :param activation: activation for the layer
    :return: the randomly initialized layer
    """
    ai8x.set_device(device=85, simulate=False, round_avg=False, verbose=False)
    layer = ai8x.Conv2d(in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=kernel_size,
                        bias=False,
                        wide=wide,
                        activation=activation)
    # Map torch.rand's [0, 1) range onto [-1, 1).
    uniform01 = torch.rand(out_channels,  # pylint: disable=no-member
                           in_channels,
                           kernel_size,
                           kernel_size)
    layer.op.weight = torch.nn.Parameter(2.0 * uniform01 - 1.0)
    return layer
def __init__(self, num_classes=10, num_channels=3, dimensions=(28, 28),
             planes=60, pool=2, fc_inputs=12, bias=False, **kwargs):
    """Build the 5-layer network, tracking spatial size per layer.

    :param num_classes: number of output classes
    :param num_channels: number of input image channels
    :param dimensions: square input size (MNIST 28x28 or CIFAR 32x32)
    :param planes: channel width of the early layers
    :param pool: pool size of the average-pool layer
    :param fc_inputs: channels feeding the final 1x1 classifier
    :param bias: whether convolution layers carry a bias term
    :param kwargs: forwarded to every ai8x layer constructor
    """
    super().__init__()

    # AI85 hardware limits on channel counts.
    assert planes + num_channels <= ai8x.dev.WEIGHT_INPUTS
    assert planes + fc_inputs <= ai8x.dev.WEIGHT_DEPTH - 1
    assert dimensions[0] == dimensions[1]  # Only square supported

    # One running dimension value lets the same constructor serve all sizes.
    dim = dimensions[0]

    self.conv1 = ai8x.FusedConv2dReLU(num_channels, planes, 3,
                                      padding=1, bias=bias, **kwargs)
    # padding 1 -> no change in dimensions -> MNIST: 28x28 | CIFAR: 32x32

    # MNIST pads wider so both datasets land on the same 16x16 grid below.
    if dim == 28:
        pad = 2
    else:
        pad = 1
    self.conv2 = ai8x.FusedMaxPoolConv2dReLU(planes, planes, 3, pool_size=2,
                                             pool_stride=2, padding=pad,
                                             bias=bias, **kwargs)
    dim //= 2  # pooling, padding 0 -> MNIST: 14x14 | CIFAR: 16x16
    if pad == 2:
        dim += 2  # MNIST: padding 2 -> 16x16 | CIFAR: padding 1 -> 16x16

    self.conv3 = ai8x.FusedMaxPoolConv2dReLU(
        planes, ai8x.dev.WEIGHT_DEPTH - planes - fc_inputs, 3,
        pool_size=2, pool_stride=2, padding=1, bias=bias, **kwargs)
    dim //= 2  # pooling, padding 0 -> 8x8
    # padding 1 -> no change in dimensions

    self.conv4 = ai8x.FusedAvgPoolConv2dReLU(
        ai8x.dev.WEIGHT_DEPTH - planes - fc_inputs, fc_inputs, 3,
        pool_size=pool, pool_stride=2, padding=1, bias=bias, **kwargs)
    dim //= pool  # pooling, padding 0 -> 4x4
    # padding 1 -> no change in dimensions

    # 1x1 classifier over the flattened feature map -> num_classes x 1 x 1.
    self.conv5 = ai8x.Conv2d(fc_inputs * dim * dim, num_classes, 1,
                             padding=0, bias=None, **kwargs)

    # He initialization for every plain Conv2d in the model.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                    nonlinearity='relu')
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1,
             bias=False, se_ratio=None, expand_ratio=1, fused=False, **kwargs):
    """Build an inverted-bottleneck (MBConv-style) block.

    Phases: optional expansion conv, optional depthwise conv (skipped when
    *fused* is True), optional squeeze-and-excitation, projection conv, and
    an element-wise add for the residual path.

    :param in_channels: block input channels
    :param out_channels: block output channels
    :param kernel_size: spatial kernel of the expand/depthwise conv
    :param stride: stride of the depthwise conv
    :param bias: whether conv layers carry a bias term
    :param se_ratio: squeeze-and-excitation ratio in (0, 1], or None to disable
    :param expand_ratio: channel expansion factor of the bottleneck
    :param fused: fold the depthwise conv into the expansion conv
    :param kwargs: forwarded to every ai8x layer constructor
    """
    super().__init__()

    # SE is active only for a ratio strictly inside (0, 1].
    self.has_se = (se_ratio is not None) and (0 < se_ratio <= 1)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.stride = stride
    self.expand_ratio = expand_ratio
    self.fused = fused

    # Expansion phase (inverted bottleneck): widen the channel count.
    bottleneck_in = in_channels
    bottleneck_width = in_channels * expand_ratio
    if expand_ratio != 1:
        if fused is True:
            # Fused variant: the expansion conv takes over the spatial kernel.
            self.expand_conv = ai8x.FusedConv2dBNReLU(
                bottleneck_in, bottleneck_width, kernel_size=kernel_size,
                padding=1, batchnorm='Affine', bias=bias,
                eps=1e-03, momentum=0.01, **kwargs)
        else:
            # Plain variant: 1x1 pointwise expansion only.
            self.expand_conv = ai8x.FusedConv2dBNReLU(
                bottleneck_in, bottleneck_width, 1, batchnorm='Affine',
                bias=bias, eps=1e-03, momentum=0.01, **kwargs)

    # Depthwise convolution phase (absent in the fused variant).
    if fused is not True:
        self.depthwise_conv = ai8x.FusedConv2dBNReLU(
            in_channels=bottleneck_width,
            out_channels=bottleneck_width,
            groups=bottleneck_width,  # groups makes it depthwise
            padding=1,
            kernel_size=kernel_size,
            stride=stride,
            batchnorm='Affine',
            bias=bias,
            eps=1e-03,
            momentum=0.01,
            **kwargs)

    # Squeeze-and-excitation phase: bottleneck to a reduced channel count
    # and back, sized from the block's *input* channels.
    if self.has_se:
        squeezed = max(1, int(in_channels * se_ratio))
        self.se_reduce = ai8x.FusedConv2dReLU(in_channels=bottleneck_width,
                                              out_channels=squeezed,
                                              kernel_size=1,
                                              stride=1,
                                              bias=bias,
                                              **kwargs)
        self.se_expand = ai8x.Conv2d(in_channels=squeezed,
                                     out_channels=bottleneck_width,
                                     kernel_size=1,
                                     stride=1,
                                     bias=bias,
                                     **kwargs)

    # Projection phase: 1x1 conv back down to the block's output width.
    self.project_conv = ai8x.FusedConv2dBN(in_channels=bottleneck_width,
                                           out_channels=out_channels,
                                           kernel_size=1,
                                           batchnorm='Affine',
                                           bias=bias,
                                           eps=1e-03,
                                           momentum=0.01,
                                           **kwargs)

    # Residual (skip-connection) adder.
    self.resid = ai8x.Add()