# Imports assumed by this section (module names inferred from usage):
import numpy as np
import torch
import torch.nn.functional as F

import wage_quantizer


def forward(self, input):
    weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
    weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
    output = F.conv2d(input, weight, self.bias, self.stride, self.padding,
                      self.dilation, self.groups)
    output = output / self.scale
    output = wage_quantizer.WAGEQuantizer_f(output, self.wl_activate, self.wl_error)
    return output
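# The `weight1 + (Q(weight1) - weight1).detach()` pattern above is a
# straight-through estimator (STE): the forward pass sees the quantized
# weight, while the gradient bypasses the non-differentiable rounding.
# A minimal, self-contained sketch of the same pattern (the k-bit uniform
# quantizer below is an illustrative stand-in, not wage_quantizer.Q):
#
import torch


def ste_quantize(w: torch.Tensor, k: int) -> torch.Tensor:
    """Forward: k-bit uniform quantization of w in [-1, 1]; backward: identity."""
    levels = 2 ** (k - 1)
    wq = torch.round(w * levels) / levels   # non-differentiable rounding
    return w + (wq - w).detach()            # value of wq, gradient of w


x = torch.randn(4, requires_grad=True)
ste_quantize(torch.tanh(x), 8).sum().backward()
assert x.grad is not None                   # gradient flows despite round()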
def forward(self, input):
    weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
    weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
    output = F.linear(input, weight, self.bias)
    output = output / self.scale
    output = wage_quantizer.WAGEQuantizer_f(output, self.wl_activate, self.wl_error)
    return output
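# The first line of both forwards, `w * scale + (w - w * scale).detach()`,
# is a second STE trick: the expression evaluates to `w` in the forward
# pass, but its gradient w.r.t. `w` is `scale` (WAGE-style layer-wise
# gradient scaling). A quick check of that claim:
#
import torch

w = torch.randn(3, requires_grad=True)
scale = 4.0
w1 = w * scale + (w - w * scale).detach()
assert torch.allclose(w1, w)   # forward value is unchanged
w1.sum().backward()
print(w.grad)                  # tensor([4., 4., 4.]) -- the gradient is `scale`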
def forward(self, input):
    weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
    weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
    outputOriginal = F.conv2d(input, weight, self.bias, self.stride,
                              self.padding, self.dilation, self.groups)
    bitWeight = int(self.wl_weight)
    bitActivation = int(self.wl_input)
    if self.inference == 1:
        # retention
        weight = wage_quantizer.Retention(weight, self.t, self.v, self.detect, self.target)
        # set parameters for hardware inference
        onoffratio = self.onoffratio
        upper = 1
        lower = 1 / onoffratio
        output = torch.zeros_like(outputOriginal)
        del outputOriginal
        cellRange = 2 ** self.cellBit  # number of conductance levels per cell
        # dummy column: mid-range conductance used to cancel the offset of the
        # unsigned weight mapping, with the on/off ratio taken into account
        dummyP = torch.zeros_like(weight)
        dummyP[:, :, :, :] = (cellRange - 1) * (upper + lower) / 2
        for i in range(3):  # iterate over the kernel positions (3x3 kernels assumed)
            for j in range(3):
                # need to divide the input channels into different subarrays
                numSubArray = int(weight.shape[1] / self.subArray)
                if numSubArray == 0:
                    mask = torch.zeros_like(weight)
                    mask[:, :, i, j] = 1
                    if weight.shape[1] == 3:
                        # first layer: analog input, no bit-serial decomposition.
                        # After picking the spatial kernel position, transfer the
                        # floating weight in [-1, 1] to an unsigned integer
                        X_decimal = torch.round((2 ** bitWeight - 1) / 2 * (weight + 1) + 0) * mask
                        outputP = torch.zeros_like(output)
                        outputD = torch.zeros_like(output)
                        for k in range(int(bitWeight / self.cellBit)):
                            remainder = torch.fmod(X_decimal, cellRange) * mask
                            variation = np.random.normal(0, self.vari, list(weight.size())).astype(np.float32)
                            X_decimal = torch.round((X_decimal - remainder) / cellRange) * mask
                            # remainder is the weight digit mapped to hardware, so introduce
                            # the on/off ratio here: its range [0, cellRange-1] is mapped to
                            # [lower, upper] * (cellRange - 1); the weight cannot map to 0, but to Gmin
                            remainderQ = (upper - lower) * (remainder - 0) + (cellRange - 1) * lower
                            remainderQ = remainderQ + remainderQ * torch.from_numpy(variation).cuda()
                            outputPartial = F.conv2d(input, remainderQ * mask, self.bias, self.stride,
                                                     self.padding, self.dilation, self.groups)
                            outputDummyPartial = F.conv2d(input, dummyP * mask, self.bias, self.stride,
                                                          self.padding, self.dilation, self.groups)
                            scaler = cellRange ** k
                            outputP = outputP + outputPartial * scaler * 2 / (1 - 1 / onoffratio)
                            outputD = outputD + outputDummyPartial * scaler * 2 / (1 - 1 / onoffratio)
                        outputP = outputP - outputD  # subtract the dummy column
                        output = output + outputP
                    else:
                        # quantize the input into a binary sequence
                        inputQ = torch.round((2 ** bitActivation - 1) / 1 * (input - 0) + 0)
                        outputIN = torch.zeros_like(output)
                        for z in range(bitActivation):
                            inputB = torch.fmod(inputQ, 2)
                            inputQ = torch.round((inputQ - inputB) / 2)
                            outputP = torch.zeros_like(output)
                            # after picking the spatial kernel position, transfer the
                            # floating weight in [-1, 1] to an unsigned integer
                            X_decimal = torch.round((2 ** bitWeight - 1) / 2 * (weight + 1) + 0) * mask
                            outputD = torch.zeros_like(output)
                            for k in range(int(bitWeight / self.cellBit)):
                                remainder = torch.fmod(X_decimal, cellRange) * mask
                                variation = np.random.normal(0, self.vari, list(weight.size())).astype(np.float32)
                                X_decimal = torch.round((X_decimal - remainder) / cellRange) * mask
                                # map the digit range [0, cellRange-1] to [lower, upper] * (cellRange - 1);
                                # the weight cannot map to 0, but to Gmin
                                remainderQ = (upper - lower) * (remainder - 0) + (cellRange - 1) * lower
                                remainderQ = remainderQ + remainderQ * torch.from_numpy(variation).cuda()
                                # convolve the current input bit plane (inputB, not the full input)
                                outputPartial = F.conv2d(inputB, remainderQ * mask, self.bias, self.stride,
                                                         self.padding, self.dilation, self.groups)
                                outputDummyPartial = F.conv2d(inputB, dummyP * mask, self.bias, self.stride,
                                                              self.padding, self.dilation, self.groups)
                                # add ADC quantization effects here
                                outputPartialQ = wage_quantizer.LinearQuantizeOut(outputPartial, self.ADCprecision)
                                outputDummyPartialQ = wage_quantizer.LinearQuantizeOut(outputDummyPartial, self.ADCprecision)
                                scaler = cellRange ** k
                                outputP = outputP + outputPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                                outputD = outputD + outputDummyPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                            scalerIN = 2 ** z
                            outputIN = outputIN + (outputP - outputD) * scalerIN
                        output = output + outputIN / (2 ** bitActivation)
                else:
                    # quantize the input into a binary sequence
                    inputQ = torch.round((2 ** bitActivation - 1) / 1 * (input - 0) + 0)
                    outputIN = torch.zeros_like(output)
                    for z in range(bitActivation):
                        inputB = torch.fmod(inputQ, 2)
                        inputQ = torch.round((inputQ - inputB) / 2)
                        outputP = torch.zeros_like(output)
                        for s in range(numSubArray):
                            mask = torch.zeros_like(weight)
                            mask[:, (s * self.subArray):(s + 1) * self.subArray, i, j] = 1
                            # transfer the floating weight in [-1, 1] to an unsigned integer
                            X_decimal = torch.round((2 ** bitWeight - 1) / 2 * (weight + 1) + 0) * mask
                            outputSP = torch.zeros_like(output)
                            outputD = torch.zeros_like(output)
                            for k in range(int(bitWeight / self.cellBit)):
                                remainder = torch.fmod(X_decimal, cellRange) * mask
                                variation = np.random.normal(0, self.vari, list(weight.size())).astype(np.float32)
                                X_decimal = torch.round((X_decimal - remainder) / cellRange) * mask
                                # map the digit range [0, cellRange-1] to [lower, upper] * (cellRange - 1);
                                # the weight cannot map to 0, but to Gmin
                                remainderQ = (upper - lower) * (remainder - 0) + (cellRange - 1) * lower
                                remainderQ = remainderQ + remainderQ * torch.from_numpy(variation).cuda()
                                outputPartial = F.conv2d(inputB, remainderQ * mask, self.bias, self.stride,
                                                         self.padding, self.dilation, self.groups)
                                outputDummyPartial = F.conv2d(inputB, dummyP * mask, self.bias, self.stride,
                                                              self.padding, self.dilation, self.groups)
                                # add ADC quantization effects here
                                outputPartialQ = wage_quantizer.LinearQuantizeOut(outputPartial, self.ADCprecision)
                                outputDummyPartialQ = wage_quantizer.LinearQuantizeOut(outputDummyPartial, self.ADCprecision)
                                scaler = cellRange ** k
                                outputSP = outputSP + outputPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                                outputD = outputD + outputDummyPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                                if (weight.shape[0] == 256) & (weight.shape[1] == 128):
                                    # record the mapped weights of layer 3 for hardware validation
                                    weightMatrix = (remainderQ * mask).cpu().data.numpy()
                                    weight_file_name = ('./layer_record/weightForLayer3_subarray' + str(s)
                                                        + '_weightBitNo_' + str(k) + '.csv')
                                    cout = weightMatrix.shape[0]
                                    weight_matrix = weightMatrix.reshape(cout, -1).transpose()
                                    np.savetxt(weight_file_name, weight_matrix, delimiter=",", fmt='%10.5f')
                            # !!! Important !!! the dummy column needs to be multiplied by the same ratio
                            outputSP = outputSP - outputD  # subtract the dummy column
                            outputP = outputP + outputSP
                        scalerIN = 2 ** z
                        outputIN = outputIN + outputP * scalerIN
                    output = output + outputIN / (2 ** bitActivation)
        output = output / (2 ** bitWeight)  # the weight range was converted from [-1, 1] to [-256, 256]
    else:
        # original WAGE QConv2d
        weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
        weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
        output = F.conv2d(input, weight, self.bias, self.stride, self.padding,
                          self.dilation, self.groups)
        output = output / self.scale
    output = wage_quantizer.WAGEQuantizer_f(output, self.wl_activate, self.wl_error)
    return output
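# Sanity check for the weight bit-slicing loop above, under idealized
# assumptions: no variation, no ADC quantization, and an infinite on/off
# ratio (lower -> 0, so the 2 / (1 - 1/onoffratio) factor becomes 2).
# A bitWeight-bit unsigned integer is split into base-cellRange digits,
# the digits are recombined with cellRange**k weights, and subtracting the
# dummy column re-centres the unsigned range to a signed weight in ~[-1, 1]:
#
import torch

bitWeight, cellBit = 8, 2
cellRange = 2 ** cellBit
nDigits = bitWeight // cellBit
w = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])
X = torch.round((2 ** bitWeight - 1) / 2 * (w + 1))     # [-1, 1] -> [0, 255]
acc = torch.zeros_like(X)
for k in range(nDigits):
    digit = torch.fmod(X, cellRange)                    # content of one cell
    X = torch.round((X - digit) / cellRange)
    acc = acc + digit * cellRange ** k                  # weighted recombination
dummy = (cellRange - 1) / 2 * sum(cellRange ** k for k in range(nDigits))
signed = 2 * (acc - dummy) / 2 ** bitWeight             # back to ~[-1, 1]
assert torch.allclose(signed, w, atol=2 / 2 ** bitWeight)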
def forward(self, input):
    weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
    weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
    outputOriginal = F.linear(input, weight, self.bias)
    output = torch.zeros_like(outputOriginal)
    bitWeight = int(self.wl_weight)
    bitActivation = int(self.wl_input)
    if self.inference == 1:
        # retention
        weight = wage_quantizer.Retention(weight, self.t, self.v, self.detect, self.target)
        # set parameters for hardware inference
        onoffratio = self.onoffratio
        upper = 1
        lower = 1 / onoffratio
        cellRange = 2 ** self.cellBit  # number of conductance levels per cell
        # dummy column: mid-range conductance used to cancel the offset of the
        # unsigned weight mapping, with the on/off ratio taken into account
        dummyP = torch.zeros_like(weight)
        dummyP[:, :] = (cellRange - 1) * (upper + lower) / 2
        # need to divide the columns into different subarrays
        numSubArray = int(weight.shape[1] / self.subArray)
        if numSubArray == 0:
            mask = torch.zeros_like(weight)
            mask[:, :] = 1
            # quantize the input into a binary sequence
            inputQ = torch.round((2 ** bitActivation - 1) / 1 * (input - 0) + 0)
            outputIN = torch.zeros_like(outputOriginal)
            for z in range(bitActivation):
                inputB = torch.fmod(inputQ, 2)
                inputQ = torch.round((inputQ - inputB) / 2)
                # transfer the floating weight in [-1, 1] to an unsigned integer
                X_decimal = torch.round((2 ** bitWeight - 1) / 2 * (weight + 1) + 0) * mask
                outputP = torch.zeros_like(outputOriginal)
                outputD = torch.zeros_like(outputOriginal)
                for k in range(int(bitWeight / self.cellBit)):
                    remainder = torch.fmod(X_decimal, cellRange) * mask
                    variation = np.random.normal(0, self.vari, list(weight.size())).astype(np.float32)
                    X_decimal = torch.round((X_decimal - remainder) / cellRange) * mask
                    # remainder is the weight digit mapped to hardware, so introduce
                    # the on/off ratio here: its range [0, cellRange-1] is mapped to
                    # [lower, upper] * (cellRange - 1); the weight cannot map to 0, but to Gmin
                    remainderQ = (upper - lower) * (remainder - 0) + (cellRange - 1) * lower
                    remainderQ = remainderQ + remainderQ * torch.from_numpy(variation).cuda()
                    # apply the current input bit plane (inputB, not the full input)
                    outputPartial = F.linear(inputB, remainderQ * mask, self.bias)
                    outputDummyPartial = F.linear(inputB, dummyP * mask, self.bias)
                    # add ADC quantization effects here
                    outputPartialQ = wage_quantizer.LinearQuantizeOut(outputPartial, self.ADCprecision)
                    outputDummyPartialQ = wage_quantizer.LinearQuantizeOut(outputDummyPartial, self.ADCprecision)
                    scaler = cellRange ** k
                    outputP = outputP + outputPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                    outputD = outputD + outputDummyPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                scalerIN = 2 ** z
                outputIN = outputIN + (outputP - outputD) * scalerIN
            output = output + outputIN / (2 ** bitActivation)
        else:
            # quantize the input into a binary sequence
            inputQ = torch.round((2 ** bitActivation - 1) / 1 * (input - 0) + 0)
            outputIN = torch.zeros_like(outputOriginal)
            for z in range(bitActivation):
                inputB = torch.fmod(inputQ, 2)
                inputQ = torch.round((inputQ - inputB) / 2)
                outputP = torch.zeros_like(outputOriginal)
                for s in range(numSubArray):
                    mask = torch.zeros_like(weight)
                    mask[:, (s * self.subArray):(s + 1) * self.subArray] = 1
                    # transfer the floating weight in [-1, 1] to an unsigned integer
                    X_decimal = torch.round((2 ** bitWeight - 1) / 2 * (weight + 1) + 0) * mask
                    outputSP = torch.zeros_like(outputOriginal)
                    outputD = torch.zeros_like(outputOriginal)
                    for k in range(int(bitWeight / self.cellBit)):
                        remainder = torch.fmod(X_decimal, cellRange) * mask
                        variation = np.random.normal(0, self.vari, list(remainder.size())).astype(np.float32)
                        X_decimal = torch.round((X_decimal - remainder) / cellRange) * mask
                        # map the digit range [0, cellRange-1] to [lower, upper] * (cellRange - 1);
                        # the weight cannot map to 0, but to Gmin
                        remainderQ = (upper - lower) * (remainder - 0) + (cellRange - 1) * lower
                        remainderQ = remainderQ + remainderQ * torch.from_numpy(variation).cuda()
                        # apply the current input bit plane (inputB, not the full input)
                        outputPartial = F.linear(inputB, remainderQ * mask, self.bias)
                        outputDummyPartial = F.linear(inputB, dummyP * mask, self.bias)
                        # add ADC quantization effects here
                        outputPartialQ = wage_quantizer.LinearQuantizeOut(outputPartial, self.ADCprecision)
                        outputDummyPartialQ = wage_quantizer.LinearQuantizeOut(outputDummyPartial, self.ADCprecision)
                        scaler = cellRange ** k
                        outputSP = outputSP + outputPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                        outputD = outputD + outputDummyPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                    outputSP = outputSP - outputD  # subtract the dummy column
                    outputP = outputP + outputSP
                scalerIN = 2 ** z
                outputIN = outputIN + outputP * scalerIN
            output = output + outputIN / (2 ** bitActivation)
        output = output / (2 ** bitWeight)
    else:
        # original WAGE QLinear
        weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
        weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
        output = F.linear(input, weight, self.bias)
        output = output / self.scale
    output = wage_quantizer.WAGEQuantizer_f(output, self.wl_activate, self.wl_error)
    return output
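# Sanity check for the bit-serial input loop above: slicing the quantized
# activation into binary planes and recombining the partial products with
# 2**z weights reproduces a single full-precision F.linear. Idealized
# hardware assumed: no ADC quantization, no variation, no dummy column.
#
import torch
import torch.nn.functional as F

bitActivation = 8
x = torch.rand(2, 16)                        # activations assumed in [0, 1]
w = torch.randn(4, 16)
xQ = torch.round((2 ** bitActivation - 1) * x)
ref = F.linear(xQ, w)                        # all input bits at once
acc = torch.zeros_like(ref)
for z in range(bitActivation):
    xB = torch.fmod(xQ, 2)                   # current bit plane
    xQ = torch.round((xQ - xB) / 2)
    acc = acc + F.linear(xB, w) * 2 ** z     # weight by bit significance
assert torch.allclose(acc, ref, rtol=1e-4)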