def forward(self, input):
    weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
    weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
    output = F.conv2d(input, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
    output = output / self.scale
    output = wage_quantizer.WAGEQuantizer_f(output, self.wl_activate, self.wl_error)
    return output
def forward(self, input):
    weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
    weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
    output = F.linear(input, weight, self.bias)
    output = output / self.scale
    output = wage_quantizer.WAGEQuantizer_f(output, self.wl_activate, self.wl_error)
    return output
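# Illustrative sketch: both forward passes above use the same straight-through trick.
# Every "x + (f(x) - x).detach()" term evaluates to f(x) in the forward pass, while the
# backward pass only sees the non-detached path, so rounding never blocks gradients and
# the scale factor is carried into the gradient. The toy uniform quantizer below is a
# hypothetical stand-in for wage_quantizer.Q, not the repo's implementation.
def _straight_through_sketch():
    import torch

    def quantize_ste(w, wl_weight=8, scale=0.5):
        # forward value: w (the scale cancels out); backward gradient: multiplied by `scale`
        w1 = w * scale + (w - w * scale).detach()
        # toy uniform quantizer standing in for wage_quantizer.Q
        levels = 2 ** (wl_weight - 1)
        w_q = torch.round(w1 * levels) / levels
        # forward value: w_q; backward gradient: same as w1 (rounding contributes no gradient)
        return w1 + (w_q - w1).detach()

    w = torch.randn(4, requires_grad=True)
    quantize_ste(w).sum().backward()
    print(w.grad)  # every entry equals `scale`: the rounding step is invisible to the gradient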
def Neural_Sim(self, input, output):
    input_file_name = './layer_record/input' + str(self.name) + '.csv'
    weight_file_name = './layer_record/weight' + str(self.name) + '.csv'
    f = open('./layer_record/trace_command.sh', "a")
    f.write(weight_file_name + ' ' + input_file_name + ' ')
    weight_q = wage_quantizer.Q(self.weight, self.wl_weight)
    write_matrix_weight(weight_q.cpu().data.numpy(), weight_file_name)
    if len(self.weight.shape) > 2:
        k = self.weight.shape[-1]
        write_matrix_activation_conv(stretch_input(input[0].cpu().data.numpy(), k),
                                     None, self.wl_input, input_file_name)
    else:
        write_matrix_activation_fc(input[0].cpu().data.numpy(),
                                   None, self.wl_input, input_file_name)
def Neural_Sim(self, input, output):
    input_file_name = './layer_record/input' + str(self.name) + '.csv'
    weight_file_name = './layer_record/weight' + str(self.name) + '.csv'
    weightOld_file_name = './layer_record/weightOld' + str(self.name) + '.csv'
    f = open('./layer_record/trace_command.sh', "a")
    input_activity = open('./input_activity.csv', "a")
    weight_q = wage_quantizer.Q(self.weight, self.wl_weight)
    write_matrix_weight(weight_q.cpu().data.numpy(), weight_file_name)
    if len(self.weight.shape) > 2:
        k = self.weight.shape[-1]
        padding = self.padding
        stride = self.stride
        activity = write_matrix_activation_conv(
            stretch_input(input[0].cpu().data.numpy(), k, padding, stride),
            None, self.wl_input, input_file_name)
        input_activity.write(str(activity) + ",")
    else:
        activity = write_matrix_activation_fc(input[0].cpu().data.numpy(),
                                              None, self.wl_input, input_file_name)
        if str(self.name) == 'FC2_':
            input_activity.write(str(activity) + "\n")
        else:
            input_activity.write(str(activity) + ",")
    f.write(weight_file_name + ' ' + weightOld_file_name + ' ' + input_file_name + ' ' + str(activity) + ' ')
def Neural_Sim(self, input, output):
    global model_n, FP
    print("quantize layer ", self.name)
    input_file_name = './layer_record_' + str(model_n) + '/input' + str(self.name) + '.csv'
    weight_file_name = './layer_record_' + str(model_n) + '/weight' + str(self.name) + '.csv'
    f = open('./layer_record_' + str(model_n) + '/trace_command.sh', "a")
    f.write(weight_file_name + ' ' + input_file_name + ' ')
    if FP:
        weight_q = float_quantizer.float_range_quantize(self.weight, self.wl_weight)
    else:
        weight_q = wage_quantizer.Q(self.weight, self.wl_weight)
    write_matrix_weight(weight_q.cpu().data.numpy(), weight_file_name)
    if len(self.weight.shape) > 2:
        k = self.weight.shape[-1]
        write_matrix_activation_conv(stretch_input(input[0].cpu().data.numpy(), k),
                                     None, self.wl_input, input_file_name)
    else:
        write_matrix_activation_fc(input[0].cpu().data.numpy(),
                                   None, self.wl_input, input_file_name)
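# Illustrative sketch: each Neural_Sim variant above follows torch's forward-hook
# signature (module, input, output), so it is meant to be registered on the quantized
# layers and fired by one inference pass. A minimal driver, assuming `model` and
# `sample_batch` are built elsewhere in the repo and QConv2d/QLinear are the
# quantized layer classes defined in this file:
def attach_trace_hooks(model, sample_batch):
    import torch
    handles = [m.register_forward_hook(Neural_Sim)
               for m in model.modules()
               if isinstance(m, (QConv2d, QLinear))]
    with torch.no_grad():
        model(sample_batch)      # each hook dumps its layer's weight/input CSVs once
    for h in handles:
        h.remove()               # detach the hooks so later passes do not re-dump traces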
def forward(self, input):
    weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
    weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
    outputOrignal = F.conv2d(input, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
    bitWeight = int(self.wl_weight)
    bitActivation = int(self.wl_input)
    if self.inference == 1:
        # retention effect on the stored weights
        weight = wage_quantizer.Retention(weight, self.t, self.v, self.detect, self.target)
        # set parameters for hardware inference
        onoffratio = self.onoffratio
        upper = 1
        lower = 1 / onoffratio
        output = torch.zeros_like(outputOrignal)
        del outputOrignal
        cellRange = 2 ** self.cellBit  # number of conductance levels per cell
        # dummy column used to cancel the on/off-ratio offset
        dummyP = torch.zeros_like(weight)
        dummyP[:, :, :, :] = (cellRange - 1) * (upper + lower) / 2
        for i in range(3):          # iterate over the 3x3 kernel positions
            for j in range(3):
                # number of subArrays the input channels are split into
                numSubArray = int(weight.shape[1] / self.subArray)
                if numSubArray == 0:
                    mask = torch.zeros_like(weight)
                    mask[:, :, i, j] = 1
                    if weight.shape[1] == 3:
                        # first layer, analog input: map the floating weight in [-1, 1] to integer levels
                        X_decimal = torch.round((2 ** bitWeight - 1) / 2 * (weight + 1) + 0) * mask
                        outputP = torch.zeros_like(output)
                        outputD = torch.zeros_like(output)
                        for k in range(int(bitWeight / self.cellBit)):
                            remainder = torch.fmod(X_decimal, cellRange) * mask
                            variation = np.random.normal(0, self.vari, list(weight.size())).astype(np.float32)
                            X_decimal = torch.round((X_decimal - remainder) / cellRange) * mask
                            # remainder is the value mapped to hardware, so include on/off-ratio effects:
                            # its range [0, cellRange-1] is mapped to [lower, upper]*(cellRange-1)
                            remainderQ = (upper - lower) * (remainder - 0) + (cellRange - 1) * lower  # weight cannot map to 0, but to Gmin
                            remainderQ = remainderQ + remainderQ * torch.from_numpy(variation).cuda()
                            outputPartial = F.conv2d(input, remainderQ * mask, self.bias, self.stride, self.padding, self.dilation, self.groups)
                            outputDummyPartial = F.conv2d(input, dummyP * mask, self.bias, self.stride, self.padding, self.dilation, self.groups)
                            scaler = cellRange ** k
                            outputP = outputP + outputPartial * scaler * 2 / (1 - 1 / onoffratio)
                            outputD = outputD + outputDummyPartial * scaler * 2 / (1 - 1 / onoffratio)
                        outputP = outputP - outputD
                        output = output + outputP
                    else:
                        # quantize input into a binary sequence
                        inputQ = torch.round((2 ** bitActivation - 1) / 1 * (input - 0) + 0)
                        outputIN = torch.zeros_like(output)
                        for z in range(bitActivation):
                            inputB = torch.fmod(inputQ, 2)
                            inputQ = torch.round((inputQ - inputB) / 2)
                            outputP = torch.zeros_like(output)
                            # map the floating weight in [-1, 1] to integer levels
                            X_decimal = torch.round((2 ** bitWeight - 1) / 2 * (weight + 1) + 0) * mask
                            outputD = torch.zeros_like(output)
                            for k in range(int(bitWeight / self.cellBit)):
                                remainder = torch.fmod(X_decimal, cellRange) * mask
                                variation = np.random.normal(0, self.vari, list(weight.size())).astype(np.float32)
                                X_decimal = torch.round((X_decimal - remainder) / cellRange) * mask
                                # map remainder from [0, cellRange-1] to [lower, upper]*(cellRange-1)
                                remainderQ = (upper - lower) * (remainder - 0) + (cellRange - 1) * lower  # weight cannot map to 0, but to Gmin
                                remainderQ = remainderQ + remainderQ * torch.from_numpy(variation).cuda()
                                outputPartial = F.conv2d(input, remainderQ * mask, self.bias, self.stride, self.padding, self.dilation, self.groups)
                                outputDummyPartial = F.conv2d(input, dummyP * mask, self.bias, self.stride, self.padding, self.dilation, self.groups)
                                # add ADC quantization effects here
                                outputPartialQ = wage_quantizer.LinearQuantizeOut(outputPartial, self.ADCprecision)
                                outputDummyPartialQ = wage_quantizer.LinearQuantizeOut(outputDummyPartial, self.ADCprecision)
                                scaler = cellRange ** k
                                outputP = outputP + outputPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                                outputD = outputD + outputDummyPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                            scalerIN = 2 ** z
                            outputIN = outputIN + (outputP - outputD) * scalerIN
                        output = output + outputIN / (2 ** bitActivation)
                else:
                    # quantize input into a binary sequence
                    inputQ = torch.round((2 ** bitActivation - 1) / 1 * (input - 0) + 0)
                    outputIN = torch.zeros_like(output)
                    for z in range(bitActivation):
                        inputB = torch.fmod(inputQ, 2)
                        inputQ = torch.round((inputQ - inputB) / 2)
                        outputP = torch.zeros_like(output)
                        for s in range(numSubArray):
                            mask = torch.zeros_like(weight)
                            mask[:, (s * self.subArray):(s + 1) * self.subArray, i, j] = 1
                            # map the floating weight in [-1, 1] to integer levels
                            X_decimal = torch.round((2 ** bitWeight - 1) / 2 * (weight + 1) + 0) * mask
                            outputSP = torch.zeros_like(output)
                            outputD = torch.zeros_like(output)
                            for k in range(int(bitWeight / self.cellBit)):
                                remainder = torch.fmod(X_decimal, cellRange) * mask
                                variation = np.random.normal(0, self.vari, list(weight.size())).astype(np.float32)
                                X_decimal = torch.round((X_decimal - remainder) / cellRange) * mask
                                # map remainder from [0, cellRange-1] to [lower, upper]*(cellRange-1)
                                remainderQ = (upper - lower) * (remainder - 0) + (cellRange - 1) * lower  # weight cannot map to 0, but to Gmin
                                remainderQ = remainderQ + remainderQ * torch.from_numpy(variation).cuda()
                                outputPartial = F.conv2d(inputB, remainderQ * mask, self.bias, self.stride, self.padding, self.dilation, self.groups)
                                outputDummyPartial = F.conv2d(inputB, dummyP * mask, self.bias, self.stride, self.padding, self.dilation, self.groups)
                                # add ADC quantization effects here
                                outputPartialQ = wage_quantizer.LinearQuantizeOut(outputPartial, self.ADCprecision)
                                outputDummyPartialQ = wage_quantizer.LinearQuantizeOut(outputDummyPartial, self.ADCprecision)
                                scaler = cellRange ** k
                                outputSP = outputSP + outputPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                                outputD = outputD + outputDummyPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                                if (weight.shape[0] == 256) & (weight.shape[1] == 128):
                                    weightMatrix = (remainderQ * mask).cpu().data.numpy()
                                    weight_file_name = './layer_record/weightForLayer3_subarray' + str(s) + '_weightBitNo_' + str(k) + ".csv"
                                    cout = weightMatrix.shape[0]
                                    weight_matrix = weightMatrix.reshape(cout, -1).transpose()
                                    np.savetxt(weight_file_name, weight_matrix, delimiter=",", fmt='%10.5f')
                            # !!! Important !!! the dummy column must be scaled by the same ratio before subtraction
                            outputSP = outputSP - outputD  # minus dummy column
                            outputP = outputP + outputSP
                        scalerIN = 2 ** z
                        outputIN = outputIN + outputP * scalerIN
                    output = output + outputIN / (2 ** bitActivation)
        output = output / (2 ** bitWeight)  # the weight range was converted from [-1, 1] to [-(2**bitWeight - 1), 2**bitWeight - 1]
    else:
        # original WAGE QConv2d
        weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
        weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
        output = F.conv2d(input, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
    output = output / self.scale
    output = wage_quantizer.WAGEQuantizer_f(output, self.wl_activate, self.wl_error)
    return output
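# Illustrative sketch: a numerical check of the weight mapping used in the inference
# branch above, under ideal conditions (no device variation, no ADC clipping). Slicing
# the (2**bitWeight)-level weight into cellBit-wide chips, mapping each chip onto
# conductances in [lower, upper]*(cellRange-1), subtracting the dummy column, and
# rescaling by 2/(1 - 1/onoffratio) and 1/2**bitWeight recovers the signed weight.
def _bit_slicing_sketch():
    import numpy as np
    bitWeight, cellBit, onoffratio = 8, 2, 10.0
    cellRange = 2 ** cellBit
    upper, lower = 1.0, 1.0 / onoffratio
    dummy = (cellRange - 1) * (upper + lower) / 2              # same value as dummyP above

    w = -0.3                                                   # a weight in [-1, 1]
    X_decimal = np.round((2 ** bitWeight - 1) / 2 * (w + 1))   # map to [0, 2**bitWeight - 1]

    acc = 0.0
    for k in range(bitWeight // cellBit):
        remainder = np.fmod(X_decimal, cellRange)              # k-th cellBit-wide chip
        X_decimal = np.round((X_decimal - remainder) / cellRange)
        remainderQ = (upper - lower) * remainder + (cellRange - 1) * lower  # chip as conductance
        # partial sum minus dummy column, rescaled exactly as in the forward pass
        acc += (remainderQ - dummy) * (cellRange ** k) * 2 / (1 - 1 / onoffratio)

    print(w, acc / (2 ** bitWeight))   # -0.3 vs. ~-0.3008, one weight-quantization step apart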
def forward(self, input):
    weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
    weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
    outputOrignal = F.linear(input, weight, self.bias)
    output = torch.zeros_like(outputOrignal)
    bitWeight = int(self.wl_weight)
    bitActivation = int(self.wl_input)
    if self.inference == 1:
        # retention effect on the stored weights
        weight = wage_quantizer.Retention(weight, self.t, self.v, self.detect, self.target)
        # set parameters for hardware inference
        onoffratio = self.onoffratio
        upper = 1
        lower = 1 / onoffratio
        output = torch.zeros_like(outputOrignal)
        cellRange = 2 ** self.cellBit  # number of conductance levels per cell
        # dummy column used to cancel the on/off-ratio offset
        dummyP = torch.zeros_like(weight)
        dummyP[:, :] = (cellRange - 1) * (upper + lower) / 2
        # number of subArrays the input dimension is split into
        numSubArray = int(weight.shape[1] / self.subArray)
        if numSubArray == 0:
            mask = torch.zeros_like(weight)
            mask[:, :] = 1
            # quantize input into a binary sequence
            inputQ = torch.round((2 ** bitActivation - 1) / 1 * (input - 0) + 0)
            outputIN = torch.zeros_like(outputOrignal)
            for z in range(bitActivation):
                inputB = torch.fmod(inputQ, 2)
                inputQ = torch.round((inputQ - inputB) / 2)
                # map the floating weight in [-1, 1] to integer levels
                X_decimal = torch.round((2 ** bitWeight - 1) / 2 * (weight + 1) + 0) * mask
                outputP = torch.zeros_like(outputOrignal)
                outputD = torch.zeros_like(outputOrignal)
                for k in range(int(bitWeight / self.cellBit)):
                    remainder = torch.fmod(X_decimal, cellRange) * mask
                    variation = np.random.normal(0, self.vari, list(weight.size())).astype(np.float32)
                    X_decimal = torch.round((X_decimal - remainder) / cellRange) * mask
                    # map remainder from [0, cellRange-1] to [lower, upper]*(cellRange-1)
                    remainderQ = (upper - lower) * (remainder - 0) + (cellRange - 1) * lower  # weight cannot map to 0, but to Gmin
                    remainderQ = remainderQ + remainderQ * torch.from_numpy(variation).cuda()
                    outputPartial = F.linear(input, remainderQ * mask, self.bias)
                    outputDummyPartial = F.linear(input, dummyP * mask, self.bias)
                    # add ADC quantization effects here
                    outputPartialQ = wage_quantizer.LinearQuantizeOut(outputPartial, self.ADCprecision)
                    outputDummyPartialQ = wage_quantizer.LinearQuantizeOut(outputDummyPartial, self.ADCprecision)
                    scaler = cellRange ** k
                    outputP = outputP + outputPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                    outputD = outputD + outputDummyPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                scalerIN = 2 ** z
                outputIN = outputIN + (outputP - outputD) * scalerIN
            output = output + outputIN / (2 ** bitActivation)
        else:
            inputQ = torch.round((2 ** bitActivation - 1) / 1 * (input - 0) + 0)
            outputIN = torch.zeros_like(outputOrignal)
            for z in range(bitActivation):
                inputB = torch.fmod(inputQ, 2)
                inputQ = torch.round((inputQ - inputB) / 2)
                outputP = torch.zeros_like(outputOrignal)
                for s in range(numSubArray):
                    mask = torch.zeros_like(weight)
                    mask[:, (s * self.subArray):(s + 1) * self.subArray] = 1
                    # map the floating weight in [-1, 1] to integer levels
                    X_decimal = torch.round((2 ** bitWeight - 1) / 2 * (weight + 1) + 0) * mask
                    outputSP = torch.zeros_like(outputOrignal)
                    outputD = torch.zeros_like(outputOrignal)
                    for k in range(int(bitWeight / self.cellBit)):
                        remainder = torch.fmod(X_decimal, cellRange) * mask
                        variation = np.random.normal(0, self.vari, list(remainder.size())).astype(np.float32)
                        X_decimal = torch.round((X_decimal - remainder) / cellRange) * mask
                        # map remainder from [0, cellRange-1] to [lower, upper]*(cellRange-1)
                        remainderQ = (upper - lower) * (remainder - 0) + (cellRange - 1) * lower  # weight cannot map to 0, but to Gmin
                        remainderQ = remainderQ + remainderQ * torch.from_numpy(variation).cuda()
                        outputPartial = F.linear(input, remainderQ * mask, self.bias)
                        outputDummyPartial = F.linear(input, dummyP * mask, self.bias)
                        # add ADC quantization effects here
                        outputPartialQ = wage_quantizer.LinearQuantizeOut(outputPartial, self.ADCprecision)
                        outputDummyPartialQ = wage_quantizer.LinearQuantizeOut(outputDummyPartial, self.ADCprecision)
                        scaler = cellRange ** k
                        outputSP = outputSP + outputPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                        outputD = outputD + outputDummyPartialQ * scaler * 2 / (1 - 1 / onoffratio)
                    outputSP = outputSP - outputD  # minus dummy column
                    outputP = outputP + outputSP
                scalerIN = 2 ** z
                outputIN = outputIN + outputP * scalerIN
            output = output + outputIN / (2 ** bitActivation)
        output = output / (2 ** bitWeight)
    else:
        # original WAGE QLinear
        weight1 = self.weight * self.scale + (self.weight - self.weight * self.scale).detach()
        weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) - weight1).detach()
        output = F.linear(input, weight, self.bias)
    output = output / self.scale
    output = wage_quantizer.WAGEQuantizer_f(output, self.wl_activate, self.wl_error)
    return output
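# Illustrative sketch: the inference branches above also stream the activation
# bit-serially. inputQ is peeled into bit-planes inputB, each partial product is
# weighted by 2**z, and the accumulated result is rescaled by 1/2**bitActivation.
# A toy check (ADC effects ignored) that this reproduces the full-precision dot product:
def _bit_serial_input_sketch():
    import numpy as np
    bitActivation = 8
    x = np.array([0.25, 0.5, 0.75, 1.0])              # activations assumed in [0, 1]
    w = np.array([0.1, -0.2, 0.3, -0.4])

    inputQ = np.round((2 ** bitActivation - 1) * x)   # integer levels in [0, 2**bitActivation - 1]
    acc = 0.0
    for z in range(bitActivation):
        inputB = np.fmod(inputQ, 2)                   # current bit-plane (0/1 vector)
        inputQ = np.round((inputQ - inputB) / 2)
        acc += (inputB @ w) * (2 ** z)                # partial product, weighted by bit significance

    print(x @ w, acc / (2 ** bitActivation))          # agree to within ~1/2**bitActivation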
def pre_save_old_weight(oldWeight, name, wl_weight):
    if not os.path.exists('./layer_record'):
        os.makedirs('./layer_record')
    weight_file_name = './layer_record/Oldweight' + str(name) + '.csv'
    weight_q = wage_quantizer.Q(oldWeight, wl_weight)
    write_matrix_weight(weight_q.cpu().data.numpy(), weight_file_name)
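# Illustrative sketch: a hypothetical call site for pre_save_old_weight, snapshotting
# the quantized weights of every quantized layer before an update so the old
# conductance state can later be compared against the new one (`model` is assumed
# to be built elsewhere in the repo).
def save_old_weights(model):
    for name, layer in model.named_modules():
        if hasattr(layer, 'wl_weight'):
            pre_save_old_weight(layer.weight.data, name, layer.wl_weight)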