예제 #1
0
    def forward(self, input):
        """Quantized 2-D convolution followed by WAGE activation/error quantization.

        Args:
            input: activation tensor accepted by ``F.conv2d``.

        Returns:
            The value returned by ``wage_quantizer.WAGEQuantizer_f`` for the
            convolution output divided by ``self.scale``.
        """
        # a + (b - a).detach() evaluates to b (up to rounding) in the forward
        # pass, while only `a` takes part in back-propagation.
        scaled = self.weight * self.scale
        passthrough = scaled + (self.weight - scaled).detach()

        # Same pattern again: forward value is the quantized weight, gradient
        # flows through `passthrough`.
        quant_delta = (wage_quantizer.Q(passthrough, self.wl_weight) -
                       passthrough).detach()
        q_weight = passthrough + quant_delta

        conv_out = F.conv2d(input, q_weight, self.bias, self.stride,
                            self.padding, self.dilation, self.groups)
        rescaled = conv_out / self.scale
        return wage_quantizer.WAGEQuantizer_f(rescaled, self.wl_activate,
                                              self.wl_error)
예제 #2
0
    def forward(self, input):
        """Quantized fully-connected layer followed by WAGE activation/error quantization.

        Args:
            input: activation tensor accepted by ``F.linear``.

        Returns:
            The value returned by ``wage_quantizer.WAGEQuantizer_f`` for the
            linear output divided by ``self.scale``.
        """
        # a + (b - a).detach() evaluates to b (up to rounding) in the forward
        # pass, while only `a` takes part in back-propagation.
        scaled = self.weight * self.scale
        passthrough = scaled + (self.weight - scaled).detach()

        # Same pattern again: forward value is the quantized weight, gradient
        # flows through `passthrough`.
        quant_delta = (wage_quantizer.Q(passthrough, self.wl_weight) -
                       passthrough).detach()
        q_weight = passthrough + quant_delta

        linear_out = F.linear(input, q_weight, self.bias)
        rescaled = linear_out / self.scale
        return wage_quantizer.WAGEQuantizer_f(rescaled, self.wl_activate,
                                              self.wl_error)
    def forward(self, input):
        """Quantized 2-D convolution, optionally evaluated through the hardware model.

        The weight goes through two ``a + (b - a).detach()`` steps (scaling by
        ``self.scale``, then ``wage_quantizer.Q`` at ``self.wl_weight`` bits).
        When ``self.inference == 1`` the convolution is re-evaluated by
        ``_hardware_conv``; otherwise the plain quantized convolution is used.
        In both cases the result is divided by ``self.scale`` and passed
        through ``wage_quantizer.WAGEQuantizer_f``.

        Args:
            input: activation tensor accepted by ``F.conv2d``.

        Returns:
            The value returned by ``wage_quantizer.WAGEQuantizer_f``.
        """
        # a + (b - a).detach() evaluates to b (up to rounding) in the forward
        # pass, while only `a` takes part in back-propagation.
        weight1 = self.weight * self.scale + (
            self.weight - self.weight * self.scale).detach()
        weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) -
                            weight1).detach()
        outputOrignal = F.conv2d(input, weight, self.bias, self.stride,
                                 self.padding, self.dilation, self.groups)

        if self.inference == 1:
            output = torch.zeros_like(outputOrignal)
            # Only shape/dtype/device were needed; release the tensor early.
            del outputOrignal
            output = self._hardware_conv(input, weight, output)
        else:
            # original WAGE QConv2d: the convolution computed above already is
            # the result, so it is reused instead of being recomputed.
            output = outputOrignal

        output = output / self.scale
        output = wage_quantizer.WAGEQuantizer_f(output, self.wl_activate,
                                                self.wl_error)

        return output

    def _hardware_conv(self, input, weight, output):
        """Accumulate the hardware-model convolution into ``output``.

        The kernel is processed one spatial tap (i, j) at a time.  Taps are
        enumerated from ``weight.shape[2:]`` so any kernel size is handled.
        Input channels are split into groups of ``self.subArray``; a final,
        partially filled group is included rather than dropped.

        Args:
            input: activation tensor passed to ``forward``.
            weight: quantized weight tensor (4-D, indexed ``[:, :, i, j]``).
            output: zero tensor with the shape of the convolution result.

        Returns:
            The accumulated output divided by ``2**int(self.wl_weight)``.
        """
        bitWeight = int(self.wl_weight)
        bitActivation = int(self.wl_input)

        # retention
        weight = wage_quantizer.Retention(weight, self.t, self.v,
                                          self.detect, self.target)
        # set parameters for Hardware Inference
        upper = 1
        lower = 1 / self.onoffratio
        cellRange = 2**self.cellBit

        # Now consider on/off ratio: the dummy column holds the mid-level value.
        dummyP = torch.zeros_like(weight)
        dummyP[:, :, :, :] = (cellRange - 1) * (upper + lower) / 2

        in_channels = weight.shape[1]
        # Fewer input channels than one sub-array holds: no splitting needed.
        fits_single_array = in_channels < self.subArray
        # Ceiling division so a trailing partial sub-array is not skipped.
        numSubArray = (in_channels + self.subArray - 1) // self.subArray

        for i in range(weight.shape[2]):
            for j in range(weight.shape[3]):
                if fits_single_array:
                    mask = torch.zeros_like(weight)
                    mask[:, :, i, j] = 1
                    if in_channels == 3:
                        # Three-channel input: no bit-serial input and no ADC
                        # quantization is applied on this path.
                        outputP, outputD = self._conv_cell_sums(
                            input, weight, mask, dummyP, output,
                            use_adc=False)
                        output = output + (outputP - outputD)
                    else:
                        # quantize input into binary sequence
                        inputQ = torch.round((2**bitActivation - 1) / 1 *
                                             (input - 0) + 0)
                        outputIN = torch.zeros_like(output)
                        for z in range(bitActivation):
                            inputB = torch.fmod(inputQ, 2)
                            inputQ = torch.round((inputQ - inputB) / 2)
                            # NOTE(review): this path convolves the full
                            # `input` rather than the bit-plane `inputB`
                            # (the sub-array path below uses `inputB`).
                            # Kept as-is; confirm whether `inputB` was meant.
                            outputP, outputD = self._conv_cell_sums(
                                input, weight, mask, dummyP, output,
                                use_adc=True)
                            scalerIN = 2**z
                            outputIN = outputIN + (outputP -
                                                   outputD) * scalerIN
                        output = output + outputIN / (2**bitActivation)
                else:
                    # quantize input into binary sequence
                    inputQ = torch.round((2**bitActivation - 1) / 1 *
                                         (input - 0) + 0)
                    outputIN = torch.zeros_like(output)
                    for z in range(bitActivation):
                        inputB = torch.fmod(inputQ, 2)
                        inputQ = torch.round((inputQ - inputB) / 2)
                        outputP = torch.zeros_like(output)
                        for s in range(numSubArray):
                            # Slicing clips at the last channel, so the final
                            # sub-array may be narrower than self.subArray.
                            mask = torch.zeros_like(weight)
                            mask[:, (s * self.subArray):(s + 1) *
                                 self.subArray, i, j] = 1
                            outputSP, outputD = self._conv_cell_sums(
                                inputB, weight, mask, dummyP, output,
                                use_adc=True, record_subarray=s)
                            # minus dummy column
                            outputP = outputP + (outputSP - outputD)
                        scalerIN = 2**z
                        outputIN = outputIN + outputP * scalerIN
                    output = output + outputIN / (2**bitActivation)

        # The weight was mapped from [-1, 1] to integer levels; scale back.
        return output / (2**bitWeight)

    def _conv_cell_sums(self, conv_input, weight, mask, dummyP, template,
                        use_adc, record_subarray=None):
        """Sum the per-cell partial convolutions for one masked weight slice.

        The masked weight is mapped from [-1, 1] to integer levels and split
        into ``int(wl_weight / cellBit)`` digits of base ``2**cellBit``.
        Each digit is shifted by the on/off ratio, perturbed by Gaussian noise
        of standard deviation ``self.vari`` and convolved with ``conv_input``.

        Args:
            conv_input: tensor fed to ``F.conv2d``.
            weight: weight tensor after retention.
            mask: 0/1 tensor of ``weight``'s shape selecting the active cells.
            dummyP: dummy-column weights of ``weight``'s shape.
            template: tensor whose shape/dtype/device the sums take.
            use_adc: apply ``wage_quantizer.LinearQuantizeOut`` to each
                partial result when true.
            record_subarray: sub-array index used in the CSV file name; the
                CSV dump is skipped when ``None``.

        Returns:
            ``(signal_sum, dummy_sum)`` accumulated over all digits.
        """
        bitWeight = int(self.wl_weight)
        cellRange = 2**self.cellBit
        onoffratio = self.onoffratio
        upper = 1
        lower = 1 / onoffratio

        # transfer floating weight [-1, 1] to integer levels
        X_decimal = torch.round((2**bitWeight - 1) / 2 *
                                (weight + 1) + 0) * mask
        signal_sum = torch.zeros_like(template)
        dummy_sum = torch.zeros_like(template)
        for k in range(int(bitWeight / self.cellBit)):
            remainder = torch.fmod(X_decimal, cellRange) * mask
            variation = np.random.normal(
                0, self.vari, list(weight.size())).astype(np.float32)
            X_decimal = torch.round(
                (X_decimal - remainder) / cellRange) * mask
            # remainder is the value mapped to hardware; introduce the on/off
            # ratio here: weight cannot map to 0, but to Gmin.
            remainderQ = (upper - lower) * (remainder - 0) + (
                cellRange - 1) * lower
            # Follow the weight's device instead of assuming CUDA.
            remainderQ = remainderQ + remainderQ * torch.from_numpy(
                variation).to(weight.device)
            outputPartial = F.conv2d(conv_input, remainderQ * mask,
                                     self.bias, self.stride, self.padding,
                                     self.dilation, self.groups)
            outputDummyPartial = F.conv2d(conv_input, dummyP * mask,
                                          self.bias, self.stride,
                                          self.padding, self.dilation,
                                          self.groups)
            if use_adc:
                # ADC quantization of both the signal and the dummy column
                outputPartial = wage_quantizer.LinearQuantizeOut(
                    outputPartial, self.ADCprecision)
                outputDummyPartial = wage_quantizer.LinearQuantizeOut(
                    outputDummyPartial, self.ADCprecision)
            scaler = cellRange**k
            signal_sum = signal_sum + outputPartial * scaler * 2 / (
                1 - 1 / onoffratio)
            dummy_sum = dummy_sum + outputDummyPartial * scaler * 2 / (
                1 - 1 / onoffratio)
            if (record_subarray is not None and weight.shape[0] == 256
                    and weight.shape[1] == 128):
                # Dump the mapped weights of the 256x128 layer to CSV; the
                # target directory must already exist.
                weightMatrix = (remainderQ * mask).cpu().data.numpy()
                weight_file_name = './layer_record/weightForLayer3_subarray' + str(
                    record_subarray) + '_weightBitNo_' + str(k) + ".csv"
                cout = weightMatrix.shape[0]
                weight_matrix = weightMatrix.reshape(cout, -1).transpose()
                np.savetxt(weight_file_name,
                           weight_matrix,
                           delimiter=",",
                           fmt='%10.5f')
        return signal_sum, dummy_sum
    def forward(self, input):
        """Quantized fully-connected layer, optionally evaluated through the hardware model.

        The weight goes through two ``a + (b - a).detach()`` steps (scaling by
        ``self.scale``, then ``wage_quantizer.Q`` at ``self.wl_weight`` bits).
        When ``self.inference == 1`` the product is re-evaluated by
        ``_hardware_linear``; otherwise the plain quantized ``F.linear``
        result is used.  In both cases the result is divided by
        ``self.scale`` and passed through ``wage_quantizer.WAGEQuantizer_f``.

        Args:
            input: activation tensor accepted by ``F.linear``.

        Returns:
            The value returned by ``wage_quantizer.WAGEQuantizer_f``.
        """
        # a + (b - a).detach() evaluates to b (up to rounding) in the forward
        # pass, while only `a` takes part in back-propagation.
        weight1 = self.weight * self.scale + (
            self.weight - self.weight * self.scale).detach()
        weight = weight1 + (wage_quantizer.Q(weight1, self.wl_weight) -
                            weight1).detach()
        outputOrignal = F.linear(input, weight, self.bias)

        if self.inference == 1:
            output = self._hardware_linear(input, weight,
                                           torch.zeros_like(outputOrignal))
        else:
            # original WAGE QLinear: the product computed above already is
            # the result, so it is reused instead of being recomputed.
            output = outputOrignal

        output = output / self.scale
        output = wage_quantizer.WAGEQuantizer_f(output, self.wl_activate,
                                                self.wl_error)

        return output

    def _hardware_linear(self, input, weight, output):
        """Accumulate the hardware-model matrix product into ``output``.

        Weight columns are split into groups of ``self.subArray``.  The group
        count uses ceiling division, so a weight narrower than one sub-array
        is handled as a single group and a trailing partial group is included
        rather than dropped.

        Args:
            input: activation tensor passed to ``forward``.
            weight: quantized weight tensor (2-D).
            output: zero tensor with the shape of the ``F.linear`` result.

        Returns:
            The accumulated output divided by ``2**int(self.wl_weight)``.
        """
        bitWeight = int(self.wl_weight)
        bitActivation = int(self.wl_input)

        # retention
        weight = wage_quantizer.Retention(weight, self.t, self.v,
                                          self.detect, self.target)
        # set parameters for Hardware Inference
        upper = 1
        lower = 1 / self.onoffratio
        cellRange = 2**self.cellBit

        # Now consider on/off ratio: the dummy column holds the mid-level value.
        dummyP = torch.zeros_like(weight)
        dummyP[:, :] = (cellRange - 1) * (upper + lower) / 2

        # need to divide to different subArray (ceiling division)
        numSubArray = (weight.shape[1] + self.subArray - 1) // self.subArray

        # quantize input into binary sequence
        inputQ = torch.round((2**bitActivation - 1) / 1 * (input - 0) + 0)
        outputIN = torch.zeros_like(output)
        for z in range(bitActivation):
            inputB = torch.fmod(inputQ, 2)
            inputQ = torch.round((inputQ - inputB) / 2)
            # NOTE(review): the partial products below use the full `input`,
            # not the bit-plane `inputB` computed here.  Kept as-is; confirm
            # whether `inputB` was meant.
            outputP = torch.zeros_like(output)
            for s in range(numSubArray):
                # Slicing clips at the last column, so the final sub-array
                # may be narrower than self.subArray.
                mask = torch.zeros_like(weight)
                mask[:, (s * self.subArray):(s + 1) * self.subArray] = 1
                outputSP, outputD = self._linear_cell_sums(
                    input, weight, mask, dummyP, output)
                # minus dummy column
                outputP = outputP + (outputSP - outputD)
            scalerIN = 2**z
            outputIN = outputIN + outputP * scalerIN
        output = output + outputIN / (2**bitActivation)

        # The weight was mapped from [-1, 1] to integer levels; scale back.
        return output / (2**bitWeight)

    def _linear_cell_sums(self, linear_input, weight, mask, dummyP, template):
        """Sum the per-cell partial products for one masked weight slice.

        The masked weight is mapped from [-1, 1] to integer levels and split
        into ``int(wl_weight / cellBit)`` digits of base ``2**cellBit``.
        Each digit is shifted by the on/off ratio, perturbed by Gaussian noise
        of standard deviation ``self.vari``, multiplied with ``linear_input``
        and passed through ``wage_quantizer.LinearQuantizeOut``.

        Args:
            linear_input: tensor fed to ``F.linear``.
            weight: weight tensor after retention.
            mask: 0/1 tensor of ``weight``'s shape selecting the active cells.
            dummyP: dummy-column weights of ``weight``'s shape.
            template: tensor whose shape/dtype/device the sums take.

        Returns:
            ``(signal_sum, dummy_sum)`` accumulated over all digits.
        """
        bitWeight = int(self.wl_weight)
        cellRange = 2**self.cellBit
        onoffratio = self.onoffratio
        upper = 1
        lower = 1 / onoffratio

        # transfer floating weight [-1, 1] to integer levels
        X_decimal = torch.round((2**bitWeight - 1) / 2 *
                                (weight + 1) + 0) * mask
        signal_sum = torch.zeros_like(template)
        dummy_sum = torch.zeros_like(template)
        for k in range(int(bitWeight / self.cellBit)):
            remainder = torch.fmod(X_decimal, cellRange) * mask
            variation = np.random.normal(
                0, self.vari, list(weight.size())).astype(np.float32)
            X_decimal = torch.round(
                (X_decimal - remainder) / cellRange) * mask
            # remainder is the value mapped to hardware; introduce the on/off
            # ratio here: weight cannot map to 0, but to Gmin.
            remainderQ = (upper - lower) * (remainder - 0) + (
                cellRange - 1) * lower
            # Follow the weight's device instead of assuming CUDA.
            remainderQ = remainderQ + remainderQ * torch.from_numpy(
                variation).to(weight.device)
            outputPartial = F.linear(linear_input, remainderQ * mask,
                                     self.bias)
            outputDummyPartial = F.linear(linear_input, dummyP * mask,
                                          self.bias)
            # ADC quantization of both the signal and the dummy column
            outputPartialQ = wage_quantizer.LinearQuantizeOut(
                outputPartial, self.ADCprecision)
            outputDummyPartialQ = wage_quantizer.LinearQuantizeOut(
                outputDummyPartial, self.ADCprecision)
            scaler = cellRange**k
            signal_sum = signal_sum + outputPartialQ * scaler * 2 / (
                1 - 1 / onoffratio)
            dummy_sum = dummy_sum + outputDummyPartialQ * scaler * 2 / (
                1 - 1 / onoffratio)
        return signal_sum, dummy_sum