示例#1
0
    def forward(self, input: Tensor, hx: Tensor) -> Tensor:
        """Compute the next hidden state by simulating the bitstream MGU cell.

        A fresh ``FSUMGUCell`` is built on every call from this module's
        external weights and biases. ``input`` and ``hx`` are turned into
        bitstreams (``BinGen`` -> ``RNG`` -> ``BSGen``), and the cell is driven
        for ``2 ** self.hwcfg["width"]`` cycles while a ``ProgError`` monitor
        accumulates the output bits.

        Args:
            input: Input tensor; ``input.size()[0]`` sizes the default hidden
                state and the output monitor.
            hx: Previous hidden state, or ``None`` to start from zeros of shape
                ``(input.size()[0], self.hidden_size)``.

        Returns:
            The first element of the ``ProgError`` result after the last cycle
            (presumably the value recovered from the output bitstream --
            TODO confirm against ``ProgError``).
        """
        if hx is None:
            hx = torch.zeros(input.size()[0], self.hidden_size, dtype=input.dtype, device=input.device)

        rnncell = FSUMGUCell(self.input_size, self.hidden_size, bias=self.bias,
                        weight_ext_f=self.weight_f, bias_ext_f=self.bias_f, weight_ext_n=self.weight_n, bias_ext_n=self.bias_n,
                        hx_buffer=hx,
                        hwcfg=self.hwcfg, swcfg=self.swcfg).to(input.device)

        # Bitstream generator for the input.
        iSource = BinGen(input, self.hwcfg, self.swcfg)().to(input.device)
        iRNG = RNG(self.hwcfg, self.swcfg)().to(input.device)
        iBSG = BSGen(iSource, iRNG, self.swcfg).to(input.device)

        # Bitstream generator for the hidden state.
        hSource = BinGen(hx, self.hwcfg, self.swcfg)().to(input.device)
        hRNG = RNG(self.hwcfg, self.swcfg)().to(input.device)
        hBSG = BSGen(hSource, hRNG, self.swcfg).to(input.device)

        # The monitor is only used to accumulate the output, so its reference
        # value is a zero tensor of the hidden-state shape.
        oPE = ProgError(torch.zeros(input.size()[0], self.hidden_size, dtype=input.dtype, device=input.device),
                        self.hwcfg_ope).to(input.device)

        # The all-zero index bases do not change between cycles; allocate them
        # once. ``base + c`` yields a fresh tensor each cycle, so the bases are
        # never handed to the generators themselves.
        idx = torch.zeros(iSource.size(), dtype=torch.long, device=input.device)
        hdx = torch.zeros(hSource.size(), dtype=torch.long, device=input.device)

        for c in range(2**self.hwcfg["width"]):
            iBS = iBSG(idx + c)
            hBS = hBSG(hdx + c)

            oBS = rnncell(iBS, hBS)
            oPE.Monitor(oBS)

        hy = oPE()[0]
        return hy
示例#2
0
def _report_abs_error(label, err):
    """Print min/max, std, mean and RMSE of the error tensor ``err``.

    ``label`` is left-aligned in a 30-character column so that consecutive
    report lines line up.
    """
    err_min = err.min().item()
    err_max = err.max().item()
    rmse = torch.sqrt(torch.mean(torch.square(err)))
    std, mean = torch.std_mean(err)
    print("{:30s}".format(label) +
          ", Absolute Error range," + "{:12f}".format(err_min) + ", {:12f}".format(err_max) +
          ", std," + "{:12f}".format(std) +
          ", mean," + "{:12f}".format(mean) +
          ", rmse," + "{:12f}".format(rmse))


def test_fsumgu():
    """Compare the bitstream MGU cell against reference cells over a window.

    For each bit width, four cells sharing ``rnn1``'s weights are stepped over
    ``win_sz`` inputs: ``HardMGUCell`` (reference), ``FSUMGUCell`` (simulated
    cycle by cycle for ``2 ** bitwidth`` cycles), ``HardMGUCellFXP`` and
    ``HUBMGUCell``. After every step the error of the FSU output is reported
    through ``progerror_report`` and the hub/fxp errors relative to the
    reference are printed.
    """
    # Intermediate signals read as attributes from both the reference cell
    # (expected value) and the bitstream cell (per-cycle bits).
    gate_names = ("fg_ug_in", "fg_in", "fg", "fg_hx",
                  "ng_ug_in", "ng", "fg_ng", "fg_ng_inv")

    for bitwidth in [7, 8, 9, 10]:
        print("bit width:", bitwidth)
        win_sz = 10
        batch = 32
        input_sz = 256
        hidden_sz = 64

        intwidth = 1
        fracwidth = bitwidth - intwidth
        bias = False
        # When True, only the output errors are reported for each window.
        output_error_only = True

        hwcfg = {
            "width": bitwidth,
            "mode": "bipolar",
            "depth": bitwidth + 2,
            "depth_ismul": bitwidth - 4,
            "rng": "Sobol",
            "dimr": 1,
            "scale": 1
        }
        swcfg = {
            "btype": torch.float,
            "rtype": torch.float,
            "stype": torch.float
        }

        inputs = torch.randn(win_sz, batch, input_sz).to(device)
        inputs = truncated_normal(inputs, mean=0, std=0.4)
        hx1 = torch.randn(batch, hidden_sz).to(device)
        hx1 = truncated_normal(hx1, mean=0, std=0.1)
        # Every cell starts from the same initial hidden state.
        hx2 = hx1.clone().detach().to(device)
        hx3 = hx1.clone().detach().to(device)
        hx4 = hx1.clone().detach().to(device)
        output1 = []
        output2 = []
        output3 = []
        output4 = []

        rnn1 = HardMGUCell(input_sz, hidden_sz, bias=bias,
                           hard=True).to(device)
        rnn3 = HardMGUCellFXP(input_sz,
                              hidden_sz,
                              bias=bias,
                              hard=True,
                              intwidth=intwidth,
                              fracwidth=fracwidth).to(device)
        rnn3.weight_f.data = rnn1.weight_f.clone().detach().to(device)
        rnn3.weight_n.data = rnn1.weight_n.clone().detach().to(device)

        rnn4 = HUBMGUCell(input_sz,
                          hidden_sz,
                          bias=bias,
                          weight_ext_f=rnn1.weight_f,
                          bias_ext_f=rnn1.bias_f,
                          weight_ext_n=rnn1.weight_n,
                          bias_ext_n=rnn1.bias_n,
                          hwcfg=hwcfg).to(device)

        for i in range(win_sz):
            hx1 = rnn1(inputs[i], hx1)
            output1.append(hx1)

            hx3 = rnn3(inputs[i], hx3)
            output3.append(hx3)

            hx4 = rnn4(inputs[i], hx4)
            output4.append(hx4)

            iVec, hVec = inputs[i], hx2

            # rnn2 in the loop to mimic the hw reset
            rnn2 = FSUMGUCell(input_sz,
                              hidden_sz,
                              bias=bias,
                              weight_ext_f=rnn1.weight_f,
                              bias_ext_f=rnn1.bias_f,
                              weight_ext_n=rnn1.weight_n,
                              bias_ext_n=rnn1.bias_n,
                              hx_buffer=hx2,
                              hwcfg=hwcfg,
                              swcfg=swcfg).to(device)

            iSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iRNG = RNG(hwcfg, swcfg)().to(device)
            iBSG = BSGen(iSource, iRNG, swcfg).to(device)
            iPE = ProgError(iVec, hwcfg).to(device)

            hSource = BinGen(hVec, hwcfg, swcfg)().to(device)
            hRNG = RNG(hwcfg, swcfg)().to(device)
            hBSG = BSGen(hSource, hRNG, swcfg).to(device)
            hPE = ProgError(hVec, hwcfg).to(device)

            oVec = output1[i]
            oPE = ProgError(oVec, hwcfg).to(device)

            # One monitor per intermediate signal, each referenced to the
            # value the reference cell produced for this step.
            gate_PEs = {
                name: ProgError(getattr(rnn1, name), hwcfg).to(device)
                for name in gate_names
            }

            # The all-zero index bases are the same for every cycle; allocate
            # them once per window instead of once per cycle.
            idx = torch.zeros(iSource.size(), dtype=torch.long, device=device)
            hdx = torch.zeros(hSource.size(), dtype=torch.long, device=device)

            for c in range(2**bitwidth):
                iBS = iBSG(idx + c)
                iPE.Monitor(iBS)

                hBS = hBSG(hdx + c)
                hPE.Monitor(hBS)

                oBS = rnn2(iBS, hBS)

                for name, gate_pe in gate_PEs.items():
                    gate_pe.Monitor(getattr(rnn2, name))

                oPE.Monitor(oBS)

            # The FSU result of this window feeds the next window.
            hx2 = oPE()[0]
            output2.append(hx2)

            if not output_error_only:
                progerror_report(iPE, "input")
                progerror_report(hPE, "hidden")

                for name, gate_pe in gate_PEs.items():
                    progerror_report(gate_pe, name)

            progerror_report(oPE, str(i) + "-th win output fsu")

            _report_abs_error(str(i) + "-th win output hub", hx1 - hx4)
            _report_abs_error(str(i) + "-th win output fxp", hx1 - hx3)

        print()
示例#3
0
def test_fsuadd():
    """Check ``FSUAdd`` against ``torch.sum`` for both polarities and scalings.

    For every combination of mode (bipolar/unipolar) and scaled/non-scaled
    accumulation, a random tensor quantised to ``2 ** -bitwidth`` steps is
    converted to bitstreams and summed along ``hwcfg["dima"]`` by ``FSUAdd``
    for ``2 ** bitwidth`` cycles. Input and output errors are tracked with
    ``ProgError`` and printed, together with the time spent inside ``FSUAdd``.
    """
    hwcfg = {
        "width": 12,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 20,
        "entry": None
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]
    rng = hwcfg["rng"]

    plot_en = False
    modes = ["bipolar", "unipolar"]
    size = [128, 256, 512]
    scaled = [True, False]

    # Accumulation dimension and the scale used in "scaled" mode: the number
    # of elements summed together.
    acc_dim = hwcfg["dima"]
    scale_mod = size[acc_dim]

    for mode in modes:
        for scale in scaled:
            run_time = 0
            result_pe_cycle = []
            hwcfg["mode"] = mode
            hwcfg["scale"] = scale_mod if scale else 1
            uadd = FSUAdd(hwcfg, swcfg).to(device)

            if mode == "unipolar":
                iVec = torch.rand(size).mul(2**bitwidth).round().div(
                    2**bitwidth).to(device)
            elif mode == "bipolar":
                iVec = torch.rand(size).mul(2).sub(1).mul(
                    2**bitwidth).round().div(2**bitwidth).to(device)

            oVec = torch.sum(iVec, acc_dim).to(device)

            iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iVecRNG = RNG(hwcfg, swcfg)().to(device)
            iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)
            # hwcfg is shared and mutated in place: the input monitor is built
            # with scale 1, the output monitor with the accumulation scale.
            hwcfg["scale"] = 1
            iVecPE = ProgError(iVec, hwcfg).to(device)
            print("iVecPE cfg", iVecPE.hwcfg)
            hwcfg["scale"] = scale_mod if scale else 1
            oVecPE = ProgError(oVec, hwcfg).to(device)
            print("oVecPE cfg", oVecPE.hwcfg)

            with torch.no_grad():
                idx = torch.zeros(iVecSource.size()).type(
                    torch.long).to(device)
                for i in range(2**bitwidth):
                    iBS = iVecBS(idx + i)
                    iVecPE.Monitor(iBS)

                    # Only the adder itself is timed.
                    start_time = time.time()
                    oVecU = uadd(iBS)
                    run_time = time.time() - start_time + run_time

                    if i == 0:
                        print("uadd cfg", uadd.hwcfg)

                    oVecPE.Monitor(oVecU)
                    out_err = oVecPE()[1]
                    rmse = torch.sqrt(torch.mean(torch.mul(out_err, out_err)))
                    result_pe_cycle.append(1 - rmse.item())
                # Report the time accumulated over all cycles; start_time is
                # reset every cycle and would only cover the last call.
                print("--- %s seconds ---" % run_time)
                print("RNG: " + rng + ", data: " + mode + ", scaled: " +
                      str(scale))
                in_err = iVecPE()[1]
                out_err = oVecPE()[1]
                print("input error:  ", "min: ",
                      torch.min(in_err).item(), "max: ",
                      torch.max(in_err).item())
                print("output error: ", "min: ",
                      torch.min(out_err).item(), "max: ",
                      torch.max(out_err).item(), "RMSE: ", rmse.item())
                print()
                if plot_en:
                    result_pe = out_err.cpu().numpy()
                    print("error distribution=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.hist(
                        result_pe.flatten(),
                        bins='auto')  # arguments are passed to np.histogram
                    plt.show()
                    print("progressive accuracy=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.plot(result_pe_cycle)
                    plt.show()
示例#4
0
def test_bi2uni():
    """Check ``Bi2Uni`` by feeding it a bipolar bitstream and monitoring both ends.

    A random vector quantised to ``2 ** -bitwidth`` steps is encoded with
    ``hwcfg["mode"] == "bipolar"``, passed through ``Bi2Uni`` for
    ``2 ** bitwidth`` cycles, and the output is monitored by a ``ProgError``
    built with mode ``"unipolar"``. Final errors and the positions of the
    extreme output errors are printed.
    """
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "rng": "sobol",
        "scale": 1,
        "depth": 3
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]

    in_dim = 1024

    uBi2Uni = Bi2Uni(hwcfg, swcfg).to(device)

    iVec = ((torch.rand(in_dim) * (2**bitwidth)).round() /
            (2**bitwidth)).to(device)
    start_time = time.time()
    # The expected output is the input value itself.
    oVec = iVec.type(torch.float)
    print("--- %s seconds ---" % (((time.time() - start_time)) * 2**bitwidth))

    print("input", iVec)
    print("real output", oVec)

    # hwcfg is shared and mutated in place so that each object is constructed
    # with the mode it should see.
    hwcfg["mode"] = "bipolar"
    iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)

    iVecRNG = RNG(hwcfg, swcfg)().to(device)
    iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

    iVecPE = ProgError(iVec, hwcfg).to(device)
    hwcfg["mode"] = "unipolar"
    oVecPE = ProgError(oVec, hwcfg).to(device)

    hwcfg["mode"] = "bipolar"

    with torch.no_grad():
        idx = torch.zeros(iVecSource.size()).type(torch.long).to(device)
        start_time = time.time()
        for i in range((2**bitwidth)):
            iBS = iVecBS(idx + i)
            iVecPE.Monitor(iBS)

            oVecU = uBi2Uni(iBS)
            oVecPE.Monitor(oVecU)
        print("--- %s seconds ---" % (time.time() - start_time))

        in_err = iVecPE()[1]
        out_pp, out_err = oVecPE()
        # Tensor reductions instead of the Python builtins min()/max(), which
        # would iterate the tensor one element at a time.
        print("final input error: ", in_err.min(), in_err.max())
        print("final output error:", out_err.min(), out_err.max())
        print("final output pp:", out_pp.data)
        print("final output pe:", out_err.data)
        print("final output mean error:", out_err.mean())

        result_pe = out_err.cpu().numpy()

    print(result_pe)
    worst_low = result_pe.argmin()
    worst_high = result_pe.argmax()
    print(worst_low, worst_high)
    print(result_pe[worst_low], result_pe[worst_high])
    print(iVec[worst_low], iVec[worst_high])
def test_fsuconv2d():
    """Check ``FSUConv2d`` against ``torch.nn.Conv2d`` with shared parameters.

    For every combination of mode (bipolar/unipolar) and scaled/non-scaled
    accumulation, a ``Conv2d`` with quantised random weights provides the
    reference output. ``FSUConv2d`` receives the same weights and bias and is
    driven with the input bitstream for ``2 ** hwcfg["width"]`` cycles. Input
    and output errors are tracked with ``ProgError`` and printed.
    """
    plot_en = False

    hwcfg_input = {"width": 8, "rng": "Sobol", "dimr": 1}
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "scale": None,
        "depth": 20,
        "rng": "Sobol",
        "dimr": 1
    }
    swcfg = {"btype": torch.float, "rtype": torch.float, "stype": torch.float}

    rng = hwcfg["rng"]

    in_channels = 32
    out_channels = 16
    kernel_size = 3
    stride = 2
    padding = 0
    dilation = 1
    groups = 1
    bias = True
    padding_mode = 'zeros'

    modes = ["bipolar", "unipolar"]
    scaled = [True, False]

    # Scale used in "scaled" mode: kernel elements per output, plus one when
    # a bias is present (the bool counts as 0 or 1).
    scale_mod = kernel_size * kernel_size * in_channels + bias

    for mode in modes:
        for scale in scaled:
            hwcfg["mode"] = mode
            hwcfg_input["mode"] = mode
            hwcfg["scale"] = scale_mod if scale else 1

            length = 2**hwcfg["width"]
            length_input = 2**hwcfg_input["width"]
            result_pe_cycle = []
            conv2d = torch.nn.Conv2d(in_channels,
                                     out_channels,
                                     kernel_size,
                                     stride=stride,
                                     padding=padding,
                                     dilation=dilation,
                                     groups=groups,
                                     bias=bias,
                                     padding_mode=padding_mode).to(device)

            # Replace the default initialisation with values quantised to
            # 1/length steps, in [0, 1] for unipolar and [-1, 1] for bipolar.
            if mode == "unipolar":
                conv2d.weight.data = torch.rand(
                    out_channels, in_channels, kernel_size,
                    kernel_size).mul(length).round().div(length).to(device)
                if bias:
                    conv2d.bias.data = torch.rand(out_channels).mul(
                        length).round().div(length).to(device)
            elif mode == "bipolar":
                conv2d.weight.data = torch.rand(
                    out_channels, in_channels, kernel_size, kernel_size).mul(
                        2).sub(1).mul(length).round().div(length).to(device)
                if bias:
                    conv2d.bias.data = torch.rand(out_channels).mul(2).sub(
                        1).mul(length).round().div(length).to(device)

            uconv2d = FSUConv2d(in_channels,
                                out_channels,
                                kernel_size,
                                stride=stride,
                                padding=padding,
                                dilation=dilation,
                                groups=groups,
                                bias=bias,
                                padding_mode=padding_mode,
                                weight_ext=conv2d.weight,
                                bias_ext=conv2d.bias,
                                hwcfg=hwcfg,
                                swcfg=swcfg).to(device)

            input_size = (128, 32)
            iVec = (
                (torch.rand(32, in_channels, input_size[0], input_size[1]) *
                 length_input).round() / length_input).to(device)
            oVec = conv2d(iVec)

            # NOTE(review): the source values are generated with hwcfg while
            # the RNG uses hwcfg_input; both have width 8 and the same mode
            # here -- confirm which config BinGen is meant to receive.
            iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iVecRNG = RNG(hwcfg_input, swcfg)().to(device)
            iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

            # hwcfg is shared and mutated in place: the input monitor is built
            # with scale 1, the output monitor with the accumulation scale.
            hwcfg["scale"] = 1
            iVecPE = ProgError(iVec, hwcfg).to(device)

            hwcfg["scale"] = scale_mod if scale else 1
            oVecPE = ProgError(oVec, hwcfg).to(device)

            with torch.no_grad():
                idx = torch.zeros(iVecSource.size()).type(
                    torch.long).to(device)
                start_time = time.time()
                for i in range(length):
                    iBS = iVecBS(idx + i)
                    iVecPE.Monitor(iBS)

                    oVecU = uconv2d(iBS)
                    oVecPE.Monitor(oVecU)
                    # Evaluate the monitor once per cycle and reuse the result.
                    out_err = oVecPE()[1]
                    rmse = torch.sqrt(
                        torch.sum(torch.mul(out_err, out_err)) /
                        out_err.numel())
                    if plot_en:
                        result_pe_cycle.append(1 - rmse.item())
                print("--- %s seconds ---" % (time.time() - start_time))
                print("RNG: " + rng + ", data: " + mode + ", scaled: " +
                      str(scale))
                in_err = iVecPE()[1]
                out_err = oVecPE()[1]
                print("input error:  ", "min: ",
                      torch.min(in_err).item(), "max: ",
                      torch.max(in_err).item())
                print("output error: ", "min: ",
                      torch.min(out_err).item(), "max: ",
                      torch.max(out_err).item(), "RMSE: ", rmse.item())
                print()
                if plot_en:
                    result_pe = out_err.cpu().numpy()
                    print("error distribution=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.hist(
                        result_pe.flatten(),
                        bins='auto')  # arguments are passed to np.histogram
                    plt.show()
                    print("progressive accuracy=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.plot(result_pe_cycle)
                    plt.show()