Exemplo n.º 1
0
    def forward(self, input: Tensor, hx: Tensor) -> Tensor:
        """Compute the next hidden state by simulating the cell cycle by cycle.

        A fresh ``FSUMGUCell`` is built from this layer's external weights and
        biases. ``input`` and ``hx`` are each turned into a bit stream
        (``BinGen`` -> ``RNG`` -> ``BSGen``), and the cell is stepped for
        ``2**hwcfg["width"]`` cycles while a ``ProgError`` monitor observes
        every output bit.

        Args:
            input: Input tensor; its first dimension is used as the batch size.
            hx: Previous hidden state, or ``None`` to start from zeros of shape
                ``(input.size()[0], self.hidden_size)``.

        Returns:
            Element 0 of the ``ProgError`` monitor's result after the last
            cycle (presumably the value accumulated from the output stream --
            confirm against ``ProgError``).
        """
        device = input.device
        batch = input.size()[0]

        if hx is None:
            hx = torch.zeros(batch, self.hidden_size, dtype=input.dtype, device=device)

        rnncell = FSUMGUCell(self.input_size, self.hidden_size, bias=self.bias,
                        weight_ext_f=self.weight_f, bias_ext_f=self.bias_f, weight_ext_n=self.weight_n, bias_ext_n=self.bias_n,
                        hx_buffer=hx,
                        hwcfg=self.hwcfg, swcfg=self.swcfg).to(device)

        # Bit-stream generator for the input.
        iSource = BinGen(input, self.hwcfg, self.swcfg)().to(device)
        iRNG = RNG(self.hwcfg, self.swcfg)().to(device)
        iBSG = BSGen(iSource, iRNG, self.swcfg).to(device)

        # Bit-stream generator for the hidden state.
        hSource = BinGen(hx, self.hwcfg, self.swcfg)().to(device)
        hRNG = RNG(self.hwcfg, self.swcfg)().to(device)
        hBSG = BSGen(hSource, hRNG, self.swcfg).to(device)

        # Monitor that observes the output bits; it is seeded with a zero
        # tensor of the hidden-state shape.
        oPE = ProgError(torch.zeros(batch, self.hidden_size, dtype=input.dtype, device=device),
                        self.hwcfg_ope).to(device)

        # The zero index tensors do not depend on the cycle, so allocate them
        # once instead of on every iteration; ``idx + c`` creates a new tensor
        # each cycle and leaves these untouched.
        idx = torch.zeros(iSource.size(), dtype=torch.long, device=device)
        hdx = torch.zeros(hSource.size(), dtype=torch.long, device=device)

        for c in range(2**self.hwcfg["width"]):
            iBS = iBSG(idx + c)
            hBS = hBSG(hdx + c)

            oBS = rnncell(iBS, hBS)
            oPE.Monitor(oBS)

        hy = oPE()[0]
        return hy
Exemplo n.º 2
0
    def __init__(self,
                 hwcfg={
                     "entry": 4,
                     "rng": "Sobol",
                     "dimr": 4
                 },
                 swcfg={"stype": torch.float}):
        """Set up the shift register, random sequence and state of the kernel.

        Args:
            hwcfg: Hardware config with keys ``"entry"``, ``"rng"`` and
                ``"dimr"``. ``"entry"`` is fed to ``math.log2`` to derive the
                RNG width.
            swcfg: Software config with key ``"stype"``, the type used for the
                ``historic_q`` state.
        """
        super(CORDIV_kernel, self).__init__()

        # Keep private copies of the configs; the RNG name is stored lowercased.
        self.hwcfg = {
            "entry": hwcfg["entry"],
            "rng": hwcfg["rng"].lower(),
            "dimr": hwcfg["dimr"],
        }
        self.swcfg = {"stype": swcfg["stype"]}

        self.entry = hwcfg["entry"]
        self.sr = ShiftReg(self.hwcfg, self.swcfg)

        # The RNG is log2(entry) bits wide and uses the caller's original
        # (non-lowercased) RNG name.
        rng_hw = {
            "width": int(math.log2(self.entry)),
            "dimr": hwcfg["dimr"],
            "rng": hwcfg["rng"],
        }
        rng_sw = {"rtype": torch.long}
        self.rng = RNG(rng_hw, rng_sw)()

        # Non-trainable state: an index initialised to 0 and ``historic_q``
        # initialised to 1.
        self.idx = torch.nn.Parameter(torch.zeros(1, dtype=torch.long),
                                      requires_grad=False)
        self.stype = swcfg["stype"]
        initial_q = torch.ones(1).type(self.stype)
        self.historic_q = torch.nn.Parameter(initial_q, requires_grad=False)
Exemplo n.º 3
0
def test_bi2uni():
    """Drive ``Bi2Uni`` with a random bipolar bit stream and print its error.

    A random vector quantised to ``2**width`` levels is turned into a bit
    stream, every bit is passed through ``Bi2Uni``, and two ``ProgError``
    monitors (bipolar for the input, unipolar for the output) report the
    resulting error statistics. Relies on the module-level ``device``.
    """
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "rng": "sobol",
        "scale": 1,
        "depth": 3
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]
    in_dim = 1024

    uBi2Uni = Bi2Uni(hwcfg, swcfg).to(device)

    iVec = ((torch.rand(in_dim) * (2**bitwidth)).round() /
            (2**bitwidth)).to(device)
    start_time = time.time()
    # The reference output is the input value itself.
    oVec = iVec.type(torch.float)
    print("--- %s seconds ---" % (((time.time() - start_time)) * 2**bitwidth))

    print("input", iVec)
    print("real output", oVec)

    # The input side is generated and monitored in bipolar mode ...
    hwcfg["mode"] = "bipolar"
    iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)

    iVecRNG = RNG(hwcfg, swcfg)().to(device)
    iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

    iVecPE = ProgError(iVec, hwcfg).to(device)
    # ... while the output monitor is configured as unipolar.
    hwcfg["mode"] = "unipolar"
    oVecPE = ProgError(oVec, hwcfg).to(device)

    hwcfg["mode"] = "bipolar"

    with torch.no_grad():
        idx = torch.zeros(iVecSource.size(), dtype=torch.long, device=device)
        start_time = time.time()
        for i in range(2**bitwidth):
            iBS = iVecBS(idx + i)
            iVecPE.Monitor(iBS)

            oVecU = uBi2Uni(iBS)
            oVecPE.Monitor(oVecU)
        print("--- %s seconds ---" % (time.time() - start_time))
        # torch.min/max reduce on the tensor directly instead of iterating
        # over its elements with the Python builtins.
        print("final input error: ", torch.min(iVecPE()[1]),
              torch.max(iVecPE()[1]))
        print("final output error:", torch.min(oVecPE()[1]),
              torch.max(oVecPE()[1]))
        print("final output pp:", oVecPE()[0].data)
        print("final output pe:", oVecPE()[1].data)
        print("final output mean error:", oVecPE()[1].mean())

        result_pe = oVecPE()[1].cpu().numpy()

    worst_low = result_pe.argmin()
    worst_high = result_pe.argmax()
    print(result_pe)
    print(worst_low, worst_high)
    print(result_pe[worst_low], result_pe[worst_high])
    print(iVec[worst_low], iVec[worst_high])
Exemplo n.º 4
0
def test_fsuconv2d():
    """Compare ``FSUConv2d`` against ``torch.nn.Conv2d`` and print the error.

    For every combination of data mode (bipolar/unipolar) and scaled /
    non-scaled accumulation, a reference ``Conv2d`` with quantised random
    weights is built, the same weights are handed to ``FSUConv2d``, and a
    random quantised input is streamed through the unary layer for
    ``2**width`` cycles. ``ProgError`` monitors report the input and output
    errors. Relies on the module-level ``device`` (and ``plt`` when
    ``plot_en`` is switched on).
    """
    plot_en = False

    hwcfg_input = {"width": 8, "rng": "Sobol", "dimr": 1}
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "scale": None,
        "depth": 20,
        "rng": "Sobol",
        "dimr": 1
    }
    swcfg = {"btype": torch.float, "rtype": torch.float, "stype": torch.float}

    rng = hwcfg["rng"]

    in_channels = 32
    out_channels = 16
    kernel_size = 3
    stride = 2
    padding = 0
    dilation = 1
    groups = 1
    bias = True
    padding_mode = 'zeros'

    # Scale used for the "scaled" runs: one per accumulated product, plus one
    # for the bias when it is enabled (``bias`` is a bool and adds 0 or 1).
    full_scale = kernel_size * kernel_size * in_channels + bias
    weight_shape = (out_channels, in_channels, kernel_size, kernel_size)

    def quantize(values, levels):
        """Round ``values`` to multiples of ``1 / levels`` and move to device."""
        return values.mul(levels).round().div(levels).to(device)

    def rmse_of(err):
        """Root-mean-square of an error tensor."""
        return torch.sqrt(torch.sum(torch.mul(err, err)) / err.numel())

    for mode in ["bipolar", "unipolar"]:
        for scale in [True, False]:
            out_scale = full_scale if scale else 1

            hwcfg["mode"] = mode
            hwcfg_input["mode"] = mode
            hwcfg["scale"] = out_scale

            length = 2**hwcfg["width"]
            length_input = 2**hwcfg_input["width"]
            result_pe_cycle = []
            conv2d = torch.nn.Conv2d(in_channels,
                                     out_channels,
                                     kernel_size,
                                     stride=stride,
                                     padding=padding,
                                     dilation=dilation,
                                     groups=groups,
                                     bias=bias,
                                     padding_mode=padding_mode).to(device)

            # Quantised random parameters: [0, 1] for unipolar, [-1, 1] for
            # bipolar. The weight is drawn before the bias.
            if mode == "unipolar":
                conv2d.weight.data = quantize(torch.rand(weight_shape), length)
                if bias:
                    conv2d.bias.data = quantize(torch.rand(out_channels),
                                                length)
            elif mode == "bipolar":
                conv2d.weight.data = quantize(
                    torch.rand(weight_shape).mul(2).sub(1), length)
                if bias:
                    conv2d.bias.data = quantize(
                        torch.rand(out_channels).mul(2).sub(1), length)

            uconv2d = FSUConv2d(in_channels,
                                out_channels,
                                kernel_size,
                                stride=stride,
                                padding=padding,
                                dilation=dilation,
                                groups=groups,
                                bias=bias,
                                padding_mode=padding_mode,
                                weight_ext=conv2d.weight,
                                bias_ext=conv2d.bias,
                                hwcfg=hwcfg,
                                swcfg=swcfg).to(device)

            input_size = (128, 32)
            iVec = quantize(
                torch.rand(32, in_channels, input_size[0], input_size[1]),
                length_input)
            oVec = conv2d(iVec)

            iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iVecRNG = RNG(hwcfg_input, swcfg)().to(device)
            iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

            # The input monitor uses scale 1; the output monitor uses the
            # same scale as the layer under test.
            hwcfg["scale"] = 1
            iVecPE = ProgError(iVec, hwcfg).to(device)

            hwcfg["scale"] = out_scale
            oVecPE = ProgError(oVec, hwcfg).to(device)

            with torch.no_grad():
                idx = torch.zeros(iVecSource.size()).type(
                    torch.long).to(device)
                start_time = time.time()
                for i in range(length):
                    iBS = iVecBS(idx + i)
                    iVecPE.Monitor(iBS)

                    oVecU = uconv2d(iBS)
                    oVecPE.Monitor(oVecU)
                    # The per-cycle RMSE is only needed for the progressive
                    # accuracy plot.
                    if plot_en:
                        result_pe_cycle.append(
                            1 - rmse_of(oVecPE()[1]).item())
                rmse = rmse_of(oVecPE()[1])
                print("--- %s seconds ---" % (time.time() - start_time))
                print("RNG: " + rng + ", data: " + mode + ", scaled: " +
                      str(scale))
                print("input error:  ", "min: ",
                      torch.min(iVecPE()[1]).item(), "max: ",
                      torch.max(iVecPE()[1]).item())
                print("output error: ", "min: ",
                      torch.min(oVecPE()[1]).item(), "max: ",
                      torch.max(oVecPE()[1]).item(), "RMSE: ", rmse.item())
                print()
                if plot_en:
                    result_pe = oVecPE()[1].cpu().numpy()
                    print("error distribution=========>")
                    plt.figure(figsize=(3, 1.5))
                    # arguments are passed to np.histogram
                    plt.hist(result_pe.flatten(), bins='auto')
                    plt.show()
                    print("progressive accuracy=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.plot(result_pe_cycle)
                    plt.show()
Exemplo n.º 5
0
def test_fsudiv():
    """Exhaustively exercise ``FSUDiv`` and print its error statistics.

    Every (dividend, divisor) pair on the ``2**width`` grid is streamed
    through the divider. Positions whose exact quotient is NaN or infinite
    (division by zero) are excluded from the reported statistics. Relies on
    the module-level ``device``.
    """
    hwcfg = {
        "width": 8,
        "mode": "unipolar",
        "rng": "Sobol",
        "dimr": 4,
        "scale": 1,
        "depth_sa": 3,
        "depth_ss": 2,
        "entry_kn": 2
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    bitwidth = hwcfg["width"]
    mode = hwcfg["mode"]
    total_cnt = 1

    print("========================================================")
    print(mode)
    print("========================================================")
    if mode == "unipolar":
        # all values in unipolar are non-negative
        # dividend is never greater than divisor
        # divisor is non-zero
        low_bound = 0
        up_bound = 2**bitwidth
    elif mode == "bipolar":
        # values in bipolar are arbitrarily positive or negative
        # abs of dividend is never greater than abs of divisor
        # abs of divisor is non-zero
        low_bound = -2**(bitwidth - 1)
        up_bound = 2**(bitwidth - 1)
    else:
        raise ValueError("unsupported mode: " + str(mode))

    # Row r of the grid holds divisor ``up_bound - r`` paired with every
    # dividend from low_bound to up_bound.
    dividend_row = list(range(low_bound, up_bound + 1))
    divisor_values = range(up_bound, low_bound - 1, -1)
    dividend_list = [list(dividend_row) for _ in divisor_values]
    divisor_list = [[val] * len(dividend_row) for val in divisor_values]

    dividend = torch.tensor(dividend_list).type(
        torch.float).div(up_bound).to(device)
    divisor = torch.tensor(divisor_list).type(
        torch.float).div(up_bound).to(device)
    quotient = dividend.div(divisor)

    # find the invalid positions in quotient
    quotient_mask = torch.isnan(quotient) | torch.isinf(quotient)
    quotient[quotient_mask] = 0
    quotient = quotient.clamp(-1, 1)

    # The bit-stream RNGs use dimr 1. Work on a copy so the divider under
    # test keeps seeing the original "dimr" on every iteration.
    hwcfg_rng = dict(hwcfg)
    hwcfg_rng["dimr"] = 1

    result_pe_total = []
    for _ in range(total_cnt):
        quotientPE = ProgError(quotient, hwcfg).to(device)

        dividendPE = ProgError(dividend, hwcfg).to(device)
        dividendSRC = BinGen(dividend, hwcfg, swcfg)().to(device)

        divisorPE = ProgError(divisor, hwcfg).to(device)
        divisorSRC = BinGen(divisor, hwcfg, swcfg)().to(device)

        dut_div = FSUDiv(hwcfg, swcfg).to(device)

        dividendRNG = RNG(hwcfg_rng, swcfg)().to(device)
        dividendBS = BSGen(dividendSRC, dividendRNG, swcfg).to(device)
        divisorRNG = RNG(hwcfg_rng, swcfg)().to(device)
        divisorBS = BSGen(divisorSRC, divisorRNG, swcfg).to(device)
        with torch.no_grad():
            for i in range(2**bitwidth):
                dividend_bs = dividendBS(torch.tensor([i]))
                dividendPE.Monitor(dividend_bs)

                divisor_bs = divisorBS(torch.tensor([i]))
                divisorPE.Monitor(divisor_bs)

                quotient_bs = dut_div(dividend_bs, divisor_bs)
                quotientPE.Monitor(quotient_bs)

        # collect the error of this run, blanking the invalid positions
        result_pe = quotientPE()[1].cpu().numpy()
        result_pe[quotient_mask.cpu().numpy()] = np.nan
        result_pe_total.append(result_pe)

    result_pe_total = np.array(result_pe_total)

    #######################################################################
    # check the error of all simulation
    #######################################################################
    result_pe_total_no_nan = result_pe_total[~np.isnan(result_pe_total)]
    print("RMSE:{:1.4}".format(math.sqrt(np.mean(result_pe_total_no_nan**2))))
    print("MAE: {:1.4}".format(np.mean(np.abs(result_pe_total_no_nan))))
    print("bias:{:1.4}".format(np.mean(result_pe_total_no_nan)))
    print("max: {:1.4}".format(np.max(result_pe_total_no_nan)))
    print("min: {:1.4}".format(np.min(result_pe_total_no_nan)))
Exemplo n.º 6
0
def test_fsumul():
    """Check the static ``FSUMul`` multiplier in bipolar and unipolar mode.

    For each mode a random quantised operand is baked into the multiplier, a
    second random quantised vector is streamed through it for ``2**width``
    cycles, and the input/output errors reported by ``ProgError`` are printed.
    Relies on the module-level ``device``.
    """
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 10,
        "entry": None,
        "static": True
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]
    levels = 2**bitwidth
    col = 100

    def quantize(values):
        """Round to multiples of ``1 / levels`` and move to the device."""
        return values.mul(levels).round().div(levels).to(device)

    for mode in ("bipolar", "unipolar"):
        # Draw the static operand first, then the streamed operand.
        if mode == "unipolar":
            input_prob = quantize(torch.rand(col))
            iVec = quantize(torch.rand(col))
        elif mode == "bipolar":
            input_prob = quantize(torch.rand(col).mul(2).sub(1))
            iVec = quantize(torch.rand(col).mul(2).sub(1))

        hwcfg["mode"] = mode

        dut_mul = FSUMul(input_prob, hwcfg, swcfg).to(device)

        # Reference product, quantised to the same grid.
        oVec = quantize(torch.mul(iVec, input_prob))

        iVecPE = ProgError(iVec, hwcfg).to(device)
        oVecPE = ProgError(oVec, hwcfg).to(device)

        iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
        iVecRNG = RNG(hwcfg, swcfg)().to(device)
        iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

        with torch.no_grad():
            start_time = time.time()
            # unary cycle count is 2^n for n-bit binary data
            for cycle in range(levels):
                iBS = iVecBS(torch.tensor([cycle]))  # input bit stream
                iVecPE.Monitor(iBS)  # input accuracy measurement
                oVecU = dut_mul(iBS)  # kernel under test
                oVecPE.Monitor(oVecU)  # output accuracy measurement
            elapsed = time.time() - start_time
            print("--- %s seconds ---" % (elapsed))
            print("input error: ", torch.min(iVecPE()[1]),
                  torch.max(iVecPE()[1]))
            print("output error: ", torch.min(oVecPE()[1]),
                  torch.max(oVecPE()[1]))
            result_pe = oVecPE()[1].cpu().numpy()
            count = len(result_pe)
            print("RMSE", math.sqrt(sum(result_pe**2) / count))
            print("bias", sum(result_pe) / count)
Exemplo n.º 7
0
def test_fsusqrt(mode="unipolar",
                 bitwidth=8,
                 emit=True,
                 jk_trace=False,
                 depth_kernel=1,
                 depth_sr=4,
                 savepdf=False,
                 total_cnt=1):
    """Exhaustively exercise ``FSUSqrt`` and print its error statistics.

    Every non-negative value on the quantisation grid is streamed through the
    square-root kernel for ``2**bitwidth`` cycles and compared against
    ``torch.sqrt``. Relies on the module-level ``device``.

    Args:
        mode: ``"unipolar"`` or ``"bipolar"``.
        bitwidth: Data width; stored as ``hwcfg["width"]``.
        emit: Stored as ``hwcfg["emit"]``.
        jk_trace: Stored as ``hwcfg["jk_trace"]``.
        depth_kernel: Stored as ``hwcfg["entry_kn"]``.
        depth_sr: Stored as ``hwcfg["entry_sr"]``.
        savepdf: Unused; kept for interface compatibility.
        total_cnt: Number of independent simulation runs.

    Raises:
        ValueError: If ``mode`` is neither ``"unipolar"`` nor ``"bipolar"``.
    """
    hwcfg = {
        "width": bitwidth,
        "mode": mode,
        "dima": 0,
        "scale": 1,
        "depth": 10,
        "entry": None,
        "jk_trace": jk_trace,
        "emit": emit,
        "entry_kn": depth_kernel,
        "entry_sr": depth_sr,
        "rng": "Sobol",
        "dimr": 4
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    print("========================================================")
    print(mode)
    print("========================================================")
    # all input values are non-negative
    low_bound = 0
    if mode == "unipolar":
        up_bound = 2**bitwidth
    elif mode == "bipolar":
        up_bound = 2**(bitwidth - 1)
    else:
        raise ValueError("unsupported mode: " + str(mode))

    input_list = list(range(low_bound, up_bound + 1))

    # Named in_vec/ref_out so the builtin ``input`` is not shadowed.
    in_vec = torch.tensor(input_list).type(torch.float).div(up_bound).to(device)
    ref_out = torch.sqrt(in_vec).to(device)

    result_pe_total = []
    for _ in range(total_cnt):
        outputPE = ProgError(ref_out, hwcfg).to(device)

        inputPE = ProgError(in_vec, hwcfg).to(device)
        inputSRC = BinGen(in_vec, hwcfg, swcfg)().to(device)

        dut_sqrt = FSUSqrt(hwcfg, swcfg).to(device)

        inputRNG = RNG(hwcfg, swcfg)().to(device)
        inputBS = BSGen(inputSRC, inputRNG, swcfg).to(device)
        with torch.no_grad():
            for i in range(2**bitwidth):
                input_bs = inputBS(torch.tensor([i]))
                inputPE.Monitor(input_bs)

                output_bs = dut_sqrt(input_bs)
                outputPE.Monitor(output_bs)

        # collect the error of this run
        result_pe = outputPE()[1].cpu().numpy()
        result_pe_total.append(result_pe)

    result_pe_total = np.array(result_pe_total)

    #######################################################################
    # check the error of all simulation
    #######################################################################
    print("RMSE:{:1.4}".format(math.sqrt(np.mean(result_pe_total**2))))
    print("MAE: {:1.4}".format(np.mean(np.abs(result_pe_total))))
    print("bias:{:1.4}".format(np.mean(result_pe_total)))
    print("max: {:1.4}".format(np.max(result_pe_total)))
    print("min: {:1.4}".format(np.min(result_pe_total)))
Exemplo n.º 8
0
    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        bias=True,
        padding_mode='zeros',
        weight_ext=None,
        bias_ext=None,
        hwcfg={
            "widthi": 8,
            "rngi": "Sobol",
            "quantilei": 1,
            "widthw": 8,
            "rngw": "Sobol",
            "quantilew": 1,
            "cycle": 128,
            "rounding": "round",
            "signmag": True
        }):
        """Build the layer, validate its config and precompute lookup maps.

        The convolution arguments are forwarded unchanged to the parent
        constructor (presumably ``torch.nn.Conv2d`` -- confirm against the
        class header).

        Args:
            in_channels, out_channels, kernel_size, stride, padding, dilation,
                groups, bias, padding_mode: Forwarded to the parent
                constructor. ``groups`` must be 1 and ``padding_mode`` must be
                ``'zeros'``.
            weight_ext: Optional 4-D weight tensor of shape
                ``(out_channels, in_channels, kH, kW)`` that replaces
                ``self.weight.data``.
            bias_ext: Optional bias tensor whose first dimension equals
                ``out_channels``; only used when ``bias`` is truthy.
            hwcfg: Hardware config. ``"rngi"``, ``"rngw"`` and ``"rounding"``
                are lowercased; ``"quantilei"``/``"quantilew"`` must lie in
                (0, 1]; ``"rounding"`` must be one of round/ceil/floor;
                ``"signmag"`` must be ``True``.

        Raises:
            AssertionError: If any of the constraints above is violated.
        """
        super(HUBConv2d,
              self).__init__(in_channels, out_channels, kernel_size, stride,
                             padding, dilation, groups, bias, padding_mode)
        # Private, normalised copy of the hardware config.
        self.hwcfg = {}
        self.hwcfg["widthi"] = hwcfg["widthi"]
        self.hwcfg["rngi"] = hwcfg["rngi"].lower()
        self.hwcfg["quantilei"] = hwcfg["quantilei"]
        self.hwcfg["widthw"] = hwcfg["widthw"]
        self.hwcfg["rngw"] = hwcfg["rngw"].lower()
        self.hwcfg["quantilew"] = hwcfg["quantilew"]
        self.hwcfg["rounding"] = hwcfg["rounding"].lower()
        self.hwcfg["signmag"] = hwcfg["signmag"]
        # The requested cycle count is capped at 2**(max width - signmag);
        # ``signmag`` is a bool and subtracts 0 or 1.
        self.hwcfg["cycle"] = min(
            hwcfg["cycle"],
            2**(max(hwcfg["widthi"], hwcfg["widthw"]) - hwcfg["signmag"]))

        # Flags telling whether input / weight use one of the RNG kinds that
        # the assertion below refers to as temporal coding.
        self.itc = (self.hwcfg["rngi"] in ["race", "tc", "race10", "tc10"])
        self.wtc = (self.hwcfg["rngw"] in ["race", "tc", "race10", "tc10"])

        assert not (self.itc and self.wtc), \
            "Error: the hw config 'rngi' and 'rngw' in " + str(self) + " class can't adopt temporal coding simultaneously."

        assert self.hwcfg["quantilei"] > 0 and self.hwcfg["quantilei"] <= 1, \
            "Error: the hw config 'quantilei' of " + str(self) + " class needs to be within (0, 1]."

        assert self.hwcfg["quantilew"] > 0 and self.hwcfg["quantilew"] <= 1, \
            "Error: the hw config 'quantilew' of " + str(self) + " class needs to be within (0, 1]."

        assert self.hwcfg["rounding"] in ["round", "ceil", "floor"], \
            "Error: the hw config 'rounding' of " + str(self) + " class requires one of ['round', 'ceil', 'floor']."

        assert self.hwcfg["signmag"] is True, \
            "Error: the hw config 'signmag' of " + str(self) + " class requires to be True, i.e., always computing on sign-magnitue data, for diverse architectures."

        # maximum possible run cycle
        self.cycle_max = 2**(max(hwcfg["widthi"], hwcfg["widthw"]) -
                             hwcfg["signmag"])
        # actual run cycle (same value as self.hwcfg["cycle"] computed above)
        self.cycle_act = min(
            hwcfg["cycle"],
            2**(max(hwcfg["widthi"], hwcfg["widthw"]) - hwcfg["signmag"]))

        assert groups == 1, \
            "Error: the 'groups' in " + str(self) + " class requires to be 1."
        assert padding_mode == 'zeros', \
            "Error: the 'padding_mode' in " + str(self) + " class requires to be 'zeros'."

        # weight and bias: optionally replace the parent's parameters with
        # externally supplied tensors after checking their shapes
        if weight_ext is not None:
            assert (weight_ext.size()[0], weight_ext.size()[1], weight_ext.size()[2], weight_ext.size()[3]) == (out_channels, in_channels, num2tuple(kernel_size)[0], num2tuple(kernel_size)[1]), \
                "Error: the hw config 'out_channels, in_channels, kernel_size' in " + str(self) + " class unmatches the binary weight shape."
            self.weight.data = weight_ext

        if bias and (bias_ext is not None):
            assert bias_ext.size()[0] == out_channels, \
                "Error: the hw config 'out_channels' in " + str(self) + " class unmatches the binary bias shape."
            self.bias.data = bias_ext

        # All RNGs below are created with float types.
        swcfg = {
            "btype": torch.float,
            "rtype": torch.float,
            "stype": torch.float
        }

        # random_sequence from RNG; the width excludes the sign bit
        # (width - signmag)
        hwcfg_irng = {
            "width": self.hwcfg["widthi"] - self.hwcfg["signmag"],
            "dimr": 1,
            "rng": self.hwcfg["rngi"]
        }
        self.irng = RNG(hwcfg_irng, swcfg)()
        hwcfg_wrng = {
            "width": self.hwcfg["widthw"] - self.hwcfg["signmag"],
            "dimr": 1,
            "rng": self.hwcfg["rngw"]
        }
        self.wrng = RNG(hwcfg_wrng, swcfg)()

        # Pick which sequence acts as controller (rngctler) and which as
        # controllee (rngctlee). The case itc and wtc both True is excluded
        # by the assertion above, so it has no branch here.
        if (self.itc) and (not self.wtc):
            # cbsg controller is input
            self.rngctler = self.irng
            self.rngctlee = self.wrng
        elif (not self.itc) and (self.wtc):
            # cbsg controller is weight
            self.rngctler = self.wrng
            self.rngctlee = self.irng
        elif (not self.itc) and (not self.wtc):
            # when rate coding is applied to both input and weight, always control weight with input
            # the hardware cost of doing this is similar to the opposite
            self.rngctler = self.irng
            self.rngctlee = self.wrng

        # generate the value map for mul using current rng
        # dim 0 is input index
        # the tensor input value is the actual value produced by the rngctler
        self.mapctler = torch.nn.Parameter(torch.empty(self.cycle_max),
                                           requires_grad=False)
        # cycle_ctlerval is a (cycle_max, cycle_max) float matrix whose row r
        # is filled with the value r: cycle_max copies of the column vector
        # arange(cycle_max) concatenated along dim 1.
        cycle_ctlerval = torch.empty(0)
        torch.cat(
            self.cycle_max *
            [torch.arange(self.cycle_max, dtype=torch.float).unsqueeze(1)],
            1,
            out=cycle_ctlerval)
        # cycle_ctlerbit[r, j] is non-zero where r > rngctler[j]; the result
        # is written into a float tensor via ``out=``.
        # NOTE(review): assumes the rngctler sequence broadcasts against
        # cycle_max columns -- confirm when widthi != widthw.
        cycle_ctlerbit = torch.empty(0)
        torch.gt(cycle_ctlerval,
                 self.rngctler.unsqueeze(0),
                 out=cycle_ctlerbit)
        # mapctler[r] = number of controller random values smaller than r
        self.mapctler.data = torch.sum(cycle_ctlerbit,
                                       1).squeeze_().type(torch.long)

        # dim 0 is input index, dim 1 is weight index
        # the tensor value is the actual weight value produced by the rngctlee, under a specific input and weight
        self.mapctlee = torch.nn.Parameter(torch.empty(self.cycle_max,
                                                       self.cycle_max),
                                           requires_grad=False)
        # cycle_ctleebit[w, j] is non-zero where w > rngctlee[j]
        cycle_ctleebit = torch.empty(0)
        torch.gt(cycle_ctlerval,
                 self.rngctlee.unsqueeze(0),
                 out=cycle_ctleebit)
        # For controller value c only the first mapctler[c] columns are
        # counted, so mapctlee[c][w] is the number of those columns whose
        # controllee random value is smaller than w.
        for c in range(self.cycle_max):
            self.mapctlee.data[c] = torch.sum(
                cycle_ctleebit[:, 0:self.mapctler.data[c]], 1).squeeze_()

        # Shift amounts are not known yet; left as None here.
        self.rshift_i = None
        self.rshift_w = None
        self.rshift_o = None
Exemplo n.º 9
0
    def __init__(
            self,
            hwcfg={
                "mode": "bipolar",
                "jk_trace": True,
                "emit": True,
                "entry_kn": 1,
                "entry_sr": 2,
                "rng": "Sobol",
                "dimr": 4
            },
            swcfg={
                "stype": torch.float,
                "btype": torch.float
            }):
        """Validate the config and build the sub-blocks of the selected design.

        Exactly one of three structures is instantiated:

        * ``emit`` is ``True``: an adder, a shift register, an RNG and an
          index counter (plus a ``Bi2Uni`` converter in bipolar mode).
        * ``emit`` is not ``True`` and ``jk_trace`` is ``True``: a trace
          register and a ``JKFF``.
        * otherwise: a trace register, a ``CORDIV_kernel`` and a ``dff``
          register.

        Args:
            hwcfg: Hardware config with keys ``"mode"``, ``"jk_trace"``,
                ``"emit"``, ``"entry_kn"``, ``"entry_sr"``, ``"rng"`` and
                ``"dimr"``. ``"mode"`` and ``"rng"`` are lowercased.
            swcfg: Software config with keys ``"stype"`` and ``"btype"``.

        Raises:
            AssertionError: If ``mode`` is not unipolar/bipolar, or
                ``entry_kn`` / ``entry_sr`` is not a power of 2.
        """
        super(FSUSqrt, self).__init__()
        # Private, normalised copies of the configuration.
        self.hwcfg = {}
        self.hwcfg["mode"] = hwcfg["mode"].lower()
        self.hwcfg["jk_trace"] = hwcfg["jk_trace"]
        self.hwcfg["emit"] = hwcfg["emit"]
        self.hwcfg["entry_kn"] = hwcfg["entry_kn"]
        self.hwcfg["entry_sr"] = hwcfg["entry_sr"]
        self.hwcfg["rng"] = hwcfg["rng"].lower()
        self.hwcfg["dimr"] = hwcfg["dimr"]

        self.swcfg = {}
        self.swcfg["btype"] = swcfg["btype"]
        self.swcfg["stype"] = swcfg["stype"]

        # data representation
        self.mode = hwcfg["mode"].lower()
        assert self.mode in ["unipolar", "bipolar"], \
            "Error: the hw config 'mode' in " + str(self) + " class requires one of ['unipolar', 'bipolar']."

        self.jk_trace = hwcfg["jk_trace"]
        self.emit = hwcfg["emit"]
        self.entry_kn = hwcfg["entry_kn"]
        self.entry_sr = hwcfg["entry_sr"]
        # A value is a power of 2 exactly when its log2 is an integer.
        assert math.ceil(math.log2(self.entry_kn)) == math.floor(math.log2(self.entry_kn)) , \
            "Error: the hw config 'entry_kn' in " + str(self) + " class needs to be power of 2."
        assert math.ceil(math.log2(self.entry_sr)) == math.floor(math.log2(self.entry_sr)) , \
            "Error: the hw config 'entry_sr' in " + str(self) + " class needs to be power of 2."

        self.stype = swcfg["stype"]
        if self.emit is True:
            self.emit_out = torch.nn.Parameter(torch.zeros(1).type(torch.int8),
                                               requires_grad=False)
            hwcfg_add = {
                "mode": "unipolar",
                "scale": 1,
                "dima": 0,
                "depth": 10,
                "entry": None,
            }
            swcfg_add = {
                "btype": swcfg["btype"],
                "stype": torch.int8,
            }
            self.nsadd = FSUAdd(hwcfg_add, swcfg_add)
            hwcfg_sr = {"entry": self.entry_sr}
            swcfg_sr = {
                "btype": swcfg["btype"],
                "stype": torch.int8,
            }
            self.sr = ShiftReg(hwcfg_sr, swcfg_sr)
            # The RNG is log2(entry_sr) bits wide.
            hwcfg_rng = {
                "width": int(math.log2(self.entry_sr)),
                "dimr": 1,
                "rng": hwcfg["rng"]
            }
            swcfg_rng = {"rtype": torch.long}
            self.rng = RNG(hwcfg_rng, swcfg_rng)()
            self.idx = torch.nn.Parameter(torch.zeros(1).type(torch.long),
                                          requires_grad=False)
            if self.mode == "bipolar":
                hwcfg_b2u = {"depth": 3}
                swcfg_b2u = {"btype": swcfg["btype"], "stype": torch.int8}
                self.bi2uni_emit = Bi2Uni(hwcfg_b2u, swcfg_b2u)
        else:
            self.trace = torch.nn.Parameter(torch.zeros(1).type(torch.int8),
                                            requires_grad=False)
            if self.mode == "bipolar":
                hwcfg_b2u = {"depth": 3}
                swcfg_b2u = {"btype": swcfg["btype"], "stype": torch.int8}
                self.bi2uni = Bi2Uni(hwcfg_b2u, swcfg_b2u)
            if self.jk_trace is True:
                swcfg_jkff = {"stype": torch.int8}
                self.jkff = JKFF(swcfg_jkff)
            else:
                # The CORDIV kernel is built with a fixed entry and dimr of 4,
                # regardless of the values in ``hwcfg``.
                hwcfg_cordiv_kernel = {
                    "entry": 4,
                    "rng": hwcfg["rng"],
                    "dimr": 4
                }
                swcfg_cordiv_kernel = {"stype": torch.int8}
                self.cordiv_kernel = CORDIV_kernel(hwcfg_cordiv_kernel,
                                                   swcfg_cordiv_kernel)
                self.dff = torch.nn.Parameter(torch.zeros(1).type(torch.int8),
                                              requires_grad=False)
Exemplo n.º 10
0
    def __init__(self,
                 in_1_prob=None,
                 hwcfg={
                     "width": 8,
                     "mode": "bipolar",
                     "static": False,
                     "rng": "Sobol",
                     "dimr": 1
                 },
                 swcfg={
                     "rtype": torch.float,
                     "stype": torch.float
                 }):
        """Configure the multiplier for static or streaming operation.

        Args:
            in_1_prob: Value of the second operand; required when
                ``hwcfg["static"]`` is ``True``.
            hwcfg: Hardware config with keys ``"width"``, ``"mode"``,
                ``"static"``, ``"rng"`` and ``"dimr"``.
            swcfg: Software config with keys ``"rtype"`` and ``"stype"``.

        Raises:
            AssertionError: If the mode is not unipolar/bipolar, or the static
                multiplier is requested without ``in_1_prob``.
        """
        super(FSUMul, self).__init__()

        # Private, normalised copies of the configuration.
        self.hwcfg = {
            "width": hwcfg["width"],
            "mode": hwcfg["mode"].lower(),
            "static": hwcfg["static"],
            "rng": hwcfg["rng"].lower(),
            "dimr": hwcfg["dimr"],
        }
        self.swcfg = {
            "rtype": swcfg["rtype"],
            "stype": swcfg["stype"],
        }

        self.entry = 2**hwcfg["width"]
        self.static = hwcfg["static"]
        self.stype = swcfg["stype"]
        self.rtype = swcfg["rtype"]

        self.mode = hwcfg["mode"].lower()
        assert self.mode in ["unipolar", "bipolar"], \
            "Error: the hw config 'mode' in " + str(self) + " class requires one of ['unipolar', 'bipolar']."

        # the random number generator used in computation
        self.rng = RNG(hwcfg, swcfg)()

        is_bipolar = self.mode == "bipolar"

        def new_index():
            """Return a non-trainable long counter initialised to 0."""
            return torch.nn.Parameter(torch.zeros(1).type(torch.long),
                                      requires_grad=False)

        if self.static is not True:
            # Streaming operand: a shift register stores the count of 1s in
            # one bitstream to generate data.
            self.sr = ShiftReg({"entry": self.entry}, swcfg)
            self.rng_idx = new_index()
            if is_bipolar:
                self.rng_idx_inv = new_index()
            return

        # Static operand: its probability is fixed at construction time.
        self.in_1_prob = in_1_prob
        assert in_1_prob is not None, \
            "Error: the static multiplier requires in_1_prob in " + str(self) + " class."
        # a fixed bitstream generator for the static operand
        self.source_gen = BinGen(self.in_1_prob, hwcfg, swcfg)()
        self.bsg = BSGen(self.source_gen, self.rng, {"stype": torch.int8})
        # rng_idx is used later as an enable signal, updated every cycle
        self.rng_idx = new_index()

        # bipolar mode needs a second generator and a second enable signal
        if is_bipolar:
            self.bsg_inv = BSGen(self.source_gen, self.rng,
                                 {"stype": torch.int8})
            self.rng_idx_inv = new_index()
Exemplo n.º 11
0
def test_rng():
    """Build an RNG for every generator name and print the resulting sequence.

    The same ``hwcfg`` dict is reused for all generators; only its ``"rng"``
    entry is rewritten before each construction. Each sequence is moved to
    CUDA device 0 when available, otherwise it stays on the CPU.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    hwcfg = {"width": 4, "dimr": 1, "rng": "sobol"}
    swcfg = {"rtype": torch.float}

    # Generator names, visited in this exact order.
    generator_names = (
        "sobol",
        "rc",
        "race",
        "tc",
        "lfsr",
        "sys",
        "race10",
        "tc10",
    )
    for generator_name in generator_names:
        hwcfg["rng"] = generator_name
        sequence = RNG(hwcfg, swcfg)()
        print(hwcfg["rng"], sequence.to(device))
def test_fsumul_in_stream():
    """Run a non-static FSUMul on two random bitstreams and print the errors.

    For each data mode, two random vectors of length ``col`` are snapped to a
    ``2**bitwidth`` grid, turned into bitstreams and fed to the multiplier for
    ``2**bitwidth`` cycles. ``ProgError`` monitors follow both inputs and the
    product; the second element returned by each monitor is printed as the
    error.
    """
    bitwidth = 12
    depth = 4
    levels = 2**bitwidth
    col = 100

    hwcfg = {
        "width": bitwidth,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 10,
        "entry": None,
        "static": False
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}

    def quantize(values):
        # Snap values onto the 2**bitwidth grid and move them to the device.
        return values.mul(levels).round().div(levels).to(device)

    for mode in ["bipolar", "unipolar"]:
        # Draw operand A before operand B so the random stream is consumed
        # in a fixed order.
        if mode == "unipolar":
            operand_a = quantize(torch.rand(col))
            operand_b = quantize(torch.rand(col))
        elif mode == "bipolar":
            operand_a = quantize(torch.rand(col).mul(2).sub(1))
            operand_b = quantize(torch.rand(col).mul(2).sub(1))

        hwcfg["mode"] = mode

        # The multiplier is built while "width" temporarily holds `depth`;
        # the shared dict is restored right after construction.
        hwcfg["width"] = depth
        dut_mul = FSUMul(None, hwcfg, swcfg).to(device)
        hwcfg["width"] = bitwidth

        expected = quantize(operand_a * operand_b)

        monitor_a = ProgError(operand_a, hwcfg).to(device)
        monitor_b = ProgError(operand_b, hwcfg).to(device)
        monitor_out = ProgError(expected, hwcfg).to(device)

        source_a = BinGen(operand_a, hwcfg, swcfg)().to(device)
        source_b = BinGen(operand_b, hwcfg, swcfg)().to(device)

        rng_a = RNG(hwcfg, swcfg)().to(device)
        rng_b = RNG(hwcfg, swcfg)().to(device)
        stream_a = BSGen(source_a, rng_a, swcfg).to(device)
        stream_b = BSGen(source_b, rng_b, swcfg).to(device)

        with torch.no_grad():
            start_time = time.time()
            cycle_base = torch.zeros(operand_a.size(),
                                     dtype=torch.long,
                                     device=device)
            for cycle in range(levels):
                bits_a = stream_a(cycle_base + cycle)
                bits_b = stream_b(cycle_base + cycle)

                monitor_a.Monitor(bits_a)
                monitor_b.Monitor(bits_b)

                monitor_out.Monitor(dut_mul(bits_a, bits_b))
            print("--- %s seconds ---" % (time.time() - start_time))
            print(mode)

            a_min = torch.min(monitor_a()[1])
            a_max = torch.max(monitor_a()[1])
            print("input 0 error: ", "min:", a_min, "max:", a_max)

            b_min = torch.min(monitor_b()[1])
            b_max = torch.max(monitor_b()[1])
            print("input 1 error: ", "min:", b_min, "max:", b_max)

            out_min = torch.min(monitor_out()[1])
            out_max = torch.max(monitor_out()[1])
            out_rmse = torch.sqrt(
                torch.mean(torch.mul(monitor_out()[1], monitor_out()[1])))
            out_bias = torch.mean(monitor_out()[1])
            print("output error: ", "min:", out_min, "max:", out_max, "rmse:",
                  out_rmse, "bias:", out_bias)
Exemplo n.º 13
0
def test_fsusignabs():
    """Sweep every grid input through FSUSignAbs and print error statistics.

    The input tensor holds every integer in the mode-dependent range divided
    by the upper bound. The reference output is ``torch.abs`` of that input.
    The simulation is repeated ``total_cnt`` times; the second element
    returned by the output ``ProgError`` monitor is collected per run and
    summarised as RMSE, MAE, bias, max and min.
    """
    total_cnt = 5
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "rng": "sobol",
        "scale": 1,
        "depth": 5
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]
    mode = hwcfg["mode"]

    print("========================================================")
    print(mode)
    print("========================================================")
    # Integer sweep range: [0, 2**width] for unipolar data and
    # [-2**(width - 1), 2**(width - 1)] for bipolar data, so the bipolar
    # sweep includes negative inputs.
    low_bound = 0
    if mode == "unipolar":
        up_bound = 2**bitwidth
    elif mode == "bipolar":
        low_bound = -2**(bitwidth - 1)
        up_bound = 2**(bitwidth - 1)

    in_vals = torch.tensor(list(range(low_bound, up_bound + 1)))
    in_vals = in_vals.type(torch.float).div(up_bound).to(device)

    # Reference the device-under-test output is monitored against.
    ref_abs = torch.abs(in_vals).to(device)

    result_pe_total = []
    # NOTE(review): nothing in hwcfg changes between repetitions, although the
    # result is described as being "for different rng" -- confirm whether the
    # RNG configuration was meant to vary per run.
    for _ in range(total_cnt):
        outputPE = ProgError(ref_abs, hwcfg).to(device)

        inputPE = ProgError(in_vals, hwcfg).to(device)
        inputSRC = BinGen(in_vals, hwcfg, swcfg)().to(device)

        dut = FSUSignAbs(hwcfg, swcfg).to(device)

        inputRNG = RNG(hwcfg, swcfg)().to(device)
        inputBS = BSGen(inputSRC, inputRNG, swcfg).to(device)
        with torch.no_grad():
            for i in range(2**bitwidth):
                input_bs = inputBS(torch.tensor([i]))
                inputPE.Monitor(input_bs)

                # Only the second value returned by the DUT is monitored.
                _, output_bs = dut(input_bs)
                outputPE.Monitor(output_bs)

        # Collect this run's error tensor as a NumPy array.
        result_pe_total.append(outputPE()[1].cpu().numpy())

    # Stack the per-run errors into one array.
    result_pe_total = np.array(result_pe_total)

    #######################################################################
    # check the error of all simulation
    #######################################################################
    print("RMSE:{:1.4}".format(np.sqrt(np.mean(result_pe_total**2))))
    print("MAE: {:1.4}".format(np.mean(np.abs(result_pe_total))))
    print("bias:{:1.4}".format(np.mean(result_pe_total)))
    print("max: {:1.4}".format(np.max(result_pe_total)))
    print("min: {:1.4}".format(np.min(result_pe_total)))
Exemplo n.º 14
0
    def __init__(self,
                 in_features,
                 out_features,
                 bias=True,
                 weight_ext=None,
                 bias_ext=None,
                 hwcfg={
                     "width": 8,
                     "mode": "bipolar",
                     "rng": "Sobol",
                     "dimr": 1
                 },
                 swcfg={
                     "btype": torch.float,
                     "rtype": torch.float,
                     "stype": torch.float
                 }):
        """Set up configs, RNG sequences and bitstream generators.

        Args:
            in_features: forwarded to the parent constructor.
            out_features: forwarded to the parent constructor.
            bias: forwarded to the parent constructor; also decides whether
                the bias RNG and bias bitstream generator are created.
            weight_ext: optional tensor whose first two dimensions must equal
                ``(out_features, in_features)``. When given,
                ``BinGen(weight_ext, ...)()`` replaces ``self.weight.data``.
            bias_ext: optional tensor whose first dimension must equal
                ``out_features``. When given and ``bias`` is truthy,
                ``BinGen(bias_ext, ...)()`` replaces ``self.bias.data``.
            hwcfg: dict read for the keys "width", "mode", "rng" and "dimr".
            swcfg: dict read for the keys "btype", "rtype" and "stype".

        Raises:
            AssertionError: if "mode" is not "unipolar"/"bipolar", or if the
                external weight/bias shapes do not match the layer.
        """
        super(FSULinearPC, self).__init__(in_features, out_features, bias=bias)

        # Keep private, normalised copies of the configs; this method only
        # reads the (shared) default dicts and never writes to them.
        self.hwcfg = {}
        self.hwcfg["width"] = hwcfg["width"]
        self.hwcfg["mode"] = hwcfg["mode"].lower()
        self.hwcfg["rng"] = hwcfg["rng"].lower()
        self.hwcfg["dimr"] = hwcfg["dimr"]

        self.swcfg = {}
        self.swcfg["btype"] = swcfg["btype"]
        self.swcfg["rtype"] = swcfg["rtype"]
        self.swcfg["stype"] = swcfg["stype"]

        self.mode = hwcfg["mode"].lower()
        assert self.mode in ["unipolar", "bipolar"], \
            "Error: the hw config 'mode' in " + str(self) + " class requires one of ['unipolar', 'bipolar']."

        # bias indication for original linear layer
        self.has_bias = bias

        # RNG for weight
        hwcfg_wrng = {
            "width": hwcfg["width"],
            "rng": hwcfg["rng"],
            "dimr": hwcfg["dimr"]
        }
        self.wrng = RNG(hwcfg_wrng, swcfg)()
        # Flags the "race"/"tc" family of generators; it disables the extra
        # bit-0 kernel created at the end of this method.
        self.wtc = hwcfg["rng"].lower() in ["race", "tc", "race10", "tc10"]

        # define the linear weight and bias
        if weight_ext is not None:
            assert (weight_ext.size()[0], weight_ext.size()[1]) == (out_features, in_features), \
                "Error: the hw config 'out_features, in_features' in " + str(self) + " class unmatches the binary weight shape."
            self.weight.data = BinGen(weight_ext, self.hwcfg, self.swcfg)()

        if bias and (bias_ext is not None):
            assert bias_ext.size()[0] == out_features, \
                "Error: the hw config 'out_features' in " + str(self) + " class unmatches the binary bias shape."
            self.bias.data = BinGen(bias_ext, self.hwcfg, self.swcfg)()

        if bias:
            # RNG for bias, same as RNG for weight. It is created whenever the
            # layer has a bias (not only when bias_ext is supplied) because
            # the bias bitstream generator below always reads self.brng;
            # previously bias=True with bias_ext=None raised AttributeError.
            hwcfg_brng = {
                "width": hwcfg["width"],
                "rng": hwcfg["rng"],
                "dimr": hwcfg["dimr"]
            }
            self.brng = RNG(hwcfg_brng, swcfg)()

        # define the kernel linear for input bit 1
        self.wbsg_i1 = BSGen(self.weight, self.wrng, swcfg)
        # NOTE: unsqueeze(0) is applied to the Parameter, so the stored
        # attribute is the tensor returned by unsqueeze.
        self.wrdx_i1 = torch.nn.Parameter(torch.zeros_like(self.weight,
                                                           dtype=torch.long),
                                          requires_grad=False).unsqueeze(0)
        if self.has_bias is True:
            self.bbsg = BSGen(self.bias, self.brng, swcfg)
            self.brdx = torch.nn.Parameter(torch.zeros_like(self.bias,
                                                            dtype=torch.long),
                                           requires_grad=False)

        # if bipolar, define a kernel for input bit 0, note that there is no bias required for this kernel
        if (self.mode == "bipolar") and (self.wtc is False):
            self.wbsg_i0 = BSGen(self.weight, self.wrng, swcfg)
            self.wrdx_i0 = torch.nn.Parameter(torch.zeros_like(
                self.weight, dtype=torch.long),
                                              requires_grad=False).unsqueeze(0)
Exemplo n.º 15
0
def test_fsuadd():
    """Run FSUAdd for every mode/scaling combination and print its error.

    For each combination a random tensor of shape ``size`` is quantised,
    converted to a bitstream and accumulated by ``FSUAdd`` along dimension
    ``hwcfg["dima"]`` for ``2**bitwidth`` cycles. The reference is
    ``torch.sum`` along the same dimension. The time spent inside the adder
    is accumulated across all cycles and printed afterwards.
    """
    hwcfg = {
        "width": 12,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 20,
        "entry": None
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]
    rng = hwcfg["rng"]

    plot_en = False
    modes = ["bipolar", "unipolar"]
    size = [128, 256, 512]

    scaled = [True, False]

    for mode in modes:
        for scale in scaled:
            run_time = 0
            acc_dim = hwcfg["dima"]
            scale_mod = size[acc_dim]
            result_pe_cycle = []
            hwcfg["mode"] = mode
            hwcfg["scale"] = scale_mod if scale else 1
            uadd = FSUAdd(hwcfg, swcfg).to(device)

            if mode == "unipolar":
                iVec = torch.rand(size).mul(2**bitwidth).round().div(
                    2**bitwidth).to(device)
            elif mode == "bipolar":
                iVec = torch.rand(size).mul(2).sub(1).mul(
                    2**bitwidth).round().div(2**bitwidth).to(device)

            oVec = torch.sum(iVec, acc_dim).to(device)

            iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iVecRNG = RNG(hwcfg, swcfg)().to(device)
            iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)
            # The input monitor is built with scale 1; the output monitor is
            # built with the same scale that was set when creating the adder.
            hwcfg["scale"] = 1
            iVecPE = ProgError(iVec, hwcfg).to(device)
            print("iVecPE cfg", iVecPE.hwcfg)
            hwcfg["scale"] = scale_mod if scale else 1
            oVecPE = ProgError(oVec, hwcfg).to(device)
            print("oVecPE cfg", oVecPE.hwcfg)

            with torch.no_grad():
                idx = torch.zeros(iVecSource.size()).type(
                    torch.long).to(device)
                for i in range(2**bitwidth):
                    iBS = iVecBS(idx + i)
                    iVecPE.Monitor(iBS)

                    # Time only the adder call and accumulate it per cycle.
                    start_time = time.time()
                    oVecU = uadd(iBS)
                    run_time = time.time() - start_time + run_time

                    if i == 0:
                        print("uadd cfg", uadd.hwcfg)

                    oVecPE.Monitor(oVecU)
                    rmse = torch.sqrt(
                        torch.mean(torch.mul(oVecPE()[1],
                                             oVecPE()[1])))
                    result_pe_cycle.append(1 - rmse.item())
                # Report the accumulated adder time. The previous expression
                # measured only from the start of the last cycle and left
                # run_time unused.
                print("--- %s seconds ---" % run_time)
                print("RNG: " + rng + ", data: " + mode + ", scaled: " +
                      str(scale))
                print("input error:  ", "min: ",
                      torch.min(iVecPE()[1]).item(), "max: ",
                      torch.max(iVecPE()[1]).item())
                print("output error: ", "min: ",
                      torch.min(oVecPE()[1]).item(), "max: ",
                      torch.max(oVecPE()[1]).item(), "RMSE: ", rmse.item())
                print()
                if plot_en is True:
                    result_pe = oVecPE()[1].cpu().numpy()
                    print("error distribution=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.hist(
                        result_pe.flatten(),
                        bins='auto')  # arguments are passed to np.histogram
                    plt.show()
                    print("progressive accuracy=========>")
                    plt.figure(figsize=(3, 1.5))
                    # One point per cycle: 1 - RMSE after that cycle.
                    plt.plot(result_pe_cycle)
                    plt.show()
Exemplo n.º 16
0
def _report_abs_error(label, err):
    """Print min/max, std, mean and RMSE of ``err`` on one labelled line."""
    err_min = err.min().item()
    err_max = err.max().item()
    rmse = torch.sqrt(torch.mean(torch.square(err)))
    std, mean = torch.std_mean(err)
    print("{:30s}".format(label) +
          ", Absolute Error range," + "{:12f}".format(err_min) +
          ", {:12f}".format(err_max) +
          ", std," + "{:12f}".format(std) +
          ", mean," + "{:12f}".format(mean) +
          ", rmse," + "{:12f}".format(rmse))


def test_fsumgu():
    """Compare FSU, HUB and fixed-point MGU cells against HardMGUCell.

    For each bit width a random input window is pushed through four cells
    that share the reference cell's weights where the constructors accept
    them. The FSUMGUCell is rebuilt for every window step and driven with
    bitstreams for ``2**bitwidth`` cycles; its accumulated output (first
    element returned by the output ``ProgError``) becomes the next hidden
    state. Error reports are printed per window step.
    """
    # Attributes read from both the reference cell and the FSU cell and
    # monitored against each other, in this order.
    probe_names = ("fg_ug_in", "fg_in", "fg", "fg_hx", "ng_ug_in", "ng",
                   "fg_ng", "fg_ng_inv")

    bitwidth_list = [7, 8, 9, 10]
    for bitwidth in bitwidth_list:
        print("bit width:", bitwidth)
        win_sz = 10
        batch = 32
        input_sz = 256
        hidden_sz = 64

        intwidth = 1

        fracwidth = bitwidth - intwidth
        mode = "bipolar"
        depth = bitwidth + 2
        depth_ismul = bitwidth - 4
        rng = "Sobol"
        bias = False
        output_error_only = True

        hwcfg = {
            "width": bitwidth,
            "mode": mode,
            "depth": depth,
            "depth_ismul": depth_ismul,
            "rng": rng,
            "dimr": 1,
            "scale": 1
        }
        swcfg = {
            "btype": torch.float,
            "rtype": torch.float,
            "stype": torch.float
        }

        # Random draws and module constructions below keep a fixed order so
        # the global random stream is consumed identically on every run.
        in_seq = torch.randn(win_sz, batch, input_sz).to(device)
        in_seq = truncated_normal(in_seq, mean=0, std=0.4)
        hx1 = torch.randn(batch, hidden_sz).to(device)
        hx1 = truncated_normal(hx1, mean=0, std=0.1)
        hx2 = hx1.clone().detach().to(device)
        hx3 = hx1.clone().detach().to(device)
        hx4 = hx1.clone().detach().to(device)
        output1 = []

        rnn1 = HardMGUCell(input_sz, hidden_sz, bias=bias,
                           hard=True).to(device)
        rnn3 = HardMGUCellFXP(input_sz,
                              hidden_sz,
                              bias=bias,
                              hard=True,
                              intwidth=intwidth,
                              fracwidth=fracwidth).to(device)
        rnn3.weight_f.data = rnn1.weight_f.clone().detach().to(device)
        rnn3.weight_n.data = rnn1.weight_n.clone().detach().to(device)

        rnn4 = HUBMGUCell(input_sz,
                          hidden_sz,
                          bias=bias,
                          weight_ext_f=rnn1.weight_f,
                          bias_ext_f=rnn1.bias_f,
                          weight_ext_n=rnn1.weight_n,
                          bias_ext_n=rnn1.bias_n,
                          hwcfg=hwcfg).to(device)

        for i in range(win_sz):
            hx1 = rnn1(in_seq[i], hx1)
            output1.append(hx1)

            hx3 = rnn3(in_seq[i], hx3)

            hx4 = rnn4(in_seq[i], hx4)

            iVec, hVec = in_seq[i], hx2

            # rnn2 in the loop to mimic the hw reset
            rnn2 = FSUMGUCell(input_sz,
                              hidden_sz,
                              bias=bias,
                              weight_ext_f=rnn1.weight_f,
                              bias_ext_f=rnn1.bias_f,
                              weight_ext_n=rnn1.weight_n,
                              bias_ext_n=rnn1.bias_n,
                              hx_buffer=hx2,
                              hwcfg=hwcfg,
                              swcfg=swcfg).to(device)

            iSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iRNG = RNG(hwcfg, swcfg)().to(device)
            iBSG = BSGen(iSource, iRNG, swcfg).to(device)
            iPE = ProgError(iVec, hwcfg).to(device)

            hSource = BinGen(hVec, hwcfg, swcfg)().to(device)
            hRNG = RNG(hwcfg, swcfg)().to(device)
            hBSG = BSGen(hSource, hRNG, swcfg).to(device)
            hPE = ProgError(hVec, hwcfg).to(device)

            oVec = output1[i]
            oPE = ProgError(oVec, hwcfg).to(device)

            # One monitor per probed attribute, referenced to the value the
            # reference cell holds after this window step.
            probes = {
                name: ProgError(getattr(rnn1, name), hwcfg).to(device)
                for name in probe_names
            }

            # The zero index tensors do not change between cycles; only the
            # added cycle count does.
            idx = torch.zeros(iSource.size()).type(torch.long).to(device)
            hdx = torch.zeros(hSource.size()).type(torch.long).to(device)
            for c in range(2**bitwidth):
                iBS = iBSG(idx + c)
                iPE.Monitor(iBS)

                hBS = hBSG(hdx + c)
                hPE.Monitor(hBS)

                oBS = rnn2(iBS, hBS)

                for name, probe in probes.items():
                    probe.Monitor(getattr(rnn2, name))

                oPE.Monitor(oBS)

            # The FSU cell's accumulated output feeds the next window step.
            hx2 = oPE()[0]

            if not output_error_only:
                progerror_report(iPE, "input")
                progerror_report(hPE, "hidden")
                for name, probe in probes.items():
                    progerror_report(probe, name)

            progerror_report(oPE, str(i) + "-th win output fsu")

            _report_abs_error(str(i) + "-th win output hub", hx1 - hx4)
            _report_abs_error(str(i) + "-th win output fxp", hx1 - hx3)

        print()
Exemplo n.º 17
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 padding_mode='zeros',
                 weight_ext=None,
                 bias_ext=None,
                 hwcfg={
                     "width": 8,
                     "mode": "bipolar",
                     "rng": "Sobol",
                     "dimr": 1
                 },
                 swcfg={
                     "btype": torch.float,
                     "rtype": torch.float,
                     "stype": torch.float
                 }):
        """Set up configs, RNG sequences, bitstream generators and padding.

        Args:
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            groups, bias, padding_mode: forwarded to the parent constructor.
                ``groups`` must be 1 and ``padding_mode`` must be 'zeros'.
                ``bias`` also decides whether the bias RNG and bias bitstream
                generator are created.
            weight_ext: optional tensor whose first four dimensions must equal
                ``(out_channels, in_channels, kH, kW)`` with ``(kH, kW)``
                taken from ``num2tuple(kernel_size)``. When given,
                ``BinGen(weight_ext, ...)()`` replaces ``self.weight.data``.
            bias_ext: optional tensor whose first dimension must equal
                ``out_channels``. When given and ``bias`` is truthy,
                ``BinGen(bias_ext, ...)()`` replaces ``self.bias.data``.
            hwcfg: dict read for the keys "width", "mode", "rng" and "dimr".
            swcfg: dict read for the keys "btype", "rtype" and "stype".

        Raises:
            AssertionError: if "mode" is not "unipolar"/"bipolar", if
                ``groups``/``padding_mode`` are unsupported, or if the
                external weight/bias shapes do not match the layer.
        """
        super(FSUConv2dPC, self).__init__(in_channels,
                                          out_channels,
                                          kernel_size,
                                          stride=stride,
                                          padding=padding,
                                          dilation=dilation,
                                          groups=groups,
                                          bias=bias,
                                          padding_mode=padding_mode)

        # Keep private, normalised copies of the configs; this method only
        # reads the (shared) default dicts and never writes to them.
        self.hwcfg = {}
        self.hwcfg["width"] = hwcfg["width"]
        self.hwcfg["mode"] = hwcfg["mode"].lower()
        self.hwcfg["rng"] = hwcfg["rng"].lower()
        self.hwcfg["dimr"] = hwcfg["dimr"]

        self.swcfg = {}
        self.swcfg["btype"] = swcfg["btype"]
        self.swcfg["rtype"] = swcfg["rtype"]
        self.swcfg["stype"] = swcfg["stype"]

        self.mode = hwcfg["mode"].lower()
        assert self.mode in ["unipolar", "bipolar"], \
            "Error: the hw config 'mode' in " + str(self) + " class requires one of ['unipolar', 'bipolar']."

        # Store the constructor arguments exactly as passed, overwriting
        # whatever the parent constructor assigned to these attributes.
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation

        assert groups == 1, \
            "Error: the 'groups' in " + str(self) + " class requires to be 1."
        assert padding_mode == 'zeros', \
            "Error: the 'padding_mode' in " + str(self) + " class requires to be 'zeros'."

        # bias indication for original linear layer
        self.has_bias = bias

        # RNG for weight
        hwcfg_wrng = {
            "width": hwcfg["width"],
            "rng": hwcfg["rng"],
            "dimr": hwcfg["dimr"]
        }
        self.wrng = RNG(hwcfg_wrng, swcfg)()
        # Flags the "race"/"tc" family of generators; it disables the extra
        # bit-0 kernel created further below.
        self.wtc = hwcfg["rng"].lower() in ["race", "tc", "race10", "tc10"]

        # define the linear weight and bias
        if weight_ext is not None:
            assert (weight_ext.size()[0], weight_ext.size()[1],
                    weight_ext.size()[2], weight_ext.size()[3]) == (
                        out_channels, in_channels,
                        num2tuple(kernel_size)[0],
                        num2tuple(kernel_size)[1]), \
                "Error: the hw config 'out_channels, in_channels, kernel_size' in " + str(self) + " class unmatches the binary weight shape."
            self.weight.data = BinGen(weight_ext, self.hwcfg, self.swcfg)()

        if bias and (bias_ext is not None):
            assert bias_ext.size()[0] == out_channels, \
                "Error: the hw config 'out_channels' in " + str(self) + " class unmatches the binary bias shape."
            self.bias.data = BinGen(bias_ext, self.hwcfg, self.swcfg)()

        if bias:
            # RNG for bias, same as RNG for weight. It is created whenever the
            # layer has a bias (not only when bias_ext is supplied) because
            # the bias bitstream generator below always reads self.brng;
            # previously bias=True with bias_ext=None raised AttributeError.
            hwcfg_brng = {
                "width": hwcfg["width"],
                "rng": hwcfg["rng"],
                "dimr": hwcfg["dimr"]
            }
            self.brng = RNG(hwcfg_brng, swcfg)()

        # define the kernel linear for input bit 1
        # The weight is viewed as (1, out_channels, -1) before being handed
        # to the bitstream generator.
        self.wbsg_i1 = BSGen(self.weight.view(1,
                                              self.weight.size()[0], -1),
                             self.wrng, swcfg)
        # NOTE: view() is applied to the Parameter, so the stored attribute
        # is the tensor returned by view.
        self.wrdx_i1 = torch.nn.Parameter(torch.zeros_like(self.weight,
                                                           dtype=torch.long),
                                          requires_grad=False).view(
                                              1,
                                              self.weight.size()[0], -1)
        if self.has_bias is True:
            self.bbsg = BSGen(self.bias, self.brng, swcfg)
            self.brdx = torch.nn.Parameter(torch.zeros_like(self.bias,
                                                            dtype=torch.long),
                                           requires_grad=False)

        # if bipolar, define a kernel for input bit 0, note that there is no bias required for this kernel
        if (self.mode == "bipolar") and (self.wtc is False):
            self.wbsg_i0 = BSGen(
                self.weight.view(1,
                                 self.weight.size()[0], -1), self.wrng, swcfg)
            self.wrdx_i0 = torch.nn.Parameter(
                torch.zeros_like(self.weight, dtype=torch.long),
                requires_grad=False).view(1,
                                          self.weight.size()[0], -1)

        # indicator of even/odd cycle
        self.even_cycle_flag = torch.nn.Parameter(torch.ones(1,
                                                             dtype=torch.bool),
                                                  requires_grad=False)
        # Two constant pads sharing the same padding amount: one fills with
        # 0 and the other fills with 1.
        self.padding_0 = torch.nn.ConstantPad2d(self.padding, 0)
        self.padding_1 = torch.nn.ConstantPad2d(self.padding, 1)
        self.bipolar_mode = torch.nn.Parameter(torch.tensor(
            [self.mode == "bipolar"], dtype=torch.bool),
                                               requires_grad=False)