def forward(self, input: Tensor, hx: Tensor) -> Tensor:
    """Run one recurrent step through a freshly built ``FSUMGUCell``.

    The input and the hidden state are each passed through
    ``BinGen`` -> ``RNG`` -> ``BSGen``; the resulting generators are then
    indexed once per cycle for ``2**hwcfg["width"]`` cycles.  Every cycle's
    cell output is fed to a ``ProgError`` monitor, and element 0 of that
    monitor's result after the last cycle is returned as the new hidden state.

    Args:
        input: Input tensor; ``input.size()[0]`` is used as the first
            dimension of the default hidden state.
        hx: Previous hidden state.  ``None`` is accepted (despite the
            annotation) and replaced by zeros of shape
            ``(input.size()[0], self.hidden_size)`` with the dtype and device
            of ``input``.

    Returns:
        ``oPE()[0]`` evaluated after all cycles have been monitored.
    """
    dev = input.device

    if hx is None:
        hx = torch.zeros(input.size()[0], self.hidden_size,
                         dtype=input.dtype, device=dev)

    # The cell is rebuilt on every call from the externally held parameters.
    rnncell = FSUMGUCell(self.input_size, self.hidden_size, bias=self.bias,
                         weight_ext_f=self.weight_f, bias_ext_f=self.bias_f,
                         weight_ext_n=self.weight_n, bias_ext_n=self.bias_n,
                         hx_buffer=hx,
                         hwcfg=self.hwcfg, swcfg=self.swcfg).to(dev)

    iSource = BinGen(input, self.hwcfg, self.swcfg)().to(dev)
    iRNG = RNG(self.hwcfg, self.swcfg)().to(dev)
    iBSG = BSGen(iSource, iRNG, self.swcfg).to(dev)

    hSource = BinGen(hx, self.hwcfg, self.swcfg)().to(dev)
    hRNG = RNG(self.hwcfg, self.swcfg)().to(dev)
    hBSG = BSGen(hSource, hRNG, self.swcfg).to(dev)

    # The monitor's reference value is all zeros: only element 0 of its
    # result is consumed below, element 1 is never read here.
    oPE = ProgError(torch.zeros(input.size()[0], self.hidden_size,
                                dtype=input.dtype, device=dev),
                    self.hwcfg_ope).to(dev)

    # The zero index bases do not depend on the cycle, so allocate them once
    # instead of once per cycle; ``base + c`` yields a fresh tensor each time.
    idx = torch.zeros(iSource.size(), dtype=torch.long, device=dev)
    hdx = torch.zeros(hSource.size(), dtype=torch.long, device=dev)

    for c in range(2**self.hwcfg["width"]):
        iBS = iBSG(idx + c)
        hBS = hBSG(hdx + c)
        oBS = rnncell(iBS, hBS)
        oPE.Monitor(oBS)

    hy = oPE()[0]
    return hy
def _print_abs_error(label, err):
    """Print min/max/std/mean/rmse of the error tensor ``err`` on one line."""
    err_min = err.min().item()
    err_max = err.max().item()
    rmse = torch.sqrt(torch.mean(torch.square(err))).item()
    std, mean = torch.std_mean(err)
    print("{:30s}".format(label) +
          ", Absolute Error range," + "{:12f}".format(err_min) + ", {:12f}".format(err_max) +
          ", std," + "{:12f}".format(std.item()) +
          ", mean," + "{:12f}".format(mean.item()) +
          ", rmse," + "{:12f}".format(rmse))


def test_fsumgu():
    """Compare FSU, fixed-point and HUB MGU cells against ``HardMGUCell``.

    For every bit width in ``[7, 8, 9, 10]`` a random input window and a
    random initial hidden state are drawn, then for each window step:

    * ``HardMGUCell`` (reference), ``HardMGUCellFXP`` and ``HUBMGUCell`` are
      advanced by one step;
    * a fresh ``FSUMGUCell`` sharing the reference weights is driven for
      ``2**bitwidth`` cycles while ``ProgError`` monitors track the input,
      the hidden state, eight intermediate cell attributes and the output;
    * the output monitor's report and the hub/fxp error statistics relative
      to the reference are printed.

    The input/hidden/intermediate reports are printed only when
    ``output_error_only`` is False.
    """
    # Intermediate attributes read from both the reference cell and the FSU
    # cell; each name doubles as the label of its report.
    gate_names = ("fg_ug_in", "fg_in", "fg", "fg_hx",
                  "ng_ug_in", "ng", "fg_ng", "fg_ng_inv")

    for bitwidth in [7, 8, 9, 10]:
        print("bit width:", bitwidth)
        win_sz = 10
        batch = 32
        input_sz = 256
        hidden_sz = 64

        intwidth = 1
        fracwidth = bitwidth - intwidth
        bias = False
        output_error_only = True

        hwcfg = {
            "width": bitwidth,
            "mode": "bipolar",
            "depth": bitwidth + 2,
            "depth_ismul": bitwidth - 4,
            "rng": "Sobol",
            "dimr": 1,
            "scale": 1,
        }
        swcfg = {
            "btype": torch.float,
            "rtype": torch.float,
            "stype": torch.float,
        }

        in_seq = torch.randn(win_sz, batch, input_sz).to(device)
        in_seq = truncated_normal(in_seq, mean=0, std=0.4)

        # hx1: reference, hx2: FSU, hx3: fixed point, hx4: HUB.
        hx1 = torch.randn(batch, hidden_sz).to(device)
        hx1 = truncated_normal(hx1, mean=0, std=0.1)
        hx2 = hx1.clone().detach().to(device)
        hx3 = hx1.clone().detach().to(device)
        hx4 = hx1.clone().detach().to(device)

        rnn1 = HardMGUCell(input_sz, hidden_sz, bias=bias, hard=True).to(device)
        rnn3 = HardMGUCellFXP(input_sz, hidden_sz, bias=bias, hard=True,
                              intwidth=intwidth, fracwidth=fracwidth).to(device)
        rnn3.weight_f.data = rnn1.weight_f.clone().detach().to(device)
        rnn3.weight_n.data = rnn1.weight_n.clone().detach().to(device)
        rnn4 = HUBMGUCell(input_sz, hidden_sz, bias=bias,
                          weight_ext_f=rnn1.weight_f, bias_ext_f=rnn1.bias_f,
                          weight_ext_n=rnn1.weight_n, bias_ext_n=rnn1.bias_n,
                          hwcfg=hwcfg).to(device)

        for i in range(win_sz):
            hx1 = rnn1(in_seq[i], hx1)
            hx3 = rnn3(in_seq[i], hx3)
            hx4 = rnn4(in_seq[i], hx4)

            iVec, hVec = in_seq[i], hx2

            # rnn2 in the loop to mimic the hw reset
            rnn2 = FSUMGUCell(input_sz, hidden_sz, bias=bias,
                              weight_ext_f=rnn1.weight_f, bias_ext_f=rnn1.bias_f,
                              weight_ext_n=rnn1.weight_n, bias_ext_n=rnn1.bias_n,
                              hx_buffer=hx2,
                              hwcfg=hwcfg, swcfg=swcfg).to(device)

            iSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iRNG = RNG(hwcfg, swcfg)().to(device)
            iBSG = BSGen(iSource, iRNG, swcfg).to(device)
            iPE = ProgError(iVec, hwcfg).to(device)

            hSource = BinGen(hVec, hwcfg, swcfg)().to(device)
            hRNG = RNG(hwcfg, swcfg)().to(device)
            hBSG = BSGen(hSource, hRNG, swcfg).to(device)
            hPE = ProgError(hVec, hwcfg).to(device)

            # The reference output of this step is the monitor's target.
            oPE = ProgError(hx1, hwcfg).to(device)

            # One monitor per intermediate attribute, targeting the value the
            # reference cell produced in this step.
            gate_pe = {
                name: ProgError(getattr(rnn1, name), hwcfg).to(device)
                for name in gate_names
            }

            # Index bases are cycle-invariant; allocate them once per step.
            idx = torch.zeros(iSource.size(), dtype=torch.long, device=device)
            hdx = torch.zeros(hSource.size(), dtype=torch.long, device=device)

            for c in range(2**bitwidth):
                iBS = iBSG(idx + c)
                iPE.Monitor(iBS)

                hBS = hBSG(hdx + c)
                hPE.Monitor(hBS)

                oBS = rnn2(iBS, hBS)

                for name, pe in gate_pe.items():
                    pe.Monitor(getattr(rnn2, name))

                oPE.Monitor(oBS)

            # Element 0 of the output monitor becomes the next FSU hidden state.
            hx2 = oPE()[0]

            if not output_error_only:
                progerror_report(iPE, "input")
                progerror_report(hPE, "hidden")
                for name, pe in gate_pe.items():
                    progerror_report(pe, name)

            progerror_report(oPE, str(i) + "-th win output fsu")
            _print_abs_error(str(i) + "-th win output hub", hx1 - hx4)
            _print_abs_error(str(i) + "-th win output fxp", hx1 - hx3)

        print()
def test_fsuadd():
    """Exercise ``FSUAdd`` for bipolar/unipolar data with and without scaling.

    For each (mode, scaled) combination a random tensor of shape
    ``[128, 256, 512]`` is quantised to ``2**width`` levels and summed along
    ``hwcfg["dima"]`` to obtain the reference.  The adder is then driven for
    ``2**width`` cycles while ``ProgError`` monitors track input and output.
    Finally the accumulated time spent inside the adder calls, the min/max
    input and output errors and the output RMSE are printed; when
    ``plot_en`` is set, the error histogram and the per-cycle ``1 - rmse``
    curve are plotted as well.
    """
    hwcfg = {
        "width": 12,
        "mode": "bipolar",
        "dimr": 1,
        "dima": 0,
        "rng": "sobol",
        "scale": 1,
        "depth": 20,
        "entry": None,
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    bitwidth = hwcfg["width"]
    rng = hwcfg["rng"]

    plot_en = False
    modes = ["bipolar", "unipolar"]
    size = [128, 256, 512]
    scaled = [True, False]

    for mode in modes:
        for scale in scaled:
            run_time = 0
            acc_dim = hwcfg["dima"]
            scale_mod = size[acc_dim]
            out_scale = scale_mod if scale else 1
            result_pe_cycle = []

            hwcfg["mode"] = mode
            hwcfg["scale"] = out_scale
            uadd = FSUAdd(hwcfg, swcfg).to(device)

            if mode == "unipolar":
                iVec = torch.rand(size).mul(2**bitwidth).round().div(
                    2**bitwidth).to(device)
            elif mode == "bipolar":
                iVec = torch.rand(size).mul(2).sub(1).mul(
                    2**bitwidth).round().div(2**bitwidth).to(device)

            oVec = torch.sum(iVec, acc_dim).to(device)

            iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iVecRNG = RNG(hwcfg, swcfg)().to(device)
            iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

            # hwcfg is shared and mutated in place: the input monitor is
            # built with scale 1, the output monitor with the adder's scale.
            hwcfg["scale"] = 1
            iVecPE = ProgError(iVec, hwcfg).to(device)
            print("iVecPE cfg", iVecPE.hwcfg)
            hwcfg["scale"] = out_scale
            oVecPE = ProgError(oVec, hwcfg).to(device)
            print("oVecPE cfg", oVecPE.hwcfg)

            with torch.no_grad():
                idx = torch.zeros(iVecSource.size()).type(
                    torch.long).to(device)
                for i in range(2**bitwidth):
                    iBS = iVecBS(idx + i)
                    iVecPE.Monitor(iBS)

                    # Only the adder call itself is timed.
                    start_time = time.time()
                    oVecU = uadd(iBS)
                    run_time += time.time() - start_time

                    if i == 0:
                        print("uadd cfg", uadd.hwcfg)

                    oVecPE.Monitor(oVecU)
                    # Evaluate the monitor once per cycle and reuse the error.
                    out_err = oVecPE()[1]
                    rmse = torch.sqrt(torch.mean(torch.mul(out_err, out_err)))
                    result_pe_cycle.append(1 - rmse.item())

                # Report the accumulated adder time rather than the time
                # elapsed since the start of the final cycle only.
                print("--- %s seconds ---" % run_time)
                print("RNG: " + rng + ", data: " + mode + ", scaled: " +
                      str(scale))
                in_err = iVecPE()[1]
                out_err = oVecPE()[1]
                print("input error: ", "min: ",
                      torch.min(in_err).item(), "max: ",
                      torch.max(in_err).item())
                print("output error: ", "min: ",
                      torch.min(out_err).item(), "max: ",
                      torch.max(out_err).item(), "RMSE: ", rmse.item())
                print()

                if plot_en is True:
                    result_pe = oVecPE()[1].cpu().numpy()
                    print("error distribution=========>")
                    plt.figure(figsize=(3, 1.5))
                    # arguments are passed to np.histogram
                    plt.hist(result_pe.flatten(), bins='auto')
                    plt.show()
                    print("progressive accuracy=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.plot(result_pe_cycle)
                    plt.show()
def test_bi2uni():
    """Exercise ``Bi2Uni`` on a random vector and print its error statistics.

    A vector of 1024 values drawn from ``torch.rand`` and quantised to
    ``2**width`` levels is encoded with ``hwcfg["mode"] == "bipolar"``, pushed
    through ``Bi2Uni`` for ``2**width`` cycles, and monitored by a
    ``ProgError`` built with ``hwcfg["mode"] == "unipolar"``.  The function
    prints timings, the min/max input and output errors, element 0 and
    element 1 of the output monitor's result, the mean output error, and the
    positions/values of the extreme output errors.
    """
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "dimr": 1,
        "rng": "sobol",
        "scale": 1,
        "depth": 3,
    }
    swcfg = {"rtype": torch.float, "stype": torch.float, "btype": torch.float}
    # Single source of truth for the bit width (the config value is 8).
    bitwidth = hwcfg["width"]
    in_dim = 1024
    cycles = 2**bitwidth

    uBi2Uni = Bi2Uni(hwcfg, swcfg).to(device)

    iVec = ((torch.rand(in_dim) * cycles).round() / cycles).to(device)
    start_time = time.time()
    oVec = iVec.type(torch.float)
    # Time of the float cast above, multiplied by the cycle count.
    print("--- %s seconds ---" % ((time.time() - start_time) * cycles))

    print("input", iVec)
    print("real output", oVec)

    # hwcfg is shared and mutated in place: the input side is configured as
    # bipolar, the output monitor as unipolar, then the mode is restored.
    hwcfg["mode"] = "bipolar"
    iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
    iVecRNG = RNG(hwcfg, swcfg)().to(device)
    iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)
    iVecPE = ProgError(iVec, hwcfg).to(device)
    hwcfg["mode"] = "unipolar"
    oVecPE = ProgError(oVec, hwcfg).to(device)
    hwcfg["mode"] = "bipolar"

    with torch.no_grad():
        idx = torch.zeros(iVecSource.size()).type(torch.long).to(device)
        start_time = time.time()
        for i in range(cycles):
            iBS = iVecBS(idx + i)
            iVecPE.Monitor(iBS)

            oVecU = uBi2Uni(iBS)
            oVecPE.Monitor(oVecU)
        print("--- %s seconds ---" % (time.time() - start_time))

        # torch.min/torch.max reduce in one call instead of iterating the
        # tensor element by element through the Python builtins.
        in_err = iVecPE()[1]
        out_err = oVecPE()[1]
        print("final input error: ", torch.min(in_err), torch.max(in_err))
        print("final output error:", torch.min(out_err), torch.max(out_err))
        print("final output pp:", oVecPE()[0].data)
        print("final output pe:", oVecPE()[1].data)
        print("final output mean error:", oVecPE()[1].mean())

        result_pe = oVecPE()[1].cpu().numpy()
        lo_pos = result_pe.argmin()
        hi_pos = result_pe.argmax()
        print(result_pe)
        print(lo_pos, hi_pos)
        print(result_pe[lo_pos], result_pe[hi_pos])
        print(iVec[lo_pos], iVec[hi_pos])
def test_fsuconv2d():
    """Exercise ``FSUConv2d`` against ``torch.nn.Conv2d`` and print its errors.

    For each (mode, scaled) combination a ``Conv2d`` layer gets random
    weights and bias quantised to ``2**width`` levels, and an ``FSUConv2d``
    sharing those parameters is driven for ``2**width`` cycles.
    ``ProgError`` monitors track the input and the output (the output
    reference is the ``Conv2d`` result).  The elapsed time, min/max input and
    output errors and the output RMSE are printed; when ``plot_en`` is set,
    the error histogram and the per-cycle ``1 - rmse`` curve are plotted.
    """
    plot_en = False

    hwcfg_input = {"width": 8, "rng": "Sobol", "dimr": 1}
    hwcfg = {
        "width": 8,
        "mode": "bipolar",
        "scale": None,
        "depth": 20,
        "rng": "Sobol",
        "dimr": 1,
    }
    swcfg = {"btype": torch.float, "rtype": torch.float, "stype": torch.float}
    rng = hwcfg["rng"]

    in_channels = 32
    out_channels = 16
    kernel_size = 3
    stride = 2
    padding = 0
    dilation = 1
    groups = 1
    bias = True
    padding_mode = 'zeros'

    modes = ["bipolar", "unipolar"]
    scaled = [True, False]

    def _rmse(err):
        # sqrt(sum(err^2) / number of elements)
        return torch.sqrt(torch.sum(torch.mul(err, err)) / err.numel())

    for mode in modes:
        for scale in scaled:
            hwcfg["mode"] = mode
            hwcfg_input["mode"] = mode
            # ``bias`` is a bool and contributes 0 or 1 to the sum.
            scale_mod = (kernel_size * kernel_size * in_channels +
                         bias) if scale else 1
            hwcfg["scale"] = scale_mod
            length = 2**hwcfg["width"]
            length_input = 2**hwcfg_input["width"]
            result_pe_cycle = []

            conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size,
                                     stride=stride, padding=padding,
                                     dilation=dilation, groups=groups,
                                     bias=bias,
                                     padding_mode=padding_mode).to(device)

            if mode == "unipolar":
                conv2d.weight.data = torch.rand(
                    out_channels, in_channels, kernel_size,
                    kernel_size).mul(length).round().div(length).to(device)
                if bias is True:
                    conv2d.bias.data = torch.rand(out_channels).mul(
                        length).round().div(length).to(device)
            elif mode == "bipolar":
                conv2d.weight.data = torch.rand(
                    out_channels, in_channels, kernel_size, kernel_size).mul(
                        2).sub(1).mul(length).round().div(length).to(device)
                if bias is True:
                    conv2d.bias.data = torch.rand(out_channels).mul(2).sub(
                        1).mul(length).round().div(length).to(device)

            uconv2d = FSUConv2d(in_channels, out_channels, kernel_size,
                                stride=stride, padding=padding,
                                dilation=dilation, groups=groups, bias=bias,
                                padding_mode=padding_mode,
                                weight_ext=conv2d.weight,
                                bias_ext=conv2d.bias,
                                hwcfg=hwcfg, swcfg=swcfg).to(device)

            input_size = (128, 32)
            iVec = ((torch.rand(32, in_channels, input_size[0], input_size[1])
                     * length_input).round() / length_input).to(device)
            oVec = conv2d(iVec)

            # NOTE(review): BinGen is configured with ``hwcfg`` while RNG
            # uses ``hwcfg_input``; both widths are 8 here, so left as is.
            # Confirm which config the input source should follow.
            iVecSource = BinGen(iVec, hwcfg, swcfg)().to(device)
            iVecRNG = RNG(hwcfg_input, swcfg)().to(device)
            iVecBS = BSGen(iVecSource, iVecRNG, swcfg).to(device)

            # hwcfg is shared and mutated in place: the input monitor is
            # built with scale 1, the output monitor with the layer's scale.
            hwcfg["scale"] = 1
            iVecPE = ProgError(iVec, hwcfg).to(device)
            hwcfg["scale"] = scale_mod
            oVecPE = ProgError(oVec, hwcfg).to(device)

            with torch.no_grad():
                idx = torch.zeros(iVecSource.size()).type(
                    torch.long).to(device)
                start_time = time.time()
                for i in range(length):
                    iBS = iVecBS(idx + i)
                    iVecPE.Monitor(iBS)

                    oVecU = uconv2d(iBS)
                    oVecPE.Monitor(oVecU)
                    # The per-cycle RMSE is only consumed by the plot.
                    if plot_en is True:
                        result_pe_cycle.append(1 - _rmse(oVecPE()[1]).item())

                in_err = iVecPE()[1]
                out_err = oVecPE()[1]
                rmse = _rmse(out_err)

                print("--- %s seconds ---" % (time.time() - start_time))
                print("RNG: " + rng + ", data: " + mode + ", scaled: " +
                      str(scale))
                print("input error: ", "min: ",
                      torch.min(in_err).item(), "max: ",
                      torch.max(in_err).item())
                print("output error: ", "min: ",
                      torch.min(out_err).item(), "max: ",
                      torch.max(out_err).item(), "RMSE: ", rmse.item())
                print()

                if plot_en is True:
                    result_pe = oVecPE()[1].cpu().numpy()
                    print("error distribution=========>")
                    plt.figure(figsize=(3, 1.5))
                    # arguments are passed to np.histogram
                    plt.hist(result_pe.flatten(), bins='auto')
                    plt.show()
                    print("progressive accuracy=========>")
                    plt.figure(figsize=(3, 1.5))
                    plt.plot(result_pe_cycle)
                    plt.show()