def printSuffix(self, expr: IR.Expr):
    self.out.printf('\n')

    type = self.decls[expr.idf]

    if Type.isInt(type):
        self.out.printf('return ', indent=True)
        self.print(expr)
        self.out.printf(';\n')
    elif Type.isTensor(type):
        idfr = expr.idf
        exponent = self.scales[expr.idf]
        num = 2 ** exponent

        if type.dim == 0:
            # Scalar output: emit a single cout that converts the
            # fixed-point value back to float using its scale.
            self.out.printf('cout << ', indent=True)
            self.out.printf('float(' + idfr + ')*' + str(num))
            self.out.printf(' << endl;\n')
        else:
            # Tensor output: emit nested loops (i, j, k, ...) that print
            # each element as a float.
            iters = []
            for i in range(type.dim):
                s = chr(ord('i') + i)
                tempVar = IR.Var(s)
                iters.append(tempVar)
            expr_1 = IRUtil.addIndex(expr, iters)
            cmds = IRUtil.loop(type.shape, iters,
                               [IR.PrintAsFloat(expr_1, exponent)])
            self.print(IR.Prog(cmds))
    else:
        assert False

    self.out.decreaseIndent()
    self.out.printf('}\n', indent=True)

    self.out.close()
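# --- Illustrative sketch (not part of the compiler) ---
# The C++ emitted above recovers a real value from a fixed-point integer by
# multiplying with 2**exponent, where exponent is the variable's scale. A
# minimal standalone demo of that round trip; to_fixed/to_float are
# hypothetical helpers introduced only for this sketch.

def to_fixed(x: float, scale: int) -> int:
    # Quantize a real number to an integer at the given (negative) scale.
    return int(round(x * 2 ** -scale))

def to_float(n: int, scale: int) -> float:
    # Dequantize, exactly what the generated 'float(var)*num' expression does.
    return float(n) * 2 ** scale

print(to_float(to_fixed(0.7231, -12), -12))  # 0.72314453125, accurate to 2**-12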
def visitId(self, node: AST.ID):
    idf = node.name

    prog = IR.Prog([])
    expr = IR.Var(idf, inputVar=(idf in self.globalVars))

    return (prog, expr)
def visitSum(self, node: AST.Sum):
    '''
    expr_out
    i = 0
    for (j = 0; j < n; j++)
      expr_in = prog_in
      expr_out = expr_out + expr_in
      i++

    1.  for i in [0, C]:
    2.    expr_out[i] = expr_out[i] + shr(expr_in[i])
    '''
    var_idf = node.name
    self.decls[var_idf] = Type.Int()

    (prog_in, expr_in) = self.visit(node.expr)

    start, end = node.start, node.end

    expr_out = self.getTempVar()
    type_out = node.type

    var = IR.Var(var_idf)
    var_iter = self.getTempIterator()
    iters = self.getTempIterators(type_out.dim)

    (scale_out, height_shr, height_noshr) = self.getScaleForTreeSum(
        self.scales[expr_in.idf], end - start)
    intv_out = self.getIntervalForTreeSum(
        self.intvs[expr_in.idf], end - start)

    # Tree sum to sum output of each iteration
    expr_in_idx = IRUtil.addIndex(expr_in, iters)
    expr_out_idx = IRUtil.addIndex(expr_out, iters)

    cmd1 = IR.Memset(expr_out, type_out.size())
    cmd2 = IR.Assn(expr_out_idx, IRUtil.add(
        expr_out_idx, IRUtil.shr(expr_in_idx, height_shr)))
    treeSum = IRUtil.loop(type_out.shape, iters, [cmd2])

    # Final program to sum output of each iteration
    prog_sum = [
        cmd1,
        IR.Assn(var, IR.Int(start)),
        IR.For(var_iter, 0, IRUtil.lt(var_iter, IR.Int(end - start)),
               prog_in.cmd_l + treeSum + [IR.Assn(var, IRUtil.inc(var))])
    ]

    prog_out = IR.Prog(prog_sum)

    self.decls[expr_out.idf] = type_out
    self.scales[expr_out.idf] = scale_out
    self.intvs[expr_out.idf] = intv_out

    return (prog_out, expr_out)
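# --- Illustrative sketch (not part of the compiler) ---
# Summing (end - start) fixed-point values can grow magnitudes by up to
# ceil(log2(n)) bits. getScaleForTreeSum splits that height: height_shr
# levels right-shift the summands (visible in cmd2 above), while
# height_noshr levels accumulate as-is, spending spare headroom. A minimal
# sketch of one plausible split; MAX_SCALE and the function body are
# assumptions for illustration, not the repo's implementation.

MAX_SCALE = -1  # hypothetical cap on how far a scale may rise

def scale_for_tree_sum(scale_in: int, n: int):
    height_total = (n - 1).bit_length()  # == ceil(log2(n)) for n >= 1
    scale_out = (min(scale_in + height_total, MAX_SCALE)
                 if scale_in < MAX_SCALE else scale_in)
    height_shr = scale_out - scale_in         # levels that shift right
    height_noshr = height_total - height_shr  # levels that add unshifted
    return (scale_out, height_shr, height_noshr)

print(scale_for_tree_sum(-12, 100))  # (-5, 7, 0): 100 summands need 7 bits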
def populateExpTable(self, p):
    [table_m, table_n] = self.expTableShape
    b = np.log2(table_n)

    # Currently looking at only 2D arrays
    assert table_m == 2

    [m, M] = self.expRange
    max = int(np.ldexp(M - m, -p))
    shl = self.getShl(max)

    #alpha_count = self.getAlphaCount(max, shl)
    alpha_count = table_n
    beta_count = table_n

    table = [[0 for _ in range(alpha_count)],
             [0 for _ in range(beta_count)]]

    # Table A covers the high-order bits of the argument.
    alpha = Common.wordLength - shl - b
    pRes = self.getScale(1)
    for i in range(alpha_count):
        num = i * 2 ** (alpha + p)
        exp = np.exp(-num)
        table[0][i] = int(np.ldexp(exp, -pRes))

    # Table B covers the low-order bits, offset by the range minimum m.
    beta = alpha - b
    pRes = self.getScale(abs(np.exp(-m)))
    for i in range(beta_count):
        num = m + i * 2 ** (beta + p)
        exp = np.exp(-num)
        table[1][i] = int(np.ldexp(exp, -pRes))

    tableVar = [
        IR.Var('EXP' + str(abs(p)) + 'A', inputVar=True),
        IR.Var('EXP' + str(abs(p)) + 'B', inputVar=True)
    ]

    return [table, tableVar]
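# --- Illustrative sketch (not part of the compiler) ---
# The two tables exploit e^-(a+b) = e^-a * e^-b: the high-order bits of the
# fixed-point argument index table A, the low-order bits index table B, and
# the generated code multiplies the two looked-up values. A minimal
# floating-point demo of that decomposition; the table size and bit split
# below are assumptions for illustration only.

import math

TABLE_BITS = 8  # corresponds to log2(table_n) above

def exp_two_tables(x: float, lo: float, hi: float) -> float:
    # Map x in [lo, hi) to a (2*TABLE_BITS)-bit fixed-point index, split it.
    idx = int((x - lo) / (hi - lo) * (1 << (2 * TABLE_BITS)))
    a = idx >> TABLE_BITS               # high half: coarse steps (table A)
    b = idx & ((1 << TABLE_BITS) - 1)   # low half: fine steps (table B)
    step = (hi - lo) / (1 << (2 * TABLE_BITS))
    return math.exp(-(lo + (a << TABLE_BITS) * step)) * math.exp(-(b * step))

print(exp_two_tables(1.2345, 0.0, 16.0))  # ~= math.exp(-1.2345), up to quantization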
def visitLet(self, node: AST.Let):
    (prog_decl, expr_decl) = self.visit(node.decl)
    type_decl = node.decl.type
    idf = node.name

    # e1 : Int
    if Type.isInt(type_decl):
        self.decls[idf] = Type.Int()

        (prog_in, expr_in) = self.visit(node.expr)

        cmd = IR.Assn(IR.Var(idf), expr_decl)
        prog_let = IR.Prog([cmd])

        prog_out = IRUtil.concatPrograms(prog_decl, prog_let, prog_in)

        return (prog_out, expr_in)
    # e1 : Tensor{(),(..)}
    else:
        self.scales[idf] = self.scales[expr_decl.idf]
        self.intvs[idf] = self.intvs[expr_decl.idf]

        if isinstance(node.decl, AST.Decl):
            self.globalVars.append(idf)
            self.decls[idf] = node.decl.type
            expr_decl.idf = idf
            expr_decl.inputVar = True

        (prog_in, expr_in) = self.visit(node.expr)

        prog_in = prog_in.subst(idf, expr_decl)
        expr_in = expr_in.subst(idf, expr_decl)

        prog_out = IRUtil.concatPrograms(prog_decl, prog_in)

        return (prog_out, expr_in)
def printSuffix(self, expr: IR.Expr):
    self.out.printf('\n')

    type = self.decls[expr.idf]

    if Type.isInt(type):
        self.out.printf('return ', indent=True)
        self.print(expr)
        self.out.printf(';\n')
    elif Type.isTensor(type):
        idfr = expr.idf
        exponent = self.scales[expr.idf]
        num = 2 ** exponent

        if type.dim == 0:
            # Scalar output on Arduino: print as float with 6 decimals.
            self.out.printf('Serial.println(', indent=True)
            self.out.printf('float(' + idfr + ')*' + str(num))
            self.out.printf(', 6);\n')
        else:
            iters = []
            for i in range(type.dim):
                s = chr(ord('i') + i)
                tempVar = IR.Var(s)
                iters.append(tempVar)
            expr_1 = IRUtil.addIndex(expr, iters)
            cmds = IRUtil.loop(type.shape, iters,
                               [IR.PrintAsFloat(expr_1, exponent)])
            self.print(IR.Prog(cmds))
    else:
        assert False

    self.out.decreaseIndent()
    self.out.printf('}\n', indent=True)

    self.out.close()

    with open(os.path.join(self.outputDir, "ram.usage"), "w") as f:
        f.write("Estimated RAM usage :: %d bytes" % (self.maxRAMestimate))
def printSuffix(self, expr: IR.Expr):
    self.out.printf('\n')

    # In variable-bitwidth fixed-point mode, emit nested loops that free the
    # heap-allocated input tensor X, deallocating one dimension per level.
    if config.vbwEnabled and forFixed():
        bw = self.varsForBitwidth['X']
        typ_str = "int%d_t" % bw
        size = self.decls['X'].shape
        sizestr = ''.join([("[%d]" % i) for i in size])
        Xindexstr = ''
        Xintstar = ''.join(["*" for i in size])
        for i in range(len(size)):
            Xindexstr += (("[i%d]" % (i - 1)) if i > 0 else "")
            self.out.printf("for (int i%d = 0; i%d < %d; i%d++) {\n" %
                            (i, i, size[i], i), indent=True)
            self.out.increaseIndent()
        for i in range(len(size) - 1, -1, -1):
            self.out.decreaseIndent()
            self.out.printf("}\n", indent=True)
            self.out.printf("delete[] X%s;\n" % (Xindexstr), indent=True)
            # Drop one trailing "[iN]"; 4 chars per index assumes single
            # digits, hence the assert below.
            Xindexstr = Xindexstr[:-4] if len(Xindexstr) > 0 else Xindexstr
        assert len(size) < 10, "Too simple logic for printing indices used, cannot handle 10+ dim tensors"

    type = self.decls[expr.idf]

    if Type.isInt(type):
        self.out.printf('return ', indent=True)
        self.print(expr)
        self.out.printf(';\n')
    elif Type.isTensor(type):
        idfr = expr.idf
        exponent = self.scales[expr.idf]
        num = 2 ** exponent

        if type.dim == 0:
            self.out.printf('cout << ', indent=True)
            self.out.printf('float(' + idfr + ')*' + str(num))
            self.out.printf(' << endl;\n')
        else:
            iters = []
            for i in range(type.dim):
                s = chr(ord('i') + i)
                tempVar = IR.Var(s)
                iters.append(tempVar)
            expr_1 = IRUtil.addIndex(expr, iters)
            cmds = IRUtil.loop(type.shape, iters,
                               [IR.PrintAsFloat(expr_1, exponent)])
            self.print(IR.Prog(cmds))
    else:
        assert False

    self.out.decreaseIndent()
    self.out.printf('}\n', indent=True)

    def isInt(a):
        try:
            int(a)
            return True
        except (ValueError, TypeError):
            return False

    # Emit a dispatch function that selects among the generated
    # seedotFixed<i> variants by switch index.
    if forFixed():
        if (int(self.printSwitch) if isInt(self.printSwitch) else -2) > -1:
            self.out.printf("const int switches = %d;\n" %
                            (int(self.printSwitch)), indent=True)

            self.out.printf('void seedotFixedSwitch(int i, MYINT **X_temp, int& res) {\n', indent=True)
            self.out.increaseIndent()
            self.out.printf('switch(i) {\n', indent=True)
            self.out.increaseIndent()
            for i in range(int(self.printSwitch)):
                self.out.printf('case %d: res = seedotFixed%d(X_temp); return;\n' %
                                (i, i + 1), indent=True)
            self.out.printf('default: res = -1; return;\n', indent=True)
            self.out.decreaseIndent()
            self.out.printf('}\n', indent=True)
            self.out.decreaseIndent()
            self.out.printf('}\n', indent=True)

    if debugCompiler():
        print("Closing File after outputting cpp code: ID " + self.idStr)

    self.out.close()
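# --- Illustrative sketch (not part of the compiler) ---
# What the deallocation loops above emit for a hypothetical 2-D input
# X[3][4]: the innermost brace closes first, so each 'delete[]' lands one
# nesting level up, freeing one dimension per level. A standalone rerun of
# the same string logic (indentation omitted):

size = [3, 4]
lines, Xindexstr = [], ''
for i in range(len(size)):
    Xindexstr += ("[i%d]" % (i - 1)) if i > 0 else ""
    lines.append("for (int i%d = 0; i%d < %d; i%d++) {" % (i, i, size[i], i))
for i in range(len(size) - 1, -1, -1):
    lines.append("}")
    lines.append("delete[] X%s;" % Xindexstr)
    Xindexstr = Xindexstr[:-4]  # drop one "[iN]"
print('\n'.join(lines))
# for (int i0 = 0; i0 < 3; i0++) {
# for (int i1 = 0; i1 < 4; i1++) {
# }
# delete[] X[i0];
# }
# delete[] X;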
def visitBopSparseMul(self, node: AST.Bop1):
    (prog_in_A, expr_in_A) = self.visit(node.expr1)
    (prog_in_B, expr_in_B) = self.visit(node.expr2)

    [P, Q] = node.expr1.type.shape
    [Q, R] = node.expr2.type.shape
    assert R == 1

    expr_out = self.getTempVar()
    type_out = node.type

    scale_in_A, scale_in_B = self.scales[expr_in_A.idf], self.scales[expr_in_B.idf]
    intv_in_A, intv_in_B = self.intvs[expr_in_A.idf], self.intvs[expr_in_B.idf]

    [shr_A, shr_B] = self.getShrForMul(scale_in_A, scale_in_B)

    scale_treeSum = self.getScaleForMul(scale_in_A, shr_A, scale_in_B, shr_B)
    intv_treeSum = self.getIntvervalForMul(intv_in_A, shr_A, intv_in_B, shr_B)

    (scale_out, height_shr, height_noshr) = self.getScaleForTreeSum(scale_treeSum, Q)
    intv_out = self.getIntervalForTreeSum(intv_treeSum, Q)

    in_A_idx = IR.Var(expr_in_A.idf[0] + 'idx', expr_in_A.idx, inputVar=True)
    in_A_val = IR.Var(expr_in_A.idf[0] + 'val', expr_in_A.idx, inputVar=True)

    shr_A = self.formatShr(shr_A)
    shr_B = self.formatShr(shr_B)
    height_shr = self.formatShr(height_shr)

    in_A_idx.inputVar = False
    in_A_val.inputVar = False
    expr_in_B.inputVar = False
    expr_out.inputVar = False

    cmd0 = IR.Comment(expr_in_A.idf + ' |*| ' + expr_in_B.idf)
    cmd1 = IR.Memset(expr_out, type_out.size())

    funcCall = IR.FuncCall("SparseMatMul", {
        in_A_idx: "Aidx",
        in_A_val: "Aval",
        expr_in_B: "B",
        expr_out: "C",
        IR.Int(Q): "K",
        shr_A: "shrA",
        shr_B: "shrB",
        height_shr: "shrC"
    })

    prog_mul = IR.Prog([cmd0, cmd1, funcCall])

    prog_out = IRUtil.concatPrograms(prog_in_A, prog_in_B, prog_mul)

    self.decls[expr_out.idf] = type_out
    self.scales[expr_out.idf] = scale_out
    self.intvs[expr_out.idf] = intv_out

    # Length of Aidx and Aval hard-coded to 100.
    # This is safe as it will be ignored in the generated code.
    self.decls.update({
        in_A_idx.idf: Type.Tensor([100]),
        in_A_val.idf: Type.Tensor([100]),
    })
    self.globalVars.append(in_A_idx.idf)
    self.globalVars.append(in_A_val.idf)

    return (prog_out, expr_out)
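# --- Illustrative sketch (not part of the compiler) ---
# The generated SparseMatMul call consumes A in a compressed column form:
# Aval lists the nonzero values of each column of A in order, and Aidx
# holds their 1-based row indices, with a 0 terminating each column. A
# plain-Python model of that convention under those assumptions (the exact
# C library layout and shift semantics may differ):

def sparse_mat_mul(Aidx, Aval, B, P, shrA, shrB, shrC):
    # C = ((A >> shrA) * (B >> shrB)) >> shrC for a P x K sparse matrix A
    # and a K-element vector B, walking one column of A per element of B.
    C = [0] * P
    it_idx = it_val = 0
    for b in B:
        b >>= shrB
        while Aidx[it_idx] != 0:       # 0 marks the end of this column
            a = Aval[it_val] >> shrA
            C[Aidx[it_idx] - 1] += (a * b) >> shrC
            it_idx += 1
            it_val += 1
        it_idx += 1                    # skip the column delimiter
    return C

# A = [[5, 0], [0, 3]], B = [2, 4]  ->  C = [10, 12] (with no shifts)
print(sparse_mat_mul([1, 0, 2, 0], [5, 3], [2, 4], 2, 0, 0, 0))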
def getTempIterator(self):
    var = IR.Var('i' + str(self.counter_iter))
    self.counter_iter += 1
    return var
def getTempVar(self):
    var = IR.Var('tmp' + str(self.counter_var))
    self.counter_var += 1
    return var