def set_op_node_name(self, node: pm.Node, operand: OperandTemplate):
    """Record ``node.name`` as the node name of ``operand``.

    Args:
        node: Graph node whose ``name`` is assigned to the operand.
        operand: Operand template that receives the name through
            ``operand.set_node_name``.

    Raises:
        RuntimeError: If ``operand.node_name`` is already set (not ``None``)
            and differs from ``node.name``.
    """
    previous = operand.node_name
    already_named = previous is not None
    if already_named and previous != node.name:
        raise RuntimeError(
            f"Name already set to different value for operand:\n"
            f"Previous name: {previous}\n"
            f"New name: {node.name}")
    # Re-assigning an identical name is allowed and simply repeats the call.
    operand.set_node_name(node.name)
def set_dim_values(self, node: pm.Node, operand: OperandTemplate):
    """Bind the symbolic dimensions of ``operand`` to the shape of ``node``.

    When the operand is not yet instantiated, every axis ``j`` of the node is
    paired with the symbol ``operand.shape_list[j]``. The size is read from
    ``node.shape`` through ``operand.permutation`` when that permutation has
    exactly one entry per node axis, and through the identity order
    otherwise. Each (symbol, size) pair is stored on the operand and mirrored
    into ``self.required_params``: unknown symbols are added, known but unset
    symbols are set, and known symbols with a different value are an error.

    Args:
        node: Graph node providing ``shape`` and ``name``.
        operand: Operand template providing ``shape_list``, ``permutation``,
            ``shape_symbols``, ``is_instantiated`` and
            ``update_shape_symbols``.

    Raises:
        RuntimeError: If the node has more axes than the operand declares
            symbols, if a symbol is already bound to a different size, or if
            the number of bound symbols differs from ``len(shape_list)``
            once binding is done.
    """
    if not operand.is_instantiated():
        shape = node.shape
        rank = len(shape)
        if rank > len(operand.shape_list):
            # Without this guard the symbol lookup below would fail with a
            # bare IndexError that names neither the node nor the operand.
            raise RuntimeError(
                f"Node {node.name} has more dimensions than operand "
                f"{operand.name} declares:\n"
                f"Node shape: {shape}\n"
                f"Operand shape variables: {operand.shape_list}")

        # A permutation is honoured only when it covers every node axis.
        if len(operand.permutation) == rank:
            axis_of = operand.permutation
        else:
            axis_of = range(rank)

        for j in range(rank):
            key = operand.shape_list[j]
            size = shape[axis_of[j]]
            operand.update_shape_symbols(key, size)

            if key not in self.required_params:
                self.add_required_param(key, size)
                continue

            param = self.required_params[key]
            if not param.is_set():
                self.set_required_param(key, size)
            elif param.value != size:
                raise RuntimeError(
                    f"Inconsistent dimension sizes for operation {self.op_name}{self.instance_id}\n"
                    f"Key: {key}\n"
                    f"Size: {param.value}\n"
                    f"Node shape: {shape}\n"
                    f"Node name: {node.name}\n"
                    f"Operand name: {operand.name}\n"
                    f"Shape list: {operand.shape_list}")

    # Checked for already-instantiated operands as well.
    if len(operand.shape_list) != len(operand.shape_symbols):
        raise RuntimeError(
            f"All shape values were not set for node {node.name}, operand {operand.name}:\n"
            f"Node shape: {node.shape}\n"
            f"Operand shape variables: {operand.shape_list}")
def global_avg_pool(hag: ArchitectureNode):
    """Build and return the "global_avg_pool" codelet template for ``hag``.

    The codelet has one input operand ("data", dims N/C/IH/IW) and one
    output operand ("out", dims N/C/OH/OW). Inside loops over N, C, IH, IW,
    OH and OW it moves ``data`` along DRAM -> VMEM1, issues a compute on
    "SIMD", and moves ``out`` along VMEM1 -> DRAM.

    Args:
        hag: Architecture node handed to ``Codelet``.

    Returns:
        The codelet object bound by the ``with Codelet(...)`` statement.
    """
    # TODO: Add option to create operand
    dtype_2 = OP_DTYPES[2]
    data = OperandTemplate("data", OP_DTYPES, ["N", "C", "IH", "IW"], dtype=dtype_2)
    out = OperandTemplate("out", OP_DTYPES, ["N", "C", "OH", "OW"], dtype=dtype_2)

    with Codelet("global_avg_pool", [data], [out], hag) as cdlt:
        cdlt.configure("start", "SIMD")
        cdlt.configure("start", "IMM", immediate_value=0)

        with Loop(0, "N") as batch, Loop(0, "C") as chan:
            with Loop(0, "IH") as in_row, Loop(0, "IW") as in_col:
                with Loop(0, "OH") as out_row, Loop(0, "OW") as out_col:
                    cdlt.transfer(
                        data[batch, chan, in_row + out_row, in_col + out_col],
                        ["DRAM", "VMEM1"])
                    # NOTE(review): the op is "MAX" although this is the
                    # average-pool builder - confirm this is intended.
                    cdlt.compute("MAX", [data, data], [out], target="SIMD")
                    cdlt.transfer(out[batch, chan, out_row, out_col],
                                  ["VMEM1", "DRAM"])
    return cdlt
def maxpool2d(hag: ArchitectureNode):
    """Build and return the "max_pool" codelet template for ``hag``.

    The codelet has one input operand ("data", dims N/C/IH/IW) and one
    output operand ("out", dims N/C/OH/OW). Inside loops over N, C, KH, KW,
    OH and OW it moves the input element at
    ``(y * "sy" + kh, x * "sx" + kw)`` along DRAM -> VMEM1, issues a "MAX"
    compute on "SIMD", and moves ``out`` along VMEM1 -> DRAM.

    Args:
        hag: Architecture node handed to ``Codelet``.

    Returns:
        The codelet object bound by the ``with Codelet(...)`` statement.
    """
    # TODO: Add option to create operand
    dtype_2 = OP_DTYPES[2]
    data = OperandTemplate("data", OP_DTYPES, ["N", "C", "IH", "IW"], dtype=dtype_2)
    out = OperandTemplate("out", OP_DTYPES, ["N", "C", "OH", "OW"], dtype=dtype_2)

    with Codelet("max_pool", [data], [out], hag) as cdlt:
        cdlt.configure("start", "SIMD")
        cdlt.configure("start", "IMM", immediate_value=0)

        with Loop(0, "N") as batch, Loop(0, "C") as chan:
            with Loop(0, "KH") as k_row, Loop(0, "KW") as k_col:
                with Loop(0, "OH") as out_row, Loop(0, "OW") as out_col:
                    window_elem = data[batch, chan,
                                       out_row * "sy" + k_row,
                                       out_col * "sx" + k_col]
                    cdlt.transfer(window_elem, ["DRAM", "VMEM1"])
                    cdlt.compute("MAX", [data, data], [out], target="SIMD")
                    cdlt.transfer(out[batch, chan, out_row, out_col],
                                  ["VMEM1", "DRAM"])
    return cdlt
def conv2d_bias(hag: ArchitectureNode):
    """Build and return the "conv_bias" codelet template for ``hag``.

    Operands: "data" (N/IC/IH/IW), "weight" (OC/IC/KH/KW), "bias" (OC) as
    inputs and "out" (N/OC/OH/OW) as output. Inside loops over OC, N, IC,
    KH, KW, OH and OW the codelet loads weight, bias, data and out from DRAM
    into WBUF, BBUF, IBUF and OBUF respectively, issues an "MVMUL" compute
    on "pe_array", and moves ``out`` along OBUF -> DRAM.

    Args:
        hag: Architecture node handed to ``Codelet``.

    Returns:
        The codelet object bound by the ``with Codelet(...)`` statement.
    """
    # TODO: Need to figure out how to change the memory layout
    dtype_0 = OP_DTYPES[0]
    dtype_2 = OP_DTYPES[2]
    data = OperandTemplate("data", OP_DTYPES, ["N", "IC", "IH", "IW"], dtype=dtype_0)
    weight = OperandTemplate("weight", OP_DTYPES, ["OC", "IC", "KH", "KW"], dtype=dtype_0)
    bias = OperandTemplate("bias", OP_DTYPES, ["OC"], dtype=dtype_2)
    out = OperandTemplate("out", OP_DTYPES, ["N", "OC", "OH", "OW"], dtype=dtype_2)

    with Codelet("conv_bias", [data, weight, bias], [out], hag,
                 required_params={}) as cdlt:
        for unit in ("systolic_array", "WBUF", "BBUF", "IBUF", "OBUF"):
            cdlt.configure("start", unit)

        with Loop(0, "OC") as out_ch, Loop(0, "N") as batch, Loop(0, "IC") as in_ch:
            with Loop(0, "KH") as k_row, Loop(0, "KW") as k_col:
                with Loop(0, "OH") as out_row, Loop(0, "OW") as out_col:
                    in_row = out_row * "stride" + k_row
                    in_col = out_col * "stride" + k_col

                    cdlt.transfer(weight[out_ch, in_ch, k_row, k_col], ["DRAM", "WBUF"])
                    cdlt.transfer(bias[out_ch], ["DRAM", "BBUF"])
                    cdlt.transfer(data[batch, in_ch, in_row, in_col], ["DRAM", "IBUF"])
                    cdlt.transfer(out[batch, out_ch, out_row, out_col], ["DRAM", "OBUF"])
                    # The compute is issued on whole operands; an indexed
                    # variant (data[...], weight[...], bias[oc] ->
                    # out[n, oc, y, x]) was left disabled in the original.
                    cdlt.compute("MVMUL", [data, weight, bias], [out], target="pe_array")
                    cdlt.transfer(out[batch, out_ch, out_row, out_col], ["OBUF", "DRAM"])

        # TODO: Add store off chip
        for unit in ("WBUF", "BBUF", "IBUF", "OBUF", "systolic_array"):
            cdlt.configure("end", unit)
    return cdlt
def relu(hag: ArchitectureNode):
    """Build and return the "relu" codelet template for ``hag``.

    Both the input operand "op1" and the output operand "out" use dims
    N/C/H/W. Inside loops over N, C, H and W the codelet moves ``op1`` along
    DRAM -> VMEM1, issues a "RELU" compute on "SIMD", and moves ``out``
    along VMEM1 -> DRAM.

    Args:
        hag: Architecture node handed to ``Codelet``.

    Returns:
        The codelet object bound by the ``with Codelet(...)`` statement.
    """
    dtype_2 = OP_DTYPES[2]
    op1 = OperandTemplate("op1", OP_DTYPES, ["N", "C", "H", "W"], dtype=dtype_2)
    out = OperandTemplate("out", OP_DTYPES, ["N", "C", "H", "W"], dtype=dtype_2)

    with Codelet("relu", [op1], [out], hag) as cdlt:
        cdlt.configure("start", "SIMD")
        # A "start" configure for "VMEM" was left disabled in the original.
        with Loop(0, "N") as batch, Loop(0, "C") as chan:
            with Loop(0, "H") as row, Loop(0, "W") as col:
                cdlt.transfer(op1[batch, chan, row, col], ["DRAM", "VMEM1"])
                cdlt.compute("RELU", [op1], [out], target="SIMD")
                cdlt.transfer(out[batch, chan, row, col], ["VMEM1", "DRAM"])
    return cdlt
def maxpool2d_nchw(hag: ArchitectureNode):
    """Build and return a two-pass "max_pool" codelet template for ``hag``.

    Operands: "data" (N/C/IH/IW) as input and "out" (N/C/OH/OW) as output,
    both created without an explicit dtype.

    Pass 1 loops over N, C, OH and OW: it moves the strided input element
    along DRAM -> VMEM, issues "MAX" over ``data`` and "IMM" on "SIMD", and
    moves ``out`` along SIMD -> VMEM.

    Pass 2 loops over N, C, KH, KW, OH and OW: it moves the window element
    along DRAM -> VMEM, transfers ``out`` with the path ["VMEM"], issues
    "MAX" on "SIMD", and transfers ``out`` along pe_array -> OBUF -> DRAM.

    Args:
        hag: Architecture node handed to ``Codelet``.

    Returns:
        The codelet object bound by the ``with Codelet(...)`` statement.
        The original fell off the end and returned ``None``, unlike every
        other builder in this file.
    """
    # loop_order = ["n", "oc", "kh", "kh", "ow", "oh"]
    data = OperandTemplate("data", OP_DTYPES, ["N", "C", "IH", "IW"])
    out = OperandTemplate("out", OP_DTYPES, ["N", "C", "OH", "OW"])
    # TODO: Add option to create operand

    with Codelet("max_pool", [data], [out], hag) as cdlt:
        cdlt.configure("start", "SIMD")
        cdlt.configure("start", "VMEM")
        # Step 1 of the original plan: set IMM to negative infinity.
        # No immediate value is passed here - presumably still to be done.
        cdlt.configure("start", "IMM")

        # Pass 1
        with Loop(0, "N") as n, Loop(0, "C") as c:
            with Loop(0, "OH") as y, Loop(0, "OW") as x:
                cdlt.transfer(data[n, c, y * "stride", x * "stride"],
                              ["DRAM", "VMEM"])
                cdlt.compute("MAX", [data, "IMM"], [out], target="SIMD")
                cdlt.transfer(out[n, c, y, x], ["SIMD", "VMEM"])

        # Pass 2
        with Loop(0, "N") as n, Loop(0, "C") as c:
            with Loop(0, "KH") as kh, Loop(0, "KW") as kw:
                with Loop(0, "OH") as y, Loop(0, "OW") as x:
                    cdlt.transfer(
                        data[n, c, y * "stride" + kh, x * "stride" + kw],
                        ["DRAM", "VMEM"])
                    cdlt.transfer(out[n, c, y, x], ["VMEM"])
                    cdlt.compute("MAX", [data], [out[n]], target="SIMD")
                    # NOTE(review): this path names "pe_array" and "OBUF",
                    # which are never started in this codelet - confirm.
                    cdlt.transfer(out[n, c, y, x], ["pe_array", "OBUF", "DRAM"])

        # TODO: Add store off chip
        # NOTE(review): the units ended here (WBUF/IBUF/OBUF) do not match
        # the units started above (SIMD/VMEM/IMM) - confirm which is right.
        cdlt.configure("end", "WBUF")
        cdlt.configure("end", "IBUF")
        cdlt.configure("end", "OBUF")
    return cdlt
def elem_add(hag: ArchitectureNode):
    """Build and return the "elem_add" codelet template for ``hag``.

    Inputs "op1" and "op2" and the output "add_out" all use dims N/C/H/W.
    Inside loops over N, C, H and W the codelet moves ``op1`` along
    DRAM -> VMEM1 and ``op2`` along DRAM -> VMEM2, issues an "ADD" compute
    on "SIMD", and moves the output along VMEM1 -> DRAM.

    Args:
        hag: Architecture node handed to ``Codelet``.

    Returns:
        The codelet object bound by the ``with Codelet(...)`` statement.
    """
    dtype_2 = OP_DTYPES[2]
    op1 = OperandTemplate("op1", OP_DTYPES, ["N", "C", "H", "W"], dtype=dtype_2)
    op2 = OperandTemplate("op2", OP_DTYPES, ["N", "C", "H", "W"], dtype=dtype_2)
    out = OperandTemplate("add_out", OP_DTYPES, ["N", "C", "H", "W"], dtype=dtype_2)

    with Codelet("elem_add", [op1, op2], [out], hag) as cdlt:
        cdlt.configure("start", "SIMD")
        with Loop(0, "N") as batch, Loop(0, "C") as chan:
            with Loop(0, "H") as row, Loop(0, "W") as col:
                cdlt.transfer(op1[batch, chan, row, col], ["DRAM", "VMEM1"])
                cdlt.transfer(op2[batch, chan, row, col], ["DRAM", "VMEM2"])
                cdlt.compute("ADD", [op1, op2], [out], target="SIMD")
                cdlt.transfer(out[batch, chan, row, col], ["VMEM1", "DRAM"])
    return cdlt
def gemm(hag: ArchitectureNode):
    """Build and return the "gemm" codelet template for ``hag``.

    Operands: "data" (M/N), "weight" (N/P) and "bias" (P) as inputs and
    "out" (M/P) as output. Inside loops over P, N and M the codelet loads
    data, weight, bias and out from DRAM into IBUF, WBUF, BBUF and OBUF
    respectively, issues an "MVMUL" compute on "pe_array", and moves ``out``
    along OBUF -> DRAM.

    Args:
        hag: Architecture node handed to ``Codelet``.

    Returns:
        The codelet object bound by the ``with Codelet(...)`` statement.
    """
    dtype_0 = OP_DTYPES[0]
    dtype_2 = OP_DTYPES[2]
    data = OperandTemplate("data", OP_DTYPES, ["M", "N"], dtype=dtype_0)
    weight = OperandTemplate("weight", OP_DTYPES, ["N", "P"], dtype=dtype_0)
    bias = OperandTemplate("bias", OP_DTYPES, ["P"], dtype=dtype_2)
    out = OperandTemplate("out", OP_DTYPES, ["M", "P"], dtype=dtype_2)

    with Codelet("gemm", [data, weight, bias], [out], hag,
                 required_params={}) as cdlt:
        for unit in ("systolic_array", "WBUF", "IBUF", "BBUF", "OBUF"):
            cdlt.configure("start", unit)

        with Loop(0, "P") as col, Loop(0, "N") as inner, Loop(0, "M") as row:
            cdlt.transfer(data[row, inner], ["DRAM", "IBUF"])
            cdlt.transfer(weight[inner, col], ["DRAM", "WBUF"])
            cdlt.transfer(bias[col], ["DRAM", "BBUF"])
            cdlt.transfer(out[row, col], ["DRAM", "OBUF"])
            cdlt.compute("MVMUL", [data, weight, bias], [out], target="pe_array")
            cdlt.transfer(out[row, col], ["OBUF", "DRAM"])

        # TODO: Add store off chip
        for unit in ("WBUF", "IBUF", "OBUF", "BBUF", "systolic_array"):
            cdlt.configure("end", unit)
    return cdlt