def build(self, types: List[ir.Type]):
    """Builds the ir.Module.

    The module has only the @main function, which converts its input
    through the list of types and then back to the initial type. The
    roundtrip type (the first entry of `types`) must be a dense tensor.

    Args:
      types: Non-empty list of types. `types[0]` is used as both the
        argument type and the result type of @main; the remaining entries
        are the intermediate conversion targets, in order. The list is
        not modified.

    Returns:
      self.

    Raises:
      AssertionError: if build() has already been called on this object.
      IndexError: if `types` is empty (the module has already been created
        on self at that point).
    """
    assert self._module is None, 'StressTest: must not call build() repeatedly'
    self._module = ir.Module.create()
    # Work on a private copy: the conversion chain is built by popping and
    # appending, and doing that on the caller's list would silently rotate
    # and shorten it.
    chain = list(types)
    with ir.InsertionPoint(self._module.body):
        tp0 = chain.pop(0)
        self._roundtripTp = tp0
        # TODO: assert dense? assert element type is recognised by the TypeConverter?
        # Close the loop: the last conversion goes back to the initial type.
        chain.append(tp0)
        funcTp = ir.FunctionType.get(inputs=[tp0], results=[tp0])
        funcOp = builtin.FuncOp(name='main', type=funcTp)
        funcOp.attributes['llvm.emit_c_interface'] = ir.UnitAttr.get()
        with ir.InsertionPoint(funcOp.add_entry_block()):
            arg0 = funcOp.entry_block.arguments[0]
            self._assertEqualsRoundtripTp(arg0.type)
            # The first conversion consumes the function argument, which is
            # never released here; only intermediates are.
            v = st.ConvertOp(chain.pop(0), arg0)
            for tp in chain:
                w = st.ConvertOp(tp, v)
                # Release intermediate tensors before they fall out of scope.
                st.ReleaseOp(v.result)
                v = w
            self._assertEqualsRoundtripTp(v.result.type)
            std.ReturnOp(v)
    return self
def basicpy_ExecOp(self):
    """Creates a basicpy.exec op.

    The op is created with a single region at this builder's current
    insertion point and location, and one block is appended to that region.

    Returns:
      Insertion point to the body.
    """
    exec_op = _ir.Operation.create(
        "basicpy.exec",
        regions=1,
        ip=self.ip,
        loc=self.loc,
    )
    body_region = exec_op.regions[0]
    body_block = body_region.blocks.append()
    return _ir.InsertionPoint(body_block)
def scf_IfOp(self, results, condition: _ir.Value, with_else_region: bool):
    """Creates an SCF if op.

    Args:
      results: Result types passed through to the created operation.
      condition: Value used as the op's single operand.
      with_else_region: Whether to create a second (else) region.

    Returns:
      (if_op, then_ip, else_ip) if with_else_region, otherwise (if_op, then_ip)
    """
    region_count = 2 if with_else_region else 1
    if_op = _ir.Operation.create(
        "scf.if",
        results=results,
        operands=[condition],
        regions=region_count,
        loc=self.loc,
        ip=self.ip,
    )
    then_block = if_op.regions[0].blocks.append()
    if not with_else_region:
        return if_op, _ir.InsertionPoint(then_block)

    else_block = if_op.regions[1].blocks.append()
    then_ip = _ir.InsertionPoint(then_block)
    else_ip = _ir.InsertionPoint(else_block)
    return if_op, then_ip, else_ip
def emit_benchmark_wrapped_main_func(func, timer_func):
    """Wraps `func` in a public "main" function that times each call to it.

    Takes a function and a timer function, both represented as FuncOp
    objects, and returns a new function. The new function has the same
    signature as `func` plus one trailing memref-of-i64 argument (the timer
    buffer). Its body is a loop that calls `timer_func`, then `func`, then
    `timer_func` again, and stores the difference of the two timer values
    into the timer buffer at the loop index.

    The loop runs from 0 up to the size of dimension 0 of the timer buffer,
    in steps of 1. The last `len(func.type.results)` arguments before the
    timer buffer seed the loop-carried values; on every iteration they are
    replaced by the results of the call to `func`. The remaining leading
    arguments are forwarded to every call unchanged.

    This function can be used to create a "time measuring" variant of a
    function.

    Args:
      func: FuncOp of the function to be timed.
      timer_func: FuncOp of a function taking no arguments whose result is
        subtracted (end - start) to obtain the elapsed time.

    Returns:
      The newly created wrapping FuncOp named "main".
    """
    # The `func` parameter hides the module-level `func` dialect binding for
    # this whole function body, so `func.FuncOp` / `func.CallOp` /
    # `func.ReturnOp` would be looked up on the kernel op and fail. The
    # dialect is imported here under a distinct name rather than renaming the
    # parameter, which keeps keyword callers working.
    from mlir.dialects import func as func_dialect

    kernel_func = func
    i64_type = ir.IntegerType.get_signless(64)
    memref_of_i64_type = ir.MemRefType.get([-1], i64_type)
    wrapped_func = func_dialect.FuncOp(
        # Same signature and an extra buffer of indices to save timings.
        "main",
        (kernel_func.arguments.types + [memref_of_i64_type],
         kernel_func.type.results),
        visibility="public",
    )
    wrapped_func.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()

    num_results = len(kernel_func.type.results)
    with ir.InsertionPoint(wrapped_func.add_entry_block()):
        timer_buffer = wrapped_func.arguments[-1]
        zero = arith.ConstantOp.create_index(0)
        # One loop iteration per slot of the timer buffer.
        n_iterations = memref.DimOp(ir.IndexType.get(), timer_buffer, zero)
        one = arith.ConstantOp.create_index(1)
        # Arguments just before the timer buffer become loop-carried values.
        iter_args = list(wrapped_func.arguments[-num_results - 1:-1])
        loop = scf.ForOp(zero, n_iterations, one, iter_args)
        with ir.InsertionPoint(loop.body):
            start = func_dialect.CallOp(timer_func, [])
            call = func_dialect.CallOp(
                kernel_func,
                wrapped_func.arguments[:-num_results - 1] + loop.inner_iter_args,
            )
            end = func_dialect.CallOp(timer_func, [])
            time_taken = arith.SubIOp(end, start)
            memref.StoreOp(time_taken, timer_buffer, [loop.induction_variable])
            scf.YieldOp(list(call.results))
        func_dialect.ReturnOp(loop)

    return wrapped_func
def build_SpMM(attr: st.EncodingAttr):
    """Build SpMM kernel.

    Generates a linalg op for matrix multiplication using just the Python
    API. Effectively, a generic linalg op is constructed that computes
    C(i,j) += A(i,k) * B(k,j) for annotated matrix A.

    Args:
      attr: Encoding attribute attached to the 3x4 operand A.

    Returns:
      A new ir.Module containing the `spMxM` function.
    """
    module = ir.Module.create()
    f64 = ir.F64Type.get()
    operand_types = [
        ir.RankedTensorType.get([3, 4], f64, attr),  # A (annotated)
        ir.RankedTensorType.get([4, 2], f64),  # B
        ir.RankedTensorType.get([3, 2], f64),  # C
    ]
    with ir.InsertionPoint(module.body):

        @func.FuncOp.from_py_func(*operand_types)
        def spMxM(*args):
            lhs, rhs, out = args[0], args[1], args[2]
            return matmul_dsl(lhs, rhs, outs=[out])

    return module
def build_SDDMM(attr: st.EncodingAttr):
    """Build SDDMM kernel.

    Generates a linalg op for sampled matrix multiplication using just the
    Python API. Effectively, a generic linalg op is constructed that
    computes C(i,j) += S(i,j) SUM_k A(i,k) B(k,j) for sparse S.

    Args:
      attr: Encoding attribute attached to the 8x8 operand S.

    Returns:
      A new ir.Module containing the `sddmm` function.
    """
    module = ir.Module.create()
    f64 = ir.F64Type.get()
    shape = [8, 8]
    dense_a = ir.RankedTensorType.get(shape, f64)
    dense_b = ir.RankedTensorType.get(shape, f64)
    dense_c = ir.RankedTensorType.get(shape, f64)
    sampled_s = ir.RankedTensorType.get(shape, f64, attr)
    # Function argument order: A, B, S, then the output C.
    operand_types = [dense_a, dense_b, sampled_s, dense_c]
    with ir.InsertionPoint(module.body):

        @builtin.FuncOp.from_py_func(*operand_types)
        def sddmm(*args):
            lhs, rhs, sample, out = args[0], args[1], args[2], args[3]
            return sddmm_dsl(lhs, rhs, sample, outs=[out])

    return module
def create_instance_hier_op(
        self, inst_hier: InstanceHierarchyRoot) -> msft.InstanceHierarchyOp:
    """Create an instance hierarchy op 'inst_hier' and add it to the cache.

    Assert if one already exists in the cache.

    Args:
      inst_hier: Hierarchy root whose `_cache_key` unpacks to
        (root module symbol, instance name).

    Returns:
      The newly created op, which is also recorded in
      `self._instance_hier_cache`; `inst_hier` itself is recorded in
      `self._instance_hier_obj_cache` under the same key.

    Raises:
      AssertionError: if the cache already has an entry for this key.
    """
    self._build_instance_hier_cache()

    (root_mod_symbol, instance_name) = inst_hier._cache_key
    cache_key = (root_mod_symbol, instance_name)
    # This method stores entries under the (symbol, instance name) pair, so
    # the duplicate check has to use the same pair; testing the bare symbol
    # against tuple keys never matches.
    assert cache_key not in self._instance_hier_cache, \
        "Cannot create two instance hierarchy roots for same module"

    with ir.InsertionPoint(self._module.body):
        hier_op = msft.InstanceHierarchyOp.create(root_mod_symbol, instance_name)
    self._instance_hier_cache[cache_key] = hier_op
    self._instance_hier_obj_cache[cache_key] = inst_hier
    return hier_op
def SystolicArray(row_inputs, col_inputs, pe_builder):
    """Build a systolic array.

    Args:
      row_inputs: Array-typed value providing the row inputs.
      col_inputs: Array-typed value providing the column inputs.
      pe_builder: Callable invoked once with (row element, column element)
        values; its result becomes the processing element's output.

    Returns:
      A Value wrapping the `peOutputs` of the created msft.SystolicArrayOp.
    """
    rows_ty = hw.ArrayType(row_inputs.type)
    cols_ty = hw.ArrayType(col_inputs.type)

    # The PE body is built inside a scratch op first, because the result type
    # of the systolic array op depends on what `pe_builder` returns.
    scratch_op = ir.Operation.create("dummy", regions=1)
    pe_block = scratch_op.regions[0].blocks.append(rows_ty.element_type,
                                                   cols_ty.element_type)
    with ir.InsertionPoint(pe_block):
        row_elem = Value(pe_block.arguments[0])
        col_elem = Value(pe_block.arguments[1])
        pe_value = Value(pe_builder(row_elem, col_elem))
        pe_output_type = pe_value.type
        msft.PEOutputOp(pe_value.value)

    sa_result_type = dim(pe_output_type, cols_ty.size, rows_ty.size)
    array_op = msft.SystolicArrayOp(sa_result_type,
                                    get_value(row_inputs),
                                    get_value(col_inputs))
    # Move the finished PE block into the real op, then drop the scratch op.
    scratch_op.regions[0].blocks[0].append_to(array_op.regions[0])
    scratch_op.operation.erase()

    return Value(array_op.peOutputs)
def _get_ip(self):
    """Return an insertion point constructed from the body of `self.mod`."""
    module_body = self.mod.body
    return ir.InsertionPoint(module_body)
def insert_end_of_block(self, block: _ir.Block):
    """Push an insertion point constructed from `block` via `self.push_ip`."""
    block_ip = _ir.InsertionPoint(block)
    self.push_ip(block_ip)
def benchmark_sparse_mlir_multiplication():
    """Benchmark for mlir sparse matrix multiplication.

    Because it's an MLIR benchmark we need to return both a `compiler`
    function and a `runner` function.

    Returns:
      A `(compiler, runner)` pair. `compiler()` builds the timed module,
      creates an ExecutionEngine for it and returns the engine's `invoke`.
      `runner(engine_invoke)` allocates the arguments, invokes "main" and
      returns the single value written to the timer buffer as an int.
    """
    with ir.Context(), ir.Location.unknown():
        module = ir.Module.create()
        f64 = ir.F64Type.get()
        param1_type = ir.RankedTensorType.get([1000, 1500], f64)
        param2_type = ir.RankedTensorType.get([1500, 2000], f64)
        result_type = ir.RankedTensorType.get([1000, 2000], f64)
        with ir.InsertionPoint(module.body):

            @func.FuncOp.from_py_func(param1_type, param2_type, result_type)
            def sparse_kernel(x, y, z):
                return matmul_dsl(x, y, outs=[z])

    def compiler():
        with ir.Context(), ir.Location.unknown():
            kernel_func = get_kernel_func_from_module(module)
            timer_func = emit_timer_func()
            wrapped_func = emit_benchmark_wrapped_main_func(
                kernel_func, timer_func)
            # The three functions are re-assembled into one module by
            # concatenating their textual forms and parsing the result.
            module_text = str(timer_func) + str(wrapped_func) + str(kernel_func)
            main_module_with_benchmark = ir.Module.parse(module_text)
            setup_passes(main_module_with_benchmark)

            c_runner_utils = os.getenv("MLIR_C_RUNNER_UTILS", "")
            assert os.path.exists(c_runner_utils), (
                f"{c_runner_utils} does not exist."
                " Please pass a valid value for"
                " MLIR_C_RUNNER_UTILS environment variable."
            )
            runner_utils = os.getenv("MLIR_RUNNER_UTILS", "")
            assert os.path.exists(runner_utils), (
                f"{runner_utils} does not exist."
                " Please pass a valid value for MLIR_RUNNER_UTILS"
                " environment variable."
            )

            shared_libs = [c_runner_utils, runner_utils]
            engine = ExecutionEngine(
                main_module_with_benchmark, 3, shared_libs=shared_libs)
            return engine.invoke

    def runner(engine_invoke):
        def as_invoke_arg(array):
            # Arguments are passed as a pointer to a pointer to the ranked
            # memref descriptor of the array.
            descriptor = rt.get_ranked_memref_descriptor(array)
            return ctypes.pointer(ctypes.pointer(descriptor))

        compiled_program_args = []
        # First entry is the result slot, followed by the kernel's arguments.
        for argument_type in (result_type, param1_type, param2_type,
                              result_type):
            # Recover the shape from the printed type: strip "tensor<...>"
            # and drop the trailing element type.
            shape_text = re.sub("<|>|tensor", "", str(argument_type))
            *dim_texts, _element_type = shape_text.split("x")
            dimensions = [int(dim_text) for dim_text in dim_texts]
            if argument_type == result_type:
                argument = np.zeros(dimensions, np.float64)
            else:
                argument = create_sparse_np_tensor(dimensions, 1000)
            compiled_program_args.append(as_invoke_arg(argument))

        # A one-element timer buffer, so the wrapped main loops once.
        np_timers_ns = np.array([0], dtype=np.int64)
        compiled_program_args.append(as_invoke_arg(np_timers_ns))
        engine_invoke("main", *compiled_program_args)
        return int(np_timers_ns[0])

    return compiler, runner
# Test script: builds SV attributes, attaches them to sv.wire / sv.reg ops and
# prints the results so the FileCheck directives below can verify them.
from circt.dialects import hw, sv
from mlir import ir

# All IR below is created under a single Context with an unknown Location.
with ir.Context() as ctx, ir.Location.unknown() as loc:
    # NOTE(review): `circt` is not imported in the visible lines; presumably it
    # is imported earlier in the file - confirm.
    circt.register_dialects(ctx)
    ctx.allow_unregistered_dialects = True

    # Attribute built from a name and an expression; the print shows the
    # attribute itself followed by its `name` and `expression` accessors.
    sv_attr = sv.SVAttributeAttr.get("fold", "false")
    print(f"sv_attr: {sv_attr} {sv_attr.name} {sv_attr.expression}")
    # CHECK: sv_attr: #sv.attribute<"fold" = "false"> fold false

    # Attribute built from a name only; `expression` is expected to print as
    # None. `sv_attr` is rebound, so the ops below carry this second attribute.
    sv_attr = sv.SVAttributeAttr.get("no_merge")
    print(f"sv_attr: {sv_attr} {sv_attr.name} {sv_attr.expression}")
    # CHECK: sv_attr: #sv.attribute<"no_merge"> no_merge None

    # Signless 1-bit integer wrapped in an hw inout type, used for both ops.
    i1 = ir.IntegerType.get_signless(1)
    i1_inout = hw.InOutType.get(i1)

    m = ir.Module.create()
    with ir.InsertionPoint(m.body):
        # Attach the attribute list to a wire op and print the op.
        wire_op = sv.WireOp(i1_inout, "wire1")
        wire_op.attributes["sv.attributes"] = ir.ArrayAttr.get([sv_attr])
        print(wire_op)
        # CHECK: %wire1 = sv.wire {sv.attributes = [#sv.attribute<"no_merge">]} : !hw.inout<i1>

        # Same for a reg op.
        reg_op = sv.RegOp(i1_inout, "reg1")
        reg_op.attributes["sv.attributes"] = ir.ArrayAttr.get([sv_attr])
        print(reg_op)
        # CHECK: %reg1 = sv.reg {sv.attributes = [#sv.attribute<"no_merge">]} : !hw.inout<i1>
# RUN: cat %t | FileCheck --check-prefix=ERR %s import circt from circt.dialects import hw, msft import mlir.ir as ir import mlir.passmanager import sys with ir.Context() as ctx, ir.Location.unknown(): circt.register_dialects(ctx) i32 = ir.IntegerType.get_signless(32) i1 = ir.IntegerType.get_signless(1) m = ir.Module.create() with ir.InsertionPoint(m.body): extmod = msft.MSFTModuleExternOp(name='MyExternMod', input_ports=[], output_ports=[]) entity_extern = msft.EntityExternOp.create("tag", "extra details") op = msft.MSFTModuleOp(name='MyWidget', input_ports=[], output_ports=[]) with ir.InsertionPoint(op.add_entry_block()): msft.OutputOp([]) top = msft.MSFTModuleOp(name='top', input_ports=[], output_ports=[]) with ir.InsertionPoint(top.add_entry_block()): msft.OutputOp([])
# RUN: cat %t | FileCheck --check-prefix=ERR %s import circt from circt.dialects import hw, msft import mlir.ir as ir import mlir.passmanager import sys with ir.Context() as ctx, ir.Location.unknown(): circt.register_dialects(ctx) i32 = ir.IntegerType.get_signless(32) i1 = ir.IntegerType.get_signless(1) mod = ir.Module.create() with ir.InsertionPoint(mod.body): extmod = msft.MSFTModuleExternOp(name='MyExternMod', input_ports=[], output_ports=[]) entity_extern = msft.EntityExternOp.create("tag", "extra details") op = msft.MSFTModuleOp(name='MyWidget', input_ports=[], output_ports=[]) with ir.InsertionPoint(op.add_entry_block()): msft.OutputOp([]) top = msft.MSFTModuleOp(name='top', input_ports=[], output_ports=[]) with ir.InsertionPoint(top.add_entry_block()): msft.OutputOp([])
def _get_ip(self) -> ir.InsertionPoint:
    """Return an insertion point built from the first block of `self._dyn_inst.body`."""
    first_block = self._dyn_inst.body.blocks[0]
    return ir.InsertionPoint(first_block)