def __init__(self, context: Optional[_ir.Context]):
    """Binds an MLIR context and caches commonly used types and attributes.

    Args:
      context: The context to build IR in. When a falsy value (such as
        None) is given, a new `_ir.Context` is created.
    """
    self.context = context if context else _ir.Context()
    _cext.register_all_dialects(self.context)
    self.loc = _ir.Location.unknown(
        context=self.context)  # type: _ir.Location
    self.module = None  # type: Optional[_ir.Module]
    self._ip_stack = []

    # Cache some types and attributes. Everything below is created while
    # self.context is the active context.
    with self.context:
        parse_type = _ir.Type.parse

        # Types.
        # TODO: Consolidate numpy.any_dtype and basicpy.UnknownType.
        self.unknown_type = parse_type("!basicpy.UnknownType")
        self.bool_type = parse_type("!basicpy.BoolType")
        self.bytes_type = parse_type("!basicpy.BytesType")
        self.ellipsis_type = parse_type("!basicpy.EllipsisType")
        self.none_type = parse_type("!basicpy.NoneType")
        self.str_type = parse_type("!basicpy.StrType")
        self.i1_type = _ir.IntegerType.get_signless(1)
        self.index_type = _ir.IndexType.get()
        self.unknown_tensor_type = _ir.UnrankedTensorType.get(
            self.unknown_type, loc=self.loc)
        self.unknown_array_type = _cext.shaped_to_ndarray_type(
            self.unknown_tensor_type)

        # Attributes.
        self.i1_true = _ir.IntegerAttr.get(self.i1_type, 1)
        self.i1_false = _ir.IntegerAttr.get(self.i1_type, 0)
def test_elementwise_add():
    """Runs `_KERNEL_STR` through `_run_test` using the runtime support library.

    The library path is taken from the SUPPORT_LIB environment variable,
    which must be set and must point to an existing file.
    """
    # Obtain path to runtime support library.
    support_lib = os.getenv('SUPPORT_LIB')
    assert support_lib is not None, 'SUPPORT_LIB is undefined'
    assert os.path.exists(support_lib), f'{support_lib} does not exist'

    # The context object itself is never referenced, so it is not bound.
    with ir.Context(), ir.Location.unknown():
        _run_test(support_lib, _KERNEL_STR)
def compiler():
    """Builds the benchmark module and returns the engine's `invoke` callable.

    `module` is not defined in this function; it is looked up in the
    surrounding scope. The runner utility libraries are located through the
    MLIR_C_RUNNER_UTILS and MLIR_RUNNER_UTILS environment variables.

    Returns:
      The bound `invoke` method of the created `ExecutionEngine`.
    """
    with ir.Context(), ir.Location.unknown():
        kernel_func = get_kernel_func_from_module(module)
        timer_func = emit_timer_func()
        wrapped_func = emit_benchmark_wrapped_main_func(
            kernel_func, timer_func)

        # The final module is assembled from the textual form of the three
        # functions, in this order.
        module_text = str(timer_func) + str(wrapped_func) + str(kernel_func)
        main_module_with_benchmark = ir.Module.parse(module_text)
        setup_passes(main_module_with_benchmark)

        c_runner_utils = os.getenv("MLIR_C_RUNNER_UTILS", "")
        assert os.path.exists(c_runner_utils), (
            f"{c_runner_utils} does not exist."
            f" Please pass a valid value for"
            f" MLIR_C_RUNNER_UTILS environment variable.")

        runner_utils = os.getenv("MLIR_RUNNER_UTILS", "")
        assert os.path.exists(runner_utils), (
            f"{runner_utils} does not exist."
            f" Please pass a valid value for MLIR_RUNNER_UTILS"
            f" environment variable.")

        shared_libs = [c_runner_utils, runner_utils]
        engine = ExecutionEngine(
            main_module_with_benchmark, 3, shared_libs=shared_libs)
        return engine.invoke
def testSpMM():
    """Builds, compiles and runs SpMM for many sparse tensor annotations.

    Iterates over every combination of dimension level types, dimension
    orderings, pointer bitwidths and index bitwidths listed below, and calls
    `build_compile_and_run_SpMM` once per combination.

    The runtime support library path is read from the SUPPORT_LIB
    environment variable and is validated up front, so a missing or wrong
    setting is reported immediately rather than from inside the first
    kernel build.
    """
    # Obtain path to runtime support library.
    support_lib = os.getenv('SUPPORT_LIB')
    assert support_lib is not None, 'SUPPORT_LIB is undefined'
    assert os.path.exists(support_lib), f'{support_lib} does not exist'

    with ir.Context(), ir.Location.unknown():
        count = 0

        # Fixed compiler optimization strategy.
        # TODO: explore state space here too
        par = 0
        vec = 0
        vl = 1
        e = False
        opt = (f'parallelization-strategy={par} '
               f'vectorization-strategy={vec} '
               f'vl={vl} enable-simd-index32={e}')

        # Exhaustive loop over various ways to annotate a kernel with
        # a *single* sparse tensor. Even this subset already gives
        # quite a large state space!
        levels = [[st.DimLevelType.dense, st.DimLevelType.dense],
                  [st.DimLevelType.dense, st.DimLevelType.compressed],
                  [st.DimLevelType.compressed, st.DimLevelType.dense],
                  [st.DimLevelType.compressed, st.DimLevelType.compressed]]
        orderings = [
            ir.AffineMap.get_permutation([0, 1]),
            ir.AffineMap.get_permutation([1, 0])
        ]
        bitwidths = [0, 8, 32]
        for level in levels:
            for ordering in orderings:
                for pwidth in bitwidths:
                    for iwidth in bitwidths:
                        attr = st.EncodingAttr.get(level, ordering, pwidth,
                                                   iwidth)
                        compiler = SparseCompiler(options=opt)
                        build_compile_and_run_SpMM(attr, support_lib,
                                                   compiler)
                        count += 1
        print('Passed ', count, 'tests')
def test_compile_and_run(self):
    """Compiles every `.mlir` file in the data directory and executes it.

    For each file, the first operation of the parsed module is taken as the
    function under test. Its argument attributes (if present) describe how to
    synthesize inputs: either a literal shape value or a static shaped type
    that is filled with uniform random data. Files without argument
    attributes are only compiled; files whose attributes do not yield one
    input per argument are logged as errors and skipped.
    """
    data_dir = resource_loader.get_data_files_path()
    for filename in os.listdir(data_dir):
        if not filename.endswith('.mlir'):
            continue
        with open(os.path.join(data_dir, filename), mode='r') as f:
            mlir_function = f.read()

        arg_attrs = []
        with ir.Context() as ctx:
            ctx.allow_unregistered_dialects = True
            module = ir.Module.parse(mlir_function)
            func = module.body.operations[0]
            function_name = ir.StringAttr(func.attributes['sym_name']).value
            if _ARG_ATTRIBUTES_NAME in func.attributes:
                arg_attrs = ir.ArrayAttr(
                    func.attributes[_ARG_ATTRIBUTES_NAME])

            logging.info(f'processing {filename}')
            start = time.perf_counter()
            compiled = cpurt.compile(
                mlir_function,
                function_name,
                tf_cpurt.Specialization.ENABLED,
                vectorize=False)
            end = time.perf_counter()
            logging.info(f'compiled {filename} in {end-start:0.4f} seconds')

            if not arg_attrs:
                continue

            args = []
            for arg_attr in arg_attrs:
                attr_dict = ir.DictAttr(arg_attr)
                if _SHAPE_VALUE_ATTRIBUTE_NAME in attr_dict:
                    # The attribute carries the concrete values to pass.
                    shape_value_attr = ir.DenseIntElementsAttr(
                        attr_dict[_SHAPE_VALUE_ATTRIBUTE_NAME])
                    args.append(
                        np.array(list(shape_value_attr)).astype(np.int32))
                    continue
                if _STATIC_TYPE_ATTRIBUTE_NAME in attr_dict:
                    # Only the type is known; generate random contents.
                    static_type = ir.TypeAttr(
                        attr_dict[_STATIC_TYPE_ATTRIBUTE_NAME]).value
                    shaped_type = ir.ShapedType(static_type)
                    dims = [
                        shaped_type.get_dim_size(i)
                        for i in range(shaped_type.rank)
                    ]
                    np_element_type = TfCompileAndRunTest.mlir_type_to_np_type(
                        shaped_type.element_type)
                    args.append(
                        np.random.uniform(-10000.0, 10000.0,
                                          size=dims).astype(np_element_type))

            if len(args) != len(arg_attrs):
                logging.error(
                    'expected valid python_test_attrs attributes for each argument'
                )
                continue

            start = time.perf_counter()
            cpurt.execute(compiled, args)
            end = time.perf_counter()
            logging.info(f'executed {filename} in {end-start:0.4f} seconds')
def output_sparse_tensor(
        tensor: ctypes.c_void_p, filename: str,
        sparsity: Sequence[sparse_tensor.DimLevelType]) -> None:
    """Outputs an MLIR sparse tensor to the given file.

    Args:
      tensor: A C pointer to the MLIR sparse tensor.
      filename: A string for the name of the file that contains the tensor
        data in a COO-flavored format.
      sparsity: A sequence of DimLevelType values, one for each dimension of
        the tensor.

    Raises:
      OSError: If there is any problem in loading the supporting C shared
        library.
      ValueError: If the shared library doesn't contain the needed routine.
    """
    # Convert the filename to a byte stream and package both arguments the
    # way the execution engine expects them: as references to the values.
    c_filename = ctypes.c_char_p(bytes(filename, "utf-8"))
    tensor_ptr = ctypes.cast(tensor, ctypes.c_void_p)
    arg_pointers = [ctypes.byref(tensor_ptr), ctypes.byref(c_filename)]

    with ir.Context(), ir.Location.unknown():
        kernel_text = _get_output_sparse_tensor_kernel(sparsity)
        engine = compile_and_build_engine(ir.Module.parse(kernel_text))

        # Invoke the execution engine to run the module.
        engine.invoke(_ENTRY_NAME, *arg_pointers)
def main():
    """Runs the sparse tensor output test over several storage formats.

    Calls `build_compile_and_run_output` once for each combination of level
    types, dimension ordering and bitwidth listed below (the same bitwidth is
    used for both width arguments of the encoding), then prints the number of
    combinations that were run.

    Raises:
      FileNotFoundError: If SUPPORT_LIB does not point to an existing file.
    """
    support_lib = os.getenv('SUPPORT_LIB')
    assert support_lib is not None, 'SUPPORT_LIB is undefined'
    if not os.path.exists(support_lib):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                support_lib)

    # CHECK-LABEL: TEST: test_output
    print('\nTEST: test_output')
    count = 0
    with ir.Context(), ir.Location.unknown():
        dense = st.DimLevelType.dense
        compressed = st.DimLevelType.compressed
        # Loop over various sparse types: CSR, DCSR, CSC, DCSC.
        levels = [[dense, compressed], [compressed, compressed]]
        orderings = [
            ir.AffineMap.get_permutation(perm) for perm in ([0, 1], [1, 0])
        ]
        bitwidths = [8, 16, 32, 64]
        for level in levels:
            for ordering in orderings:
                for bwidth in bitwidths:
                    attr = st.EncodingAttr.get(level, ordering, bwidth,
                                               bwidth)
                    compiler = SparseCompiler()
                    build_compile_and_run_output(attr, support_lib, compiler)
                    count += 1

    # CHECK: Passed 16 tests
    print('Passed', count, 'tests')
def __init__(self, value: Any):
    """Stores the textual MLIR attribute form of a numeric constant.

    Floats become an f64 `FloatAttr`, ints become a signless 64-bit
    `IntegerAttr`; `self.value` holds `str()` of that attribute.

    Args:
      value: The Python float or int to wrap.

    Raises:
      ValueError: If `value` is neither a float nor an int.
    """
    # A throwaway context is only needed to construct and print the attribute.
    with _ir.Context():
        if isinstance(value, float):
            attr = _ir.FloatAttr.get_f64(float(value))
        elif isinstance(value, int):
            i64 = _ir.IntegerType.get_signless(64)
            attr = _ir.IntegerAttr.get(i64, int(value))
        else:
            raise ValueError(
                f"const requires int or float. Got: {type(value)}")
        self.value = str(attr)
def test_compile_and_run(self):
    """Compiles the MLIR file named by the flags, runs it and checks results.

    The first operation of the parsed module is the function under test. If
    its entry block has arguments, argument attributes must be present; they
    describe either literal shape values or static shaped types, for which
    seeded uniform random inputs are generated. When
    `FLAGS.compare_with_tensorflow` is set, the result is compared against
    the TFRT fallback execution.
    """
    filename = FLAGS.test_file_name
    if not os.path.isabs(filename):
        filename = os.path.join(resource_loader.get_data_files_path(),
                                filename)
    with gfile.GFile(filename, mode='r') as f:
        mlir_function = f.read()

    arg_attrs = []
    with ir.Context() as ctx:
        ctx.allow_unregistered_dialects = True
        module = ir.Module.parse(mlir_function)
        func = module.body.operations[0]
        function_name = ir.StringAttr(func.attributes['sym_name']).value

        # If the function has arguments, we expect argument attributes.
        entry_args = func.regions[0].blocks[0].arguments
        if entry_args:
            self.assertIn(_ARG_ATTRIBUTES_NAME, func.attributes)
            arg_attrs = ir.ArrayAttr(func.attributes[_ARG_ATTRIBUTES_NAME])

        logging.info(f'processing {filename}')
        start = time.perf_counter()
        compiled = jitrt.compile(
            mlir_function,
            function_name,
            tf_jitrt.Specialization.ENABLED,
            vectorize=FLAGS.vectorize)
        end = time.perf_counter()
        logging.info(f'compiled {filename} in {end-start:0.4f} seconds')

        # Seed once so the generated inputs are reproducible.
        np.random.seed(FLAGS.input_data_seed)
        args = []
        for arg_attr in arg_attrs:
            attr_dict = ir.DictAttr(arg_attr)
            if _SHAPE_VALUE_ATTRIBUTE_NAME in attr_dict:
                shape_value_attr = ir.DenseIntElementsAttr(
                    attr_dict[_SHAPE_VALUE_ATTRIBUTE_NAME])
                args.append(
                    np.array(list(shape_value_attr)).astype(np.int32))
                continue
            if _STATIC_TYPE_ATTRIBUTE_NAME in attr_dict:
                static_type = ir.TypeAttr(
                    attr_dict[_STATIC_TYPE_ATTRIBUTE_NAME]).value
                shaped_type = ir.ShapedType(static_type)
                np_element_type = CompileAndRunTest.mlir_type_to_np_type(
                    shaped_type.element_type)
                random_values = np.random.uniform(
                    -10000.0, 10000.0, size=shaped_type.shape)
                args.append(random_values.astype(np_element_type))

        self.assertEqual(len(args), len(arg_attrs))

        start = time.perf_counter()
        result = jitrt.execute(compiled, args)
        end = time.perf_counter()
        logging.info(f'executed {filename} in {end-start:0.4f} seconds')

        if FLAGS.compare_with_tensorflow:
            start = time.perf_counter()
            expected = tfrt_fallback.run_tfrt_fallback(
                mlir_function, function_name, args)
            end = time.perf_counter()
            logging.info(
                f'executed {filename} via tfrt fallback in {end-start:0.4f} seconds'
            )
            np.testing.assert_allclose(
                result, expected, rtol=1e-5, atol=1e-5)
def test_compile_and_run(self):
    """Compiles the MLIR file named by the flags and executes it.

    The first operation of the parsed module is the function under test.
    Without argument attributes the test stops after compilation. Otherwise
    one input per attribute is synthesized (literal shape values, or seeded
    uniform random data for static shaped types); if that does not yield one
    input per argument, an error is logged and execution is skipped.
    """
    filename = FLAGS.test_file_name
    if not os.path.isabs(filename):
        filename = os.path.join(resource_loader.get_data_files_path(),
                                filename)
    with gfile.GFile(filename, mode='r') as f:
        mlir_function = f.read()

    arg_attrs = []
    with ir.Context() as ctx:
        ctx.allow_unregistered_dialects = True
        module = ir.Module.parse(mlir_function)
        func = module.body.operations[0]
        function_name = ir.StringAttr(func.attributes['sym_name']).value
        if _ARG_ATTRIBUTES_NAME in func.attributes:
            arg_attrs = ir.ArrayAttr(func.attributes[_ARG_ATTRIBUTES_NAME])

        logging.info(f'processing {filename}')
        start = time.perf_counter()
        compiled = jitrt.compile(
            mlir_function,
            function_name,
            tf_jitrt.Specialization.ENABLED,
            vectorize=FLAGS.vectorize)
        end = time.perf_counter()
        logging.info(f'compiled {filename} in {end-start:0.4f} seconds')

        if not arg_attrs:
            return

        # Seed once so the generated inputs are reproducible.
        np.random.seed(FLAGS.input_data_seed)
        args = []
        for arg_attr in arg_attrs:
            attr_dict = ir.DictAttr(arg_attr)
            if _SHAPE_VALUE_ATTRIBUTE_NAME in attr_dict:
                shape_value_attr = ir.DenseIntElementsAttr(
                    attr_dict[_SHAPE_VALUE_ATTRIBUTE_NAME])
                args.append(
                    np.array(list(shape_value_attr)).astype(np.int32))
                continue
            if _STATIC_TYPE_ATTRIBUTE_NAME in attr_dict:
                static_type = ir.TypeAttr(
                    attr_dict[_STATIC_TYPE_ATTRIBUTE_NAME]).value
                shaped_type = ir.ShapedType(static_type)
                np_element_type = CompileAndRunTest.mlir_type_to_np_type(
                    shaped_type.element_type)
                random_values = np.random.uniform(
                    -10000.0, 10000.0, size=shaped_type.shape)
                args.append(random_values.astype(np_element_type))

        if len(args) != len(arg_attrs):
            logging.error(
                'expected valid python_test_attrs attributes for each argument'
            )
            return

        start = time.perf_counter()
        jitrt.execute(compiled, args)
        end = time.perf_counter()
        logging.info(f'executed {filename} in {end-start:0.4f} seconds')
def main():
    """Runs the SDDMM kernel test over a selection of sparse annotations.

    For every combination of the option lists below, an encoding attribute
    and a `SparseCompiler` are created and handed to
    `build_compile_and_run_SDDMMM`. The number of combinations is printed at
    the end.

    Raises:
      FileNotFoundError: If SUPPORT_LIB does not point to an existing file.
    """
    support_lib = os.getenv('SUPPORT_LIB')
    assert support_lib is not None, 'SUPPORT_LIB is undefined'
    if not os.path.exists(support_lib):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                support_lib)

    # CHECK-LABEL: TEST: testSDDMMM
    print('\nTEST: testSDDMMM')
    with ir.Context(), ir.Location.unknown():
        count = 0
        # Loop over various ways to compile and annotate the SDDMM kernel with
        # a *single* sparse tensor. Note that we deliberately do not
        # exhaustively search the full state space to reduce runtime of the
        # test. It is straightforward to adapt the lists below to explore
        # more combinations.
        dense = st.DimLevelType.dense
        compressed = st.DimLevelType.compressed
        levels = [[dense, dense], [dense, compressed], [compressed, dense],
                  [compressed, compressed]]
        orderings = [
            ir.AffineMap.get_permutation(perm) for perm in ([0, 1], [1, 0])
        ]
        pointer_widths = [32]
        index_widths = [32]
        par_strategies = [0]
        vec_strategies = [0, 1]
        simd_index32_options = [True]
        for level in levels:
            for ordering in orderings:
                for pwidth in pointer_widths:
                    for iwidth in index_widths:
                        for par in par_strategies:
                            for vec in vec_strategies:
                                for e in simd_index32_options:
                                    # Vector length 1 without vectorization,
                                    # 16 otherwise.
                                    vl = 16 if vec != 0 else 1
                                    attr = st.EncodingAttr.get(
                                        level, ordering, pwidth, iwidth)
                                    opt = (f'parallelization-strategy={par} '
                                           f'vectorization-strategy={vec} '
                                           f'vl={vl} enable-simd-index32={e}')
                                    compiler = sparse_compiler.SparseCompiler(
                                        options=opt,
                                        opt_level=0,
                                        shared_libs=[support_lib])
                                    build_compile_and_run_SDDMMM(
                                        attr, compiler)
                                    count += 1
        # CHECK: Passed 16 tests
        print('Passed ', count, 'tests')
def create_sparse_tensor(
        filename: str, sparsity: Sequence[sparse_tensor.DimLevelType]
) -> Tuple[ctypes.c_void_p, np.ndarray]:
    """Creates an MLIR sparse tensor from the input file.

    Args:
      filename: A string for the name of the file that contains the tensor
        data in a COO-flavored format.
      sparsity: A sequence of DimLevelType values, one for each dimension of
        the tensor.

    Returns:
      A Tuple containing the following values:
      storage: A ctypes.c_void_p for the MLIR sparse tensor storage.
      shape: A 1D numpy array of integers, for the shape of the tensor.

    Raises:
      OSError: If there is any problem in loading the supporting C shared
        library.
      ValueError: If the shared library doesn't contain the needed routine.
    """
    # A sparse tensor descriptor to receive the kernel result.
    c_tensor_desc = _SparseTensorDescriptor()
    # Convert the filename to a byte stream.
    c_filename = ctypes.c_char_p(bytes(filename, "utf-8"))
    # Both arguments are passed as references to the values.
    desc_ptr = ctypes.pointer(c_tensor_desc)
    arg_pointers = [ctypes.byref(desc_ptr), ctypes.byref(c_filename)]

    with ir.Context(), ir.Location.unknown():
        kernel_text = _get_create_sparse_tensor_kernel(sparsity)
        engine = compile_and_build_engine(ir.Module.parse(kernel_text))

        # Invoke the execution engine to run the module; the kernel fills
        # in c_tensor_desc.
        engine.invoke(_ENTRY_NAME, *arg_pointers)

    shape = runtime.ranked_memref_to_numpy(
        ctypes.pointer(c_tensor_desc.shape))
    return c_tensor_desc.storage, shape
def testSpMM():
    """Builds, compiles and runs SpMM for several sparse tensor annotations.

    Calls `build_compile_and_run_SpMM` once for each combination of dimension
    level types, dimension orderings and bitwidths listed below, then prints
    the number of combinations that were run.

    The runtime support library path is read from the SUPPORT_LIB
    environment variable. It is checked for being set before it is checked
    for existence, because `os.path.exists(None)` fails with an unhelpful
    `TypeError`.
    """
    # Obtain path to runtime support library.
    support_lib = os.getenv('SUPPORT_LIB')
    assert support_lib is not None, 'SUPPORT_LIB is undefined'
    assert os.path.exists(support_lib), f'{support_lib} does not exist'

    with ir.Context(), ir.Location.unknown():
        count = 0
        # Loop over various ways to compile and annotate the SpMM kernel with
        # a *single* sparse tensor. Note that we deliberately do not
        # exhaustively search the full state space to reduce runtime of the
        # test. It is straightforward to adapt the code below to explore more
        # combinations.
        par = 0
        vec = 0
        vl = 1
        e = False
        opt = (f'parallelization-strategy={par} '
               f'vectorization-strategy={vec} '
               f'vl={vl} enable-simd-index32={e}')
        levels = [[st.DimLevelType.dense, st.DimLevelType.dense],
                  [st.DimLevelType.dense, st.DimLevelType.compressed],
                  [st.DimLevelType.compressed, st.DimLevelType.dense],
                  [st.DimLevelType.compressed, st.DimLevelType.compressed]]
        orderings = [
            ir.AffineMap.get_permutation([0, 1]),
            ir.AffineMap.get_permutation([1, 0])
        ]
        bitwidths = [0]
        for level in levels:
            for ordering in orderings:
                for pwidth in bitwidths:
                    for iwidth in bitwidths:
                        attr = st.EncodingAttr.get(level, ordering, pwidth,
                                                   iwidth)
                        compiler = SparseCompiler(options=opt)
                        build_compile_and_run_SpMM(attr, support_lib,
                                                   compiler)
                        count += 1
        print('Passed ', count, 'tests')
def __init__(self,
             comprehension: Comprehension,
             context: Optional[_ir.Context] = None):
    """Derives the tensor, capture and indexing configuration of a comprehension.

    Args:
      comprehension: The comprehension whose definitions (writes) and values
        (read expressions) are analyzed.
      context: The context to use; a new `_ir.Context` is created when None.

    Raises:
      ValueError: If a tensor is written more than once, if a tensor is read
        with different indexing maps, if the writes do not all share the same
        reduction dims, or if an index refers to a dimension that is not part
        of the iteration domain.
    """
    self.context = context if context is not None else _ir.Context()
    self.affine_state = AffineBuildState()
    self.writes = list()  # type: List[Tuple[TensorUse, TensorExpression]]
    self.tensor_args = dict()  # type: Dict[TensorDef, TensorDefConfig]
    self.capture_args = dict()  # type: Dict[CaptureDef, CaptureDefConfig]
    self.uses = dict()  # type: Dict[TensorUse, TensorUseConfig]

    # Compute the ordered set of writes and collect the tensor, capture, and
    # index uses.
    collected_uses = set()
    collected_captures = set()
    collected_indices = set()
    self.writes.extend(zip(comprehension.definitions, comprehension.values))
    for write_use, read_use in self.writes:
        collected_uses.add(write_use)
        read_use.collect_uses(collected_uses)
        read_use.collect_captures(collected_captures)
        read_use.collect_indices(collected_indices)

    # Need to add all definitions before uses, so process twice.
    for use in collected_uses:
        self.add_tensor_arg(use.tensor_def)
    for capture in collected_captures:
        self.add_capture_arg(capture)
    for use in collected_uses:
        self.add_use(use)

    # Now normalize all defs and uses indexing maps now that full count of
    # dims and symbols are known.
    for cuse in self.uses.values():
        cuse.indexing_map = self._normalize_affine_map(cuse.indexing_map)
    for cdef in self.tensor_args.values():
        cdef.shape_map = self._normalize_affine_map(
            cdef.shape_map, with_dims=False)

    # Now for each write use, propagate the indexing maps from the use to the
    # tensor, ensuring that there are not conflicts.
    for write_use, _ in self.writes:
        write_tensor_def = self.tensor_args[write_use.tensor_def]
        if write_tensor_def.indexing_map:
            raise ValueError(
                f"Unexpected multi-write to a single tensor: {write_tensor_def}")
        write_tensor_def.indexing_map = self.uses[write_use].indexing_map

    # For each read use, propagate the indexing maps from the use to the
    # tensor, ensuring that there are not conflicts.
    for _, read_expr in self.writes:
        read_uses = set()  # type: Set[TensorUse]
        read_expr.collect_uses(read_uses)
        for read_use in read_uses:
            read_tensor_def = self.tensor_args[read_use.tensor_def]
            if (read_tensor_def.indexing_map and
                    read_tensor_def.indexing_map !=
                    self.uses[read_use].indexing_map):
                raise ValueError(
                    f"Unexpected multi-read of a tensor with different accesses:"
                    f"{read_tensor_def} vs {read_use}")
            read_tensor_def.indexing_map = self.uses[read_use].indexing_map

    # Sanity check that all defs have an indexing map.
    assert all(d.indexing_map for d in self.tensor_args.values()), (
        f"Missing indexing map on TensorDef: {self.tensor_args}")

    # Collect reduction dims and ensure all the same.
    all_reduction_dims = set(comprehension.all_reduction_dims)
    if len(all_reduction_dims) != 1:
        raise ValueError(
            f"All writes within a generic must have the same reduction "
            f"dims. Got: {all_reduction_dims}")
    self.reduction_dims = next(iter(all_reduction_dims))

    # Check the index dimension exists and resolve.
    for index in collected_indices:
        if index.dim_def.dimname not in self.affine_state.all_dims:
            # Report via dim_def, the same attribute the check above reads;
            # index.dim is only populated by resolve_dimension_name() below.
            raise ValueError(
                f"The dimension {index.dim_def.dimname} is not part of the iteration "
                f"domain {self.affine_state.all_dims}")
        index.resolve_dimension_name(self.affine_state)

    # Generate the scalar assignments (used to build a body).
    self.assignments = [
        ScalarAssign(write_use.tensor_name, read_expr.to_scalar_expression())
        for write_use, read_expr in self.writes
    ]
def __init__(self,
             comprehension: Comprehension,
             domain: Sequence[DimDef],
             registered_operands: Sequence[OperandDef],
             context: Optional[_ir.Context] = None):
    """Derives operand, indexing and assignment configuration of a comprehension.

    Args:
      comprehension: The comprehension whose definitions (writes) and values
        (read expressions) are analyzed.
      domain: The annotated iteration domain. When empty, the used dimensions
        sorted by name are taken instead.
      registered_operands: All operands registered on the op, including
        attributes and tensors that only carry index-dim annotations.
      context: The context to use; a new `_ir.Context` is created when None.

    Raises:
      ValueError: If the domain does not match the used dimensions, if an
        annotated index dim is unused, if a tensor is written more than once
        or read with different indexing maps, if a non-attribute operand ends
        up without an indexing map, if the writes do not all share the same
        reduction dims, or if an index refers to a dimension that is not part
        of the iteration domain.
    """
    self.context = context if context is not None else _ir.Context()
    self.affine_state = AffineBuildState()
    self.writes = list()  # type: List[Tuple[TensorUse, TensorExpression]]
    self.operands = dict()  # type: Dict[OperandDef, OperandDefConfig]
    self.uses = dict()  # type: Dict[TensorUse, TensorUseConfig]

    # Compute the ordered set of writes and collect the tensor, capture, dims,
    # and index uses.
    collected_tensor_uses = set()
    collected_scalar_uses = set()
    collected_dim_uses = set()
    collected_indices = set()
    self.writes.extend(zip(comprehension.definitions, comprehension.values))
    for write_use, read_use in self.writes:
        collected_tensor_uses.add(write_use)
        read_use.collect_tensor_uses(collected_tensor_uses)
        read_use.collect_scalar_uses(collected_scalar_uses)
        read_use.collect_dim_uses(collected_dim_uses)
        write_use.collect_dim_uses(collected_dim_uses)
        read_use.collect_indices(collected_indices)

    # Set domain to the sorted list of uses if no domain annotation is given.
    if not domain:
        domain = sorted(collected_dim_uses, key=lambda dim: dim.dimname)

    # Verify the domain dimensions match the used dimensions.
    if (len(domain) != len(collected_dim_uses) or
            any(dim not in collected_dim_uses for dim in domain)):
        raise ValueError(f"Expected the annotated domain dimensions {domain} to "
                         f"match the set of dimension used by the tensor "
                         f"comprehension {collected_dim_uses}")

    # Instantiate the dimensions in the given order.
    with self.context:
        local_state = AffineBuildState(
            global_state=self.affine_state, allow_new_symbols=False)
        for dim in domain:
            dim.build(state=local_state)

    # Collect all attribute definitions.
    collected_attr_defs = [
        operand for operand in registered_operands
        if operand.kind == OperandKind.Attribute
    ]

    # Collect all tensors with manual indexing annotation.
    collected_index_defs = list()
    for operand in registered_operands:
        if operand.index_dims:
            if any(dim not in collected_dim_uses
                   for dim in operand.index_dims):
                raise ValueError(f"Expected all index dims {operand.index_dims} of "
                                 f"operand {operand.name} to have uses.")
            collected_index_defs.append(operand)

    # Collect the operand definitions of all tensor/scalar uses, attributes,
    # and shape-only tensors.
    all_operand_defs = list()
    all_operand_defs.extend(use.operand_def for use in collected_tensor_uses)
    all_operand_defs.extend(use.operand_def for use in collected_scalar_uses)
    all_operand_defs.extend(collected_attr_defs)
    all_operand_defs.extend(collected_index_defs)

    # Add all operands in registration order to ensure the symbols are
    # registered in the order they appear.
    all_operand_defs = sorted(
        all_operand_defs,
        key=lambda operand_def: operand_def.registered_index)
    for operand_def in all_operand_defs:
        self.add_operand(operand_def)

    # Add all shape-only tensor index_dim annotations and all tensor uses.
    for definition in collected_index_defs:
        self.add_indexed_operand(definition)
    for use in collected_tensor_uses:
        self.add_tensor_use(use)

    # Normalize all shape and indexing maps now that full count of dims and
    # symbols are known.
    for cuse in self.uses.values():
        cuse.indexing_map = self._normalize_affine_map(cuse.indexing_map)
    for definition in collected_index_defs:
        self.operands[definition].indexing_map = self._normalize_affine_map(
            self.operands[definition].indexing_map)
    for operand_config in self.operands.values():
        if operand_config.shape_map:
            operand_config.shape_map = self._normalize_affine_map(
                operand_config.shape_map, with_dims=False)
        if operand_config.attribute_map:
            operand_config.attribute_map = self._normalize_affine_map(
                operand_config.attribute_map, with_dims=False)

    # Now for each write use, propagate the indexing maps from the use to the
    # tensor, ensuring that there are not conflicts.
    for write_use, _ in self.writes:
        write_tensor_config = self.operands[write_use.operand_def]
        if write_tensor_config.indexing_map:
            raise ValueError(
                f"Unexpected multi-write to a single tensor: {write_tensor_config}")
        write_tensor_config.indexing_map = self.uses[write_use].indexing_map

    # For each read use, propagate the indexing maps from the use to the
    # tensor, ensuring that there are not conflicts.
    for _, read_expr in self.writes:
        read_uses = set()  # type: Set[TensorUse]
        read_expr.collect_tensor_uses(read_uses)
        for read_use in read_uses:
            read_operand_config = self.operands[read_use.operand_def]
            if (read_operand_config.indexing_map and
                    read_operand_config.indexing_map !=
                    self.uses[read_use].indexing_map):
                raise ValueError(
                    f"Unexpected multi-read of a tensor with different accesses:"
                    f"{read_operand_config} vs {read_use}")
            read_operand_config.indexing_map = self.uses[read_use].indexing_map

    # Set the indexing map of all scalar uses to the empty map.
    for operand_config in self.operands.values():
        if operand_config.operand_def.kind == OperandKind.Scalar:
            operand_config.indexing_map = self._get_scalar_map()

    # Check all registered tensor and scalar operands have an indexing map.
    for operand in registered_operands:
        if operand.kind == OperandKind.Attribute:
            continue
        if not (operand in self.operands and
                self.operands[operand].indexing_map):
            raise ValueError(f"Failed to compute an indexing map for operand "
                             f"{operand.name}")

    # Collect reduction dims and ensure all the same.
    all_reduction_dims = set(comprehension.all_reduction_dims)
    if len(all_reduction_dims) != 1:
        raise ValueError(
            f"All writes within a generic must have the same reduction "
            f"dims. Got: {all_reduction_dims}")
    self.reduction_dims = next(iter(all_reduction_dims))

    # Check the index dimension exists and resolve.
    for index in collected_indices:
        if index.dim_def.dimname not in self.affine_state.all_dims:
            # Report via dim_def, the same attribute the check above reads;
            # index.dim is only populated by resolve_dimension_name() below.
            raise ValueError(
                f"The dimension {index.dim_def.dimname} is not part of the iteration "
                f"domain {self.affine_state.all_dims}")
        index.resolve_dimension_name(self.affine_state)

    # Generate the scalar assignments (used to build a body).
    self.assignments = [
        ScalarAssign(write_use.tensor_name, read_expr.to_scalar_expression())
        for write_use, read_expr in self.writes
    ]
def main():
    """
    USAGE: python3 test_stress.py [raw_module.mlir [compiled_module.mlir]]

    The environment variable SUPPORT_LIB must be set to point to the
    libmlir_c_runner_utils shared library.  There are two optional
    arguments, for debugging purposes.  The first argument specifies where
    to write out the raw/generated ir.Module.  The second argument specifies
    where to write out the compiled version of that ir.Module.
    """
    support_lib = os.getenv('SUPPORT_LIB')
    assert support_lib is not None, 'SUPPORT_LIB is undefined'
    if not os.path.exists(support_lib):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                support_lib)

    # Optional debugging output locations taken from the command line.
    raw_module_path = sys.argv[1] if len(sys.argv) > 1 else None
    compiled_module_path = sys.argv[2] if len(sys.argv) > 2 else None

    # CHECK-LABEL: TEST: test_stress
    print("\nTEST: test_stress")
    with ir.Context() as ctx, ir.Location.unknown():
        par = 0
        vec = 0
        vl = 1
        e = False
        sparsification_options = (f'parallelization-strategy={par} '
                                  f'vectorization-strategy={vec} '
                                  f'vl={vl} '
                                  f'enable-simd-index32={e}')
        compiler = SparseCompiler(sparsification_options, support_lib)
        f64 = ir.F64Type.get()
        # Be careful about increasing this because
        #     len(types) = 1 + 2^rank * rank! * len(bitwidths)^2
        shape = range(2, 6)
        rank = len(shape)
        # All combinations.
        level_choices = [st.DimLevelType.dense, st.DimLevelType.compressed]
        levels = list(
            itertools.product(*itertools.repeat(level_choices, rank)))
        # All permutations.
        orderings = list(
            map(ir.AffineMap.get_permutation,
                itertools.permutations(range(rank))))
        bitwidths = [0]
        # The first type must be a dense tensor for numpy conversion to work.
        types = [ir.RankedTensorType.get(shape, f64)]
        for level in levels:
            for ordering in orderings:
                for pwidth in bitwidths:
                    for iwidth in bitwidths:
                        attr = st.EncodingAttr.get(level, ordering, pwidth,
                                                   iwidth)
                        types.append(
                            ir.RankedTensorType.get(shape, f64, attr))
        #
        # For exhaustiveness we should have one or more StressTest, such
        # that their paths cover all 2*n*(n-1) directed pairwise combinations
        # of the `types` set.  However, since n is already superexponential,
        # such exhaustiveness would be prohibitive for a test that runs on
        # every commit.  So for now we'll just pick one particular path that
        # at least hits all n elements of the `types` set.
        #
        tyconv = TypeConverter(ctx)
        # Total number of elements of the dense input.
        size = 1
        for d in shape:
            size *= d
        np_arg0 = np.arange(
            size, dtype=tyconv.irtype_to_dtype(f64)).reshape(*shape)
        stress_test = StressTest(tyconv).build(types)
        stress_test = stress_test.writeTo(raw_module_path)
        stress_test = stress_test.compile(compiler)
        stress_test = stress_test.writeTo(compiled_module_path)
        np_out = stress_test.run(np_arg0)
        # CHECK: Passed
        if not np.allclose(np_out, np_arg0):
            sys.exit('FAILURE')
        print('Passed')
def test_compile_and_run(self):
    """Compiles the configured MLIR file, runs it and optionally checks results.

    The first operation of the parsed module is the function under test. If
    its entry block has arguments, argument attributes must be present; they
    describe either literal shape values or static shaped types, for which
    seeded uniform random inputs are generated. When comparison with
    TensorFlow is enabled, the results are checked against the TFRT fallback
    execution, result by result if the function has more than one result.
    """
    filename = _TEST_FILE_NAME.value
    if not os.path.isabs(filename):
        filename = os.path.join(resource_loader.get_data_files_path(),
                                filename)
    with gfile.GFile(filename, mode='r') as f:
        mlir_function = f.read()

    arg_attrs = []
    with ir.Context() as ctx:
        ctx.allow_unregistered_dialects = True
        module = ir.Module.parse(mlir_function)
        func = module.body.operations[0]
        function_type = ir.FunctionType(
            ir.TypeAttr(func.attributes[_FUNCTION_TYPE_NAME]).value)
        function_name = ir.StringAttr(func.attributes['sym_name']).value

        # If the function has arguments, we expect argument attributes.
        entry_block = func.regions[0].blocks[0]
        if entry_block.arguments:
            self.assertIn(_ARG_ATTRIBUTES_NAME, func.attributes)
            arg_attrs = ir.ArrayAttr(func.attributes[_ARG_ATTRIBUTES_NAME])

        logging.info(f'processing {filename}')
        start = time.perf_counter()
        compiled = jitrt.compile(
            mlir_function,
            function_name,
            tf_jitrt.Specialization.ENABLED,
            vectorize=_VECTORIZE.value,
            one_shot_bufferize=_ONE_SHOT_BUFFERIZE.value)
        end = time.perf_counter()
        logging.info(f'compiled {filename} in {end-start:0.4f} seconds')

        # Seed once so the generated inputs are reproducible.
        np.random.seed(_INPUT_DATA_SEED.value)
        args = []
        for arg_attr in arg_attrs:
            attr_dict = ir.DictAttr(arg_attr)
            if _SHAPE_VALUE_ATTRIBUTE_NAME in attr_dict:
                shape_value_attr = ir.DenseIntElementsAttr(
                    attr_dict[_SHAPE_VALUE_ATTRIBUTE_NAME])
                args.append(
                    np.array(list(shape_value_attr)).astype(np.int32))
                continue
            if _STATIC_TYPE_ATTRIBUTE_NAME in attr_dict:
                static_type = ir.TypeAttr(
                    attr_dict[_STATIC_TYPE_ATTRIBUTE_NAME]).value
                shaped_type = ir.ShapedType(static_type)
                np_element_type = CompileAndRunTest.mlir_type_to_np_type(
                    shaped_type.element_type)
                random_values = np.random.uniform(
                    -10000.0, 10000.0, size=shaped_type.shape)
                args.append(random_values.astype(np_element_type))

        self.assertEqual(len(args), len(arg_attrs))

        start = time.perf_counter()
        result = jitrt.execute(compiled, args)
        end = time.perf_counter()
        logging.info(f'executed {filename} in {end-start:0.4f} seconds')

        if _COMPARE_WITH_TENSORFLOW.value:
            start = time.perf_counter()
            expected = tfrt_fallback.run_tfrt_fallback(
                mlir_function, function_name, args)
            end = time.perf_counter()
            logging.info(
                f'executed {filename} via tfrt fallback in {end-start:0.4f} seconds'
            )
            if len(function_type.results) <= 1:
                np.testing.assert_allclose(
                    result, expected, rtol=1e-5, atol=1e-5)
            else:
                # If there is more than one result, we need to iterate
                # manually, otherwise np.testing.assert_allclose will
                # complain if not all results have equal size.
                self.assertEqual(len(result), len(expected))
                for res, expect in zip(result, expected):
                    np.testing.assert_allclose(
                        res, expect, rtol=1e-5, atol=1e-5)
# REQUIRES: bindings_python # RUN: %PYTHON% %s | FileCheck %s import circt from circt.dialects import hw, sv from mlir import ir with ir.Context() as ctx, ir.Location.unknown() as loc: circt.register_dialects(ctx) ctx.allow_unregistered_dialects = True sv_attr = sv.SVAttributeAttr.get("fold", "false") print(f"sv_attr: {sv_attr} {sv_attr.name} {sv_attr.expression}") # CHECK: sv_attr: #sv.attribute<"fold" = "false"> fold false sv_attr = sv.SVAttributeAttr.get("no_merge") print(f"sv_attr: {sv_attr} {sv_attr.name} {sv_attr.expression}") # CHECK: sv_attr: #sv.attribute<"no_merge"> no_merge None i1 = ir.IntegerType.get_signless(1) i1_inout = hw.InOutType.get(i1) m = ir.Module.create() with ir.InsertionPoint(m.body): wire_op = sv.WireOp(i1_inout, "wire1") wire_op.attributes["sv.attributes"] = ir.ArrayAttr.get([sv_attr]) print(wire_op) # CHECK: %wire1 = sv.wire {sv.attributes = [#sv.attribute<"no_merge">]} : !hw.inout<i1> reg_op = sv.RegOp(i1_inout, "reg1")
def benchmark_sparse_mlir_multiplication():
    """Benchmark for mlir sparse matrix multiplication.

    Because it is an MLIR benchmark, two callables are returned rather than
    a result.

    Returns:
      A `(compiler, runner)` pair. `compiler()` builds the benchmark module
      and returns the execution engine's `invoke` callable; `runner(invoke)`
      allocates the arguments, calls "main" and returns the value the kernel
      stored in the timer buffer, as an int.
    """
    with ir.Context(), ir.Location.unknown():
        module = ir.Module.create()
        f64 = ir.F64Type.get()
        param1_type = ir.RankedTensorType.get([1000, 1500], f64)
        param2_type = ir.RankedTensorType.get([1500, 2000], f64)
        result_type = ir.RankedTensorType.get([1000, 2000], f64)
        with ir.InsertionPoint(module.body):

            @func.FuncOp.from_py_func(param1_type, param2_type, result_type)
            def sparse_kernel(x, y, z):
                return matmul_dsl(x, y, outs=[z])

    def compiler():
        with ir.Context(), ir.Location.unknown():
            kernel_func = get_kernel_func_from_module(module)
            timer_func = emit_timer_func()
            wrapped_func = emit_benchmark_wrapped_main_func(
                kernel_func, timer_func)

            # The final module is assembled from the textual form of the
            # three functions, in this order.
            module_text = (
                str(timer_func) + str(wrapped_func) + str(kernel_func))
            main_module_with_benchmark = ir.Module.parse(module_text)
            setup_passes(main_module_with_benchmark)

            c_runner_utils = os.getenv("MLIR_C_RUNNER_UTILS", "")
            assert os.path.exists(c_runner_utils), (
                f"{c_runner_utils} does not exist."
                f" Please pass a valid value for"
                f" MLIR_C_RUNNER_UTILS environment variable.")

            runner_utils = os.getenv("MLIR_RUNNER_UTILS", "")
            assert os.path.exists(runner_utils), (
                f"{runner_utils} does not exist."
                f" Please pass a valid value for MLIR_RUNNER_UTILS"
                f" environment variable.")

            shared_libs = [c_runner_utils, runner_utils]
            engine = ExecutionEngine(
                main_module_with_benchmark, 3, shared_libs=shared_libs)
            return engine.invoke

    def runner(engine_invoke):

        def as_memref_argument(array):
            # Arguments are passed as a pointer to a pointer to the ranked
            # memref descriptor of the numpy array.
            descriptor = rt.get_ranked_memref_descriptor(array)
            return ctypes.pointer(ctypes.pointer(descriptor))

        compiled_program_args = []
        # The result type appears twice: first as the returned buffer, last
        # as the `outs` operand.
        for argument_type in (result_type, param1_type, param2_type,
                              result_type):
            # Recover the dimensions from the printed type by stripping the
            # "tensor<...>" wrapper and dropping the trailing element type.
            dimensions_str = re.sub("<|>|tensor", "", str(argument_type))
            dimensions = [int(dim) for dim in dimensions_str.split("x")[:-1]]
            if argument_type == result_type:
                argument = np.zeros(dimensions, np.float64)
            else:
                argument = create_sparse_np_tensor(dimensions, 1000)
            compiled_program_args.append(as_memref_argument(argument))

        # Single-element buffer that is read back after the call.
        np_timers_ns = np.array([0], dtype=np.int64)
        compiled_program_args.append(as_memref_argument(np_timers_ns))
        engine_invoke("main", *compiled_program_args)
        return int(np_timers_ns[0])

    return compiler, runner