def generate(tc: str, entry_point: str, *inputs: torch.Tensor) -> MappingOptions:
    """Return the best cached MappingOptions for this TC/entry point/inputs.

    Looks up the top-1 entry in the mapping-options cache backing file;
    falls back to naive options when the cache has no match.
    NOTE(review): `cache_filename` is resolved from the enclosing scope
    (closure or module global) — confirm it is set before this is called.
    """
    candidates = MappingOptionsCache(cache_filename).load(
        tc, entry_point, inputs, 1)
    if candidates:
        return candidates[0]
    return MappingOptions('naive')
def compileOrTune(self, name="", force_reinforcement_tuning=False, inputs=()):
    """Ensure a compiled version of TC def `name` exists for `inputs`.

    If the compilation cache already holds a compiled entry for
    (name, inputs), this is a no-op. Otherwise the best known
    MappingOptions are loaded from `self.tuner_cache_file`; when options
    are found and `force_reinforcement_tuning` is False they are used
    directly, otherwise a tuning run is started (seeded with the loaded
    options if any) and its result is used. The chosen options are then
    compiled into `self.compilation_cache`.

    :param name: name of the TC def inside `self.tc` to compile.
    :param force_reinforcement_tuning: when True, always re-tune even if
        cached options exist (reinforcing the cache file).
    :param inputs: tuple of tensors whose sizes/strides drive compilation.
    """
    if self.debug:
        # Debug trace: TC source, def name, and each input's size/stride.
        print(
            "On Tc: {}\ncompile def {}, force_reinforcement_tuning {}, inputs: {}"
            .format(
                self.tc, name, force_reinforcement_tuning,
                "".join("{}/{}, ".format(t.size().__str__(),
                                         t.stride().__str__())
                        for t in inputs)))
    if not self.compilation_cache.is_compiled(name, inputs):
        cache = MappingOptionsCache(self.tuner_cache_file)
        mapping_options = None
        # Top-1 candidate for these input shapes, if the cache has one.
        base_options_list = cache.load(self.tc, name, inputs, 1)
        if len(base_options_list) > 0 and not force_reinforcement_tuning:
            # Cached options found and no re-tuning requested: use them.
            mapping_options = base_options_list[0]
            if self.debug:
                print("Found best options in {}:\n{}".format(
                    self.tuner_cache_file, mapping_options))
        else:
            if self.debug:
                print(
                    "########################################################"
                    "########################################################"
                )
                print(
                    "force_reinforcement_tuning = {} was specified, {} options loaded from "
                    "{}".format(force_reinforcement_tuning,
                                len(base_options_list),
                                self.tuner_cache_file))
                print(
                    "Starting a tuning run (abort it with Ctrl+C when "
                    "performance is satisfactory.\nYou can always reinforce "
                    "the results later by passing a proper tuner cache file "
                    "and specifying force_reinforcement_tuning=True)")
                print(
                    "########################################################"
                    "########################################################"
                )
            # Seed the tuner with cached options when available,
            # otherwise start from default-constructed MappingOptions.
            if len(base_options_list) == 0:
                mapping_options = MappingOptions()
            else:
                mapping_options = base_options_list[0]
            # The tuner writes its results back into the cache file.
            tuner = Tuner(self.tc, self.tuner_cache_file)
            mapping_options = tuner.tune(name, inputs, mapping_options,
                                         self.tuner_config)
        self.compilation_cache.compile(name, inputs, mapping_options)
def autotune(tc: str,
             entry_point: str,
             *inputs: torch.Tensor,
             starting_options: Optional[Union[str, MappingOptions]] = None,
             tuner_config: Optional[TunerConfig] = None,
             cache_filename: Optional[str] = None,
             load_from_cache: Optional[bool] = False,
             store_to_cache: Optional[bool] = False) -> MappingOptions:
    r"""Tunes the defined TC function for given inputs.

    The MappingOptions from which tuning starts is either passed explicitly via
    :code:`starting_options` or loaded from a cache file (when both
    :code:`cache_filename` and :code:`load_from_cache` are properly
    specified). Exactly one of :code:`starting_options` and
    :code:`load_from_cache` must be specified.

    It is possible to obtain a reinforcement tuning behavior by tuning over
    multiple executions and specifying both :code:`load_from_cache` and
    :code:`store_to_cache`. It is recommended to only use a single cache file
    for all TC defs and reinforce it over time.

    An example of usage is provided with :func:`autotune_and_compile`.

    :param tc: a string containing one or more TC defs.
    :param entry_point: the name of the TC def to compile and execute.
    :param inputs: PyTorch Tensors that TC should tune for. The inputs must be
        passed in the order they are also passed in the definition of
        the TC function.
    :param starting_options: :class:`~tclib.MappingOptions` from which tuning
        should start.
    :param tuner_config: :class:`~tclib.TunerConfig` to control the behavior
        of the autotuner. Defaults to a fresh :code:`TunerConfig()` per call.
    :param cache_filename: path of the backing cache file used with
        :code:`load_from_cache` / :code:`store_to_cache`.
    :param load_from_cache: Get the starting :class:`~tclib.MappingOptions`
        by loading from :code:`cache_filename`. If loading fails to recover
        an entry from the cache file for the given input sizes an assertion
        error will trigger.
    :param store_to_cache: Optionally store the best result by appending it
        to the backing cache file.

    Returns:
        The best options found during this tuning run.
    """
    # A fresh TunerConfig per call avoids the shared-mutable-default pitfall
    # (a module-level `TunerConfig()` default would be built once at import
    # time and shared across every call).
    if tuner_config is None:
        tuner_config = TunerConfig()

    # Argument consistency checks: a cache file only makes sense together
    # with at least one of load/store, and vice versa.
    if cache_filename is not None:
        assert load_from_cache or store_to_cache, (
            "cache_filename specified, "
            "must also specify load_from_cache or store_to_cache")
    if load_from_cache or store_to_cache:
        assert cache_filename is not None, (
            "load_from_cache or store_to_cache"
            " specified, must also specify cache_filename")
    assert starting_options is not None or load_from_cache, (
        "Must specify either starting_options or load_from_cache, choose one!")
    assert starting_options is None or not load_from_cache, (
        "Cannot specify both starting_options and load_from_cache, choose one!"
    )

    base_options = None
    if load_from_cache:
        cache = MappingOptionsCache(cache_filename)
        loaded = cache.load(tc, entry_point, inputs, 1)
        assert len(loaded) > 0, (
            "Could not load from cache for TC {} and sizes {}".format(
                entry_point, "".join(str(i.size()) + " " for i in inputs)))
        base_options = loaded[0]
    else:
        # Accept either a named-options string or a MappingOptions object.
        base_options = (MappingOptions(starting_options) if isinstance(
            starting_options, str) else starting_options)

    # TODO: This is still an implicit store behavior in the C++ API,
    #       make it explicit... (an empty filename disables storing)
    tuner = Tuner(tc, cache_filename if store_to_cache else "")
    return tuner.tune(entry_point, inputs, base_options, tuner_config)
# Run once without timing compilation_cache.unchecked_run("matmul", (A, B)) # unchecked_run on tensors time_tc(100, "raw unchecked_run naive options\t", lambda name, ins: compilation_cache.unchecked_run(name, ins), "matmul", (A, B)) ################################################################################ # 3. Short tuning run saving to file then load the best option to create a # compilation cache ################################################################################ with tempfile.NamedTemporaryFile() as cache_file: tuner = Tuner(mm, cache_file.name) top1 = tuner.tune("matmul", (A, B), MappingOptions('naive'), tuner_config) cache = MappingOptionsCache(cache_file.name) top10 = cache.load(mm, "matmul", (A, B), 10) assert top1.__str__() == top10[0].__str__() # Compile and run with the new options compilation_cache.compile("matmul", (A, B), top1) time_tc(100, "raw unchecked_run tuned options\t", lambda name, ins: compilation_cache.unchecked_run(name, ins), "matmul", (A, B)) ################################################################################ # 4. Simple torch.autograd.Function ################################################################################ T = tc.define( mm, tc.make_autotuned_options_factory(starting_options='naive',
################################################################################ from tensor_comprehensions.tclib import Tuner from tensor_comprehensions.tclib import MappingOptionsCache from tensor_comprehensions.tclib import TunerConfig import uuid unique_filename = "/tmp/" + str(uuid.uuid4()) print("Tune with cache @", unique_filename) print("Note that if you pass a fixed filename, you can reinforce an " + "existing tuning state") tuner = Tuner(mm, unique_filename) top1 = tuner.tune( "matmul", (mat1, mat2), MappingOptions(), TunerConfig(threads=8, pop_size=25, generations=3, devices="0")) cache = MappingOptionsCache(unique_filename) top10 = cache.load(mm, "matmul", (mat1, mat2), 10) assert top1.__str__() == top10[0].__str__() # Compile and run with the new options compilation_cache.compile("matmul", (mat1, mat2), top1) time_tc(100, "raw unchecked_run tuned options\t", lambda name, ins: compilation_cache.unchecked_run(name, ins, ()), "matmul", (mat1, mat2)) ################################################################################ # 4. Simple TC builder ################################################################################ class TcBuilder(): def __init__(self,