Example #1
    def __init__(
        self, tc: str,
        mapping_options_factory: (Callable[[str, str, Iterable[torch.Tensor]],
                                           MappingOptions])):
        self.tc = tc
        self.mapping_options_factory = mapping_options_factory
        self.compilation_cache = CompilationCache(self.tc)

        # Make each TC def in the tc str a method of the TC object so we can:
        #     T = tc.define("def add() ...")
        #     T.add()
        #
        def make_closure(obj: TC, tc_def_name: str):
            def fun(*inputs: torch.Tensor,
                    outputs: Optional[Tuple[torch.Tensor, ...]] = None,
                    unchecked: bool = False) -> List[torch.Tensor]:
                return obj(tc_def_name,
                           *inputs,
                           outputs=outputs,
                           unchecked=unchecked)

            return fun

        for tc_def in tclib.parse_defs(self.tc):
            setattr(self, tc_def, make_closure(self, tc_def))
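
A hedged usage sketch of the pattern above: assuming this constructor belongs to the TC class produced by tensor_comprehensions.define, and that make_naive_options_factory exists as in the TC Python API, every def parsed from the TC string becomes a directly callable method:

import torch
import tensor_comprehensions as tc

# 'add' is attached by make_closure above, so it is callable as a method.
T = tc.define(
    "def add(float(N) A, float(N) B) -> (C) { C(n) = A(n) + B(n) }",
    tc.make_naive_options_factory())
A = torch.randn(100, device='cuda')
B = torch.randn(100, device='cuda')
C, = T.add(A, B)  # the generated fun(...) returns a list of output tensors
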
Example #2
    def __init__(self,
                 tc="",
                 forward_name="",
                 forward_force_reinforcement_tuning=False,
                 backward_name="",
                 backward_force_reinforcement_tuning=False,
                 check_output_shapes=True,
                 tuner_cache_file="",
                 tuner_config=TunerConfig(),
                 debug=False):
        if debug:
            assert isinstance(tc, str), type(tc)
            assert isinstance(forward_name, str), type(forward_name)
            assert isinstance(forward_force_reinforcement_tuning,
                              bool), type(forward_force_reinforcement_tuning)
            assert isinstance(backward_name, str), type(backward_name)
            assert isinstance(backward_force_reinforcement_tuning,
                              bool), type(backward_force_reinforcement_tuning)
            assert isinstance(check_output_shapes,
                              bool), type(check_output_shapes)
            assert isinstance(tuner_cache_file, str), type(tuner_cache_file)
            assert isinstance(tuner_config, TunerConfig), type(tuner_config)

        self.tc = tc
        self.forward_name = forward_name
        self.forward_force_reinforcement_tuning = forward_force_reinforcement_tuning
        self.backward_name = backward_name
        self.backward_force_reinforcement_tuning = backward_force_reinforcement_tuning
        self.check_output_shapes = check_output_shapes
        self.tuner_cache_file = tuner_cache_file
        self.tuner_config = tuner_config
        self.debug = debug
        self.compilation_cache = CompilationCache(self.tc)
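
The class statement itself is not shown in the snippet above; the construction sketch below therefore uses TcBuilder as a hypothetical name for it, with an illustrative TC def, cache path, and tuner settings:

from tensor_comprehensions.tclib import TunerConfig

# 'TcBuilder' is a hypothetical name; the snippet omits the class statement.
builder = TcBuilder(
    tc="def relu(float(N) I) -> (O) { O(n) = fmax(I(n), 0.0) }",
    forward_name="relu",
    tuner_cache_file="/tmp/some_cache_file",  # illustrative path
    tuner_config=TunerConfig().generations(3).pop_size(10),
    debug=True)
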
import torch
from tensor_comprehensions.tclib import (
    CompilationCache, MappingOptions, TunerConfig, compile)

# Define a TC string for matmul and some input torch CUDA tensors
mm = """
def matmul(float(M,N) A, float(N,K) B) -> (C) {
    C(m, k) +=! A(m, r_n) * B(r_n, k)
}
def matmul_agrad(float(N,K) B, float(M,K) d_C) -> (d_A) {
    d_A(m, n) +=! d_C(  m, r_k) * B(  n, r_k)
}
def matmul_bgrad(float(M,N) A, float(M,K) d_C) -> (d_B) {
    d_B(n, k) +=! d_C(r_m,   k) * A(r_m,   n)
}
"""
A, B = (torch.randn(300, 400, device='cuda', requires_grad=True),
        torch.randn(400, 500, device='cuda', requires_grad=True))

compilation_cache = CompilationCache(mm)
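
# A hedged sketch of what the cache enables: compile a (name, inputs) pair
# once, then run repeatedly through the cached executor with low overhead.
# The compile(...) and unchecked_run(...) signatures below are assumed from
# the TC pybind example and are not shown elsewhere in this snippet.
compilation_cache.compile("matmul", (A, B), MappingOptions('naive'))
outputs = compilation_cache.unchecked_run("matmul", (A, B), ())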

tuner_config = (
    TunerConfig().threads(8).pop_size(25).generations(3).devices("0"))

################################################################################
# 1. Use the simple high-overhead compile/run C++ API
#    If you can keep state in your layer or wish to experiment with TC,
#    this is a simple entry point.
#    If state cannot be kept, be aware that this API has a non-trivial
#    overhead when output sizes need to be inferred and outputs allocated.
#    Compilation itself has a prohibitive cost and needs to be memoized,
#    either by holding on to the executor or by using the low-overhead
#    abstraction below.
################################################################################
executor = compile(mm, "matmul", (A, B), MappingOptions('naive'))
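
# Run the executor: with an empty outputs tuple, output sizes are inferred
# from the TC def and fresh tensors are allocated (the high-overhead path).
C = executor.run((A, B), ())
# On a hot path, pass preallocated outputs and skip the checks;
# unchecked_run is assumed from the TC pybind example.
executor.unchecked_run((A, B), (C, ))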