Example #1
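
# This excerpt is not self-contained. Below is a minimal sketch of the
# standard-library imports it relies on; the codegen-internal names
# (NativeFunction, NativeFunctionsGroup, FunctionSchema, SchemaKind,
# DispatchKey, FileManager, SelectiveBuilder, concatMap, mapMaybe, dest, ...)
# are assumed to come from the surrounding codegen package and are omitted here.
import argparse
import os
import pathlib
from collections import defaultdict
from typing import Dict, Optional, Sequence, Union
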
def main() -> None:
    parser = argparse.ArgumentParser(description='Generate ATen source files')
    parser.add_argument('-s',
                        '--source-path',
                        help='path to source directory for ATen',
                        default='aten/src/ATen')
    parser.add_argument(
        '-o',
        '--output-dependencies',
        help='output a list of dependencies into the given file and exit')
    parser.add_argument('-d',
                        '--install_dir',
                        help='output directory',
                        default='build/aten/src/ATen')
    parser.add_argument(
        '--rocm',
        action='store_true',
        help='reinterpret CUDA as ROCm/HIP and adjust filepaths accordingly')
    # TODO: --op_registration_whitelist will be removed when all call-sites
    # for gen.py are moved over to using the operator YAML file for mobile
    # custom build.
    parser.add_argument(
        '--op_registration_whitelist',
        nargs='*',
        help='filter op registrations by the whitelist (if set); '
        'each item is `namespace`::`operator name` without overload name; '
        'e.g.: aten::empty aten::conv2d ...')
    parser.add_argument(
        '--op_selection_yaml_path',
        help='Provide a path to the operator selection (for custom build) YAML '
        'that contains the information about the set of selected operators '
        'and their categories (training, ...). Each operator is either a '
        'full operator name with overload or just a bare operator name. '
        'The operator names also contain the namespace prefix (e.g. aten::)')
    parser.add_argument(
        '--backend_whitelist',
        nargs='*',
        help='filter dispatch backend by the whitelist (if set), '
        'e.g.: CPU CUDA QuantizedCPU ...')
    parser.add_argument(
        '--static_dispatch_backend',
        help='generate static dispatch code for the specific backend (if set)')
    parser.add_argument(
        '--force_schema_registration',
        action='store_true',
        help='force it to generate schema-only registrations for all ops, '
        'including those that are not listed on --op_registration_whitelist')
    options = parser.parse_args()

    selector = get_custom_build_selector(
        options.op_registration_whitelist,
        options.op_selection_yaml_path,
    )

    native_functions = parse_native_yaml(
        os.path.join(options.source_path, 'native/native_functions.yaml'))

    # Group native functions that share a signature; within each group, index
    # by schema kind (functional / inplace / out) so they can be reassembled
    # into NativeFunctionsGroups below.
    pre_grouped_native_functions: Dict[FunctionSchema, Dict[
        SchemaKind, NativeFunction]] = defaultdict(dict)
    for f in native_functions:
        d = pre_grouped_native_functions[f.func.signature()]
        assert f.func.kind() not in d
        d[f.func.kind()] = f

    def flatten_pre_group(
        d: Dict[SchemaKind, NativeFunction]
    ) -> Sequence[Union[NativeFunction, NativeFunctionsGroup]]:
        r = NativeFunctionsGroup.from_dict(d)
        if r is None:
            return list(d.values())
        else:
            return [r]

    # TODO: how come ValuesView isn't a Sequence lol
    grouped_native_functions = list(
        concatMap(flatten_pre_group,
                  list(pre_grouped_native_functions.values())))
    structured_native_functions = [
        g for g in grouped_native_functions
        if isinstance(g, NativeFunctionsGroup)
    ]

    template_dir = os.path.join(options.source_path, "templates")

    # NB: It is mandatory to NOT use os.path.join here, as the install directory
    # will eventually be ingested by cmake, which does not respect Windows style
    # path slashes.  If you switch this to use os.path.join, you'll get an error
    # like:
    #
    #   Syntax error in cmake code when parsing string
    #
    #     C:/Jenkins/workspace/pytorch-builds/pytorch-win-ws2016-cuda9-cudnn7-py3-build/build/aten/src/ATen\core/TensorMethods.h
    #
    #   Invalid character escape '\c'.
    core_install_dir = f'{options.install_dir}/core'
    pathlib.Path(core_install_dir).mkdir(parents=True, exist_ok=True)

    def make_file_manager(install_dir: str) -> FileManager:
        return FileManager(install_dir=install_dir,
                           template_dir=template_dir,
                           dry_run=options.output_dependencies)

    core_fm = make_file_manager(core_install_dir)
    cpu_fm = make_file_manager(options.install_dir)
    cuda_fm = make_file_manager(options.install_dir)

    extra_cuda_headers = '''\
#include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/ATenCUDAGeneral.h>
#include <ATen/cuda/CUDADevice.h>
#include <ATen/cuda/CUDAContext.h>'''
    if options.rocm:
        extra_cuda_headers = '''\
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
#include <ATen/hip/ATenHIPGeneral.h>
#include <ATen/hip/HIPDevice.h>
#include <ATen/hip/HIPContext.h>'''

    dispatch_keys = [
        DispatchKey.CPU,
        DispatchKey.SparseCPU,
        DispatchKey.SparseCsrCPU,
        DispatchKey.MkldnnCPU,
        DispatchKey.CUDA,
        DispatchKey.SparseCUDA,
        DispatchKey.SparseCsrCUDA,
        DispatchKey.QuantizedCPU,
        DispatchKey.QuantizedCUDA,
        DispatchKey.CompositeImplicitAutograd,
        DispatchKey.CompositeExplicitAutograd,
        # Meta is a magic key: it is automatically generated for structured
        # kernels
        DispatchKey.Meta,
    ]
    # Only a limited set of dispatch keys get per-key Functions.h headers
    # (e.g. CPUFunctions.h, CUDAFunctions.h) generated for them; this is that set
    functions_keys = {
        DispatchKey.CPU,
        DispatchKey.CUDA,
        DispatchKey.CompositeImplicitAutograd,
        DispatchKey.CompositeExplicitAutograd,
    }
    if options.backend_whitelist:
        dispatch_keys = [
            k for k in dispatch_keys if is_generic_dispatch_key(k)
            or str(k) in options.backend_whitelist
        ]

    static_dispatch_backend: Optional[DispatchKey] = None
    if options.static_dispatch_backend:
        static_dispatch_backend = DispatchKey.parse(
            options.static_dispatch_backend)

    for dispatch_key in dispatch_keys:
        fm = cuda_fm if is_cuda_dispatch_key(dispatch_key) else cpu_fm

        fm.write_with_template(
            f'Register{dispatch_key}.cpp', 'RegisterDispatchKey.cpp', lambda: {
                'extra_cuda_headers':
                extra_cuda_headers
                if is_cuda_dispatch_key(dispatch_key) else '',
                'legacy_th_headers':
                '#include <ATen/LegacyTHFunctionsCPU.h>' if dispatch_key ==
                DispatchKey.CPU else '#include <ATen/LegacyTHFunctionsCUDA.h>'
                if dispatch_key == DispatchKey.CUDA else '',
                'DispatchKey':
                dispatch_key,
                'dispatch_namespace':
                dispatch_key.lower(),
                'dispatch_namespaced_definitions':
                list(
                    concatMap(
                        dest.RegisterDispatchKey(dispatch_key,
                                                 Target.NAMESPACED_DEFINITION,
                                                 selector,
                                                 rocm=options.rocm),
                        grouped_native_functions)),
                'dispatch_anonymous_definitions':
                list(
                    concatMap(
                        dest.RegisterDispatchKey(dispatch_key,
                                                 Target.ANONYMOUS_DEFINITION,
                                                 selector,
                                                 rocm=options.rocm),
                        grouped_native_functions)),
                'dispatch_registrations':
                list(
                    concatMap(
                        dest.RegisterDispatchKey(dispatch_key,
                                                 Target.REGISTRATION,
                                                 selector,
                                                 rocm=options.rocm),
                        grouped_native_functions)),
            })

        if dispatch_key in functions_keys:
            fm.write_with_template(
                f'{dispatch_key}Functions.h', 'DispatchKeyFunctions.h',
                lambda: {
                    'dispatch_namespace':
                    dispatch_key.lower(),
                    'dispatch_namespaced_declarations':
                    list(
                        concatMap(
                            dest.RegisterDispatchKey(
                                dispatch_key,
                                Target.NAMESPACED_DECLARATION,
                                selector,
                                rocm=options.rocm), grouped_native_functions)),
                })

        del fm

    # BackendSelect is generated specially
    cpu_fm.write(
        'RegisterBackendSelect.cpp', lambda: {
            'backend_select_method_definitions':
            list(
                mapMaybe(ComputeBackendSelect(Target.DEFINITION),
                         native_functions)),
            'backend_select_function_registrations':
            list(
                mapMaybe(ComputeBackendSelect(Target.REGISTRATION),
                         native_functions)),
        })

    cpu_fm.write(
        'MetaFunctions.h', lambda: {
            'declarations':
            list(
                mapMaybe(compute_meta_function_declaration,
                         structured_native_functions)),
        })

    schema_selector = selector
    if options.force_schema_registration:
        schema_selector = SelectiveBuilder.get_nop_selector()
    cpu_fm.write(
        'RegisterSchema.cpp', lambda: {
            'schema_registrations':
            list(mapMaybe(RegisterSchema(schema_selector), native_functions)),
        })

    cpu_fm.write(
        'Functions.h', lambda: {
            'function_declarations':
            list(
                mapMaybe(
                    ComputeFunction(
                        Target.DECLARATION,
                        static_dispatch_backend=static_dispatch_backend,
                        is_redispatching_fn=False), native_functions)),
        })
    cpu_fm.write(
        'Functions.cpp', lambda: {
            'static_dispatch_extra_headers':
            static_dispatch_extra_headers(static_dispatch_backend),
            'function_definitions':
            list(
                mapMaybe(
                    ComputeFunction(
                        Target.DEFINITION,
                        static_dispatch_backend=static_dispatch_backend,
                        is_redispatching_fn=False), native_functions)),
        })
    cpu_fm.write(
        'RedispatchFunctions.h', lambda: {
            'function_redispatch_declarations':
            list(
                mapMaybe(
                    ComputeFunction(
                        Target.DECLARATION,
                        static_dispatch_backend=static_dispatch_backend,
                        is_redispatching_fn=True), native_functions)),
        })
    cpu_fm.write(
        'RedispatchFunctions.cpp', lambda: {
            'static_dispatch_extra_headers':
            static_dispatch_extra_headers(static_dispatch_backend),
            'function_redispatch_definitions':
            list(
                mapMaybe(
                    ComputeFunction(
                        Target.DEFINITION,
                        static_dispatch_backend=static_dispatch_backend,
                        is_redispatching_fn=True), native_functions)),
        })
    core_fm.write(
        'TensorBody.h', lambda: {
            'tensor_method_declarations':
            list(
                mapMaybe(
                    ComputeTensorMethod(Target.DECLARATION,
                                        static_dispatch_backend=
                                        static_dispatch_backend),
                    native_functions)),
        })
    core_fm.write(
        'TensorMethods.cpp', lambda: {
            'static_dispatch_extra_headers':
            static_dispatch_extra_headers(static_dispatch_backend),
            'tensor_method_definitions':
            list(
                mapMaybe(
                    ComputeTensorMethod(Target.DEFINITION,
                                        static_dispatch_backend=
                                        static_dispatch_backend),
                    native_functions)),
        })
    core_fm.write(
        'ATenOpList.cpp', lambda: {
            'aten_ops': list(mapMaybe(compute_aten_op, native_functions)),
        })
    cpu_fm.write(
        'NativeFunctions.h', lambda: {
            'native_function_declarations':
            list(
                concatMap(dest.compute_native_function_declaration,
                          grouped_native_functions)),
        })

    cpu_fm.write(
        'Declarations.yaml', lambda: format_yaml(
            [compute_declaration_yaml(f) for f in native_functions]))
    cpu_fm.write(
        'RegistrationDeclarations.h', lambda: {
            'registration_declarations':
            [compute_registration_declarations(f) for f in native_functions],
        })

    if options.output_dependencies:
        cpu_fm.write_outputs(options.output_dependencies)
        core_fm.write_outputs(f"{options.output_dependencies}-core")
        cuda_fm.write_outputs(f"{options.output_dependencies}-cuda")
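
# Sketch of a typical invocation, inferred from the argparse defaults above
# (illustrative; the actual entry point and module path depend on how the
# repository packages this file):
#
#   python gen.py -s aten/src/ATen -d build/aten/src/ATen
#
# Passing --output-dependencies <file> turns every FileManager into a dry run
# that only records which files would have been written (see the
# write_outputs calls above).

if __name__ == '__main__':
    main()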
Example #2
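
# As in Example #1, a minimal sketch of the imports this excerpt needs; the
# codegen-internal names (NativeFunction, DifferentiableInput,
# DispatcherSignature, mapMaybe, translate, modifies_arguments,
# unwrap_tensor_args, ...) are assumed to come from the surrounding
# codegen package.
from typing import List, Optional
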
def gen_differentiable_inputs(
        f: NativeFunction) -> List[DifferentiableInput]:
    return list(
        mapMaybe(gen_differentiable_input, f.func.arguments.non_out))


def emit_inplace_functionalization_body(
        f: NativeFunction, functional_op: Optional[NativeFunction]) -> str:
    # Mutation case: this op modifies at least one of its arguments in place.
    assert modifies_arguments(f)

    dispatcher_sig = DispatcherSignature.from_schema(f.func)

    keyset = 'dispatchKeySet & c10::after_func_keyset'
    return_type = dispatcher_sig.returns_type().remove_const_ref().cpp_type()

    unwrap_tensor_args_str, unwrapped_args_ctx = unwrap_tensor_args(
        dispatcher_sig)

    maybe_return = '' if len(f.func.returns) == 0 else 'return '
    sync_tensor_args = '\n      '.join(
        mapMaybe(
            lambda arg: f'at::functionalization::impl::sync({arg.name});'
            if arg.type.is_tensor_like() else None, f.func.arguments.flat_all))

    # Note [functionalizing copy_() and not preserving strides]
    # copy_() can't be functionalized, since there doesn't exist an out-of-place variant.
    # We could add one, but that would be sub-optimal for functorch: copy() would need to allocate a fresh tensor.
    # This may seem like a large hack for one optimization, but copy_() is one of the most common inplace operators.
    # Instead, we can replace `self.copy_(src)` with `src.to(self).expand_as(self)`.
    # This maintains the exact same semantics, EXCEPT that we don't preserve the strides from `self`.
    # This seems like a reasonable tradeoff, for a few reasons:
    # - mutation removal is only used by functorch, and not by Vulkan or XLA. Functorch already doesn't preserve strides.
    # - There are actually a few other places where the functionalization pass currently doesn't support strides:
    #   calls to slice/diagonal_scatter don't currently preserve the strides of their inputs (but maybe we should fix this).
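    # Illustrative example: if `self` is a transposed (hence non-contiguous)
    # view, the functionalized copy_ below can hand back a contiguous tensor,
    # since the result takes its layout from `src`/`to` rather than from `self`.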
    if str(f.func.name) == 'copy_':
        exprs = [keyset] + [a.name for a in unwrapped_args_ctx]
        functional_call_str = f"""\
            auto tmp_intermediate = at::_ops::to_other::redispatch({keyset}, src_, self_, non_blocking, false, c10::nullopt);
            tmp_output = at::_ops::expand_as::redispatch({keyset}, tmp_intermediate, self_);"""
    elif functional_op is None:
        # We can't functionalize this inplace op, since we don't know what the corresponding functional op is.
        inplace_exprs = [keyset] + [
            e.expr for e in translate(
                unwrapped_args_ctx, dispatcher_sig.arguments(), method=False)
        ]
        warn_str = "Note: the functionalization pass encountered an operator ({}) that it could not functionalize, \
because it couldn't find an out-of-place equivalent of the operator to call. \
Instead, it's calling the inplace/view operator directly. \
If this causes problems in your program, consider upstreaming the out-of-place op to PyTorch.".format(
            str(f.func.name))

        return f"""
      if (c10::impl::tls_local_dispatch_key_set().included_.has(c10::DispatchKey::Functionalize)) {{
          TORCH_WARN("{warn_str}");
      }}
      {sync_tensor_args}
      {unwrap_tensor_args_str}
      at::AutoDispatchSkipFunctionalize guard;
      // Redispatch as normally otherwise, since XLA has its own lowerings for special inplace ops.
      {maybe_return}at::_ops::{f.func.name.unambiguous_name()}::redispatch({', '.join(inplace_exprs)});
"""
    else:
        # call the out-of-place variant of the op
        functional_sig = DispatcherSignature.from_schema(functional_op.func)
        functional_exprs = [keyset] + [
            e.expr for e in translate(
                unwrapped_args_ctx, functional_sig.arguments(), method=False)
        ]
        functional_call_str = \
            f"tmp_output = at::_ops::{functional_op.func.name.unambiguous_name()}::redispatch({', '.join(functional_exprs)});"

    mutable_input_post_processing = '\n'.join([
        f"""
      auto {a.name}_functional = at::functionalization::impl::unsafeGetFunctionalWrapper({a.name});
      {a.name}_functional->replace_(tmp_output);
      {a.name}_functional->commit_update();"""
        for a in f.func.arguments.flat_non_out
        if a.annotation and a.annotation.is_write and a.type.is_tensor_like()
    ])

    return f"""