Exemplo n.º 1
0
 def __call__(self, definition_ir) -> Dict[str, Dict[str, str]]:
     """Generate the computation and bindings sources from a definition IR.

     The input is lowered step by step (definition IR -> GTIR -> OIR -> CUIR)
     and the final CUIR is handed to the two code generators.

     Args:
         definition_ir: Definition IR passed to ``DefIRToGTIR.apply``.

     Returns:
         A nested mapping of generated sources: the ``"computation"`` entry
         holds ``"computation.hpp"`` and the ``"bindings"`` entry holds
         ``"bindings.cu"``.
     """
     # Definition IR -> GTIR, then run the full GTIR pipeline.
     lowered_gtir = DefIRToGTIR.apply(definition_ir)
     gtir = GtirPipeline(lowered_gtir).full()

     # GTIR -> OIR. The OIR pipeline can be overridden through the
     # "oir_pipeline" backend option; otherwise a default pipeline that skips
     # NoFieldAccessPruning is used.
     unoptimized_oir = gtir_to_oir.GTIRToOIR().visit(gtir)
     backend_opts = self.backend.builder.options.backend_opts
     fallback_pipeline = DefaultPipeline(skip=[NoFieldAccessPruning])
     pipeline = backend_opts.get("oir_pipeline", fallback_pipeline)
     optimized_oir = pipeline.run(unoptimized_oir)
     optimized_oir = FillFlushToLocalKCaches().visit(optimized_oir)

     # OIR -> CUIR, followed by the CUIR-level passes.
     lowered_cuir = oir_to_cuir.OIRToCUIR().visit(optimized_oir)
     fused_cuir = kernel_fusion.FuseKernels().visit(lowered_cuir)
     final_cuir = extent_analysis.CacheExtents().visit(fused_cuir)

     # Both code generators receive the same formatting option.
     format_source = self.backend.builder.options.format_source
     computation_src = cuir_codegen.CUIRCodegen.apply(
         final_cuir, format_source=format_source
     )
     bindings_src = GTCCudaBindingsCodegen.apply(
         final_cuir,
         module_name=self.module_name,
         backend=self.backend,
         format_source=format_source,
     )

     return {
         "computation": {"computation.hpp": computation_src},
         "bindings": {"bindings.cu": bindings_src},
     }
Exemplo n.º 2
0
def test_forward_backward_fusion():
    """Check that a forward kernel and a backward kernel are fused into one.

    The first kernel writes ``tmp`` in a two-section FORWARD loop; the second
    kernel reads ``tmp`` (no offsets given) in a BACKWARD loop. After
    ``FuseKernels`` a single kernel is expected whose vertical loops are the
    concatenation of the two original kernels' vertical loops.
    """
    # Forward kernel, first section (ends at level 0, offset 1): tmp <- inp.
    init_section = VerticalLoopSectionFactory(
        end__level=0,
        end__offset=1,
        horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="tmp", right__name="inp"
        ),
    )
    # Forward kernel, second section (starts at offset 1): tmp <- tmp read
    # with a k offset of -1.
    carry_section = VerticalLoopSectionFactory(
        start__offset=1,
        horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="tmp", right__name="tmp", right__offset__k=-1
        ),
    )
    forward_loop = VerticalLoopFactory(
        loop_order=LoopOrder.FORWARD,
        sections=[init_section, carry_section],
    )
    forward_kernel = KernelFactory(vertical_loops__0=forward_loop)

    # Backward kernel: out <- tmp.
    backward_loop = VerticalLoopFactory(
        loop_order=LoopOrder.BACKWARD,
        sections__0__horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="out", right__name="tmp"
        ),
    )
    backward_kernel = KernelFactory(vertical_loops__0=backward_loop)

    testee = ProgramFactory(kernels=[forward_kernel, backward_kernel])
    fused = kernel_fusion.FuseKernels().visit(testee)

    assert len(fused.kernels) == 1
    fused_loops = fused.kernels[0].vertical_loops
    expected_loops = (
        testee.kernels[0].vertical_loops + testee.kernels[1].vertical_loops
    )
    assert fused_loops == expected_loops
Exemplo n.º 3
0
def test_no_fusion_with_parallel_offsets():
    """Check that kernels stay separate when the consumer reads with an offset.

    Two scenarios are covered; in both, the program must still contain two
    kernels after ``FuseKernels``:

    1. A FORWARD kernel writes ``tmp`` and a BACKWARD kernel reads ``tmp``
       with an ``i`` offset of 1.
    2. Two kernels with the factory-default loop order, where the second
       reads ``tmp`` with a ``k`` offset of 1.
    """
    # --- Scenario 1: consumer reads tmp with an i offset ---
    init_section = VerticalLoopSectionFactory(
        end__level=0,
        end__offset=1,
        horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="tmp", right__name="inp"
        ),
    )
    carry_section = VerticalLoopSectionFactory(
        start__offset=1,
        horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="tmp", right__name="tmp", right__offset__k=-1
        ),
    )
    forward_loop = VerticalLoopFactory(
        loop_order=LoopOrder.FORWARD,
        sections=[init_section, carry_section],
    )
    forward_kernel = KernelFactory(vertical_loops__0=forward_loop)

    shifted_read = AssignStmtFactory(
        left__name="out", right__name="tmp", right__offset__i=1
    )
    backward_loop = VerticalLoopFactory(
        loop_order=LoopOrder.BACKWARD,
        sections__0__horizontal_executions__0__body__0=shifted_read,
    )
    backward_kernel = KernelFactory(vertical_loops__0=backward_loop)

    testee = ProgramFactory(kernels=[forward_kernel, backward_kernel])
    result = kernel_fusion.FuseKernels().visit(testee)
    assert len(result.kernels) == 2

    # --- Scenario 2: consumer reads tmp with a k offset ---
    producer_stmt = AssignStmtFactory(left__name="tmp", right__name="inp")
    producer_kernel = KernelFactory(
        vertical_loops__0__sections__0__horizontal_executions__0__body__0=producer_stmt
    )
    consumer_stmt = AssignStmtFactory(
        left__name="out", right__name="tmp", right__offset__k=1
    )
    consumer_kernel = KernelFactory(
        vertical_loops__0__sections__0__horizontal_executions__0__body__0=consumer_stmt
    )

    testee = ProgramFactory(kernels=[producer_kernel, consumer_kernel])
    result = kernel_fusion.FuseKernels().visit(testee)
    assert len(result.kernels) == 2
Exemplo n.º 4
0
 def __call__(self, definition_ir) -> Dict[str, Dict[str, str]]:
     """Generate the computation and bindings sources from a definition IR.

     The input is converted to GTIR, run through the GTIR passes, lowered to
     OIR and optimized via ``self._optimize_oir``, lowered to CUIR, and
     finally passed to the two code generators.

     Args:
         definition_ir: Definition IR passed to ``DefIRToGTIR.apply``.

     Returns:
         A nested mapping of generated sources: the ``"computation"`` entry
         holds ``"computation.hpp"`` and the ``"bindings"`` entry holds
         ``"bindings.cu"``.
     """
     # GTIR stage: each pass consumes the result of the previous one.
     gtir = DefIRToGTIR.apply(definition_ir)
     for gtir_pass in (prune_unused_parameters, resolve_dtype, upcast):
         gtir = gtir_pass(gtir)

     # OIR stage.
     lowered_oir = gtir_to_oir.GTIRToOIR().visit(gtir)
     optimized_oir = self._optimize_oir(lowered_oir)

     # CUIR stage: kernel fusion, then extent computation and caching.
     lowered_cuir = oir_to_cuir.OIRToCUIR().visit(optimized_oir)
     fused_cuir = kernel_fusion.FuseKernels().visit(lowered_cuir)
     with_extents = extent_analysis.ComputeExtents().visit(fused_cuir)
     final_cuir = extent_analysis.CacheExtents().visit(with_extents)

     # Code generation.
     computation_files = {
         "computation.hpp": cuir_codegen.CUIRCodegen.apply(final_cuir),
     }
     bindings_files = {
         "bindings.cu": GTCCudaBindingsCodegen.apply(
             final_cuir, module_name=self.module_name
         ),
     }
     return {"computation": computation_files, "bindings": bindings_files}
Exemplo n.º 5
0
 def __call__(self, definition_ir) -> Dict[str, Dict[str, str]]:
     """Generate the computation and bindings sources from a definition IR.

     The input is lowered step by step (definition IR -> GTIR -> OIR -> CUIR)
     and the final CUIR is handed to the two code generators.

     Args:
         definition_ir: Definition IR passed to ``DefIRToGTIR.apply``.

     Returns:
         A nested mapping of generated sources: the ``"computation"`` entry
         holds ``"computation.hpp"`` and the ``"bindings"`` entry holds
         ``"bindings.cu"``.
     """
     # Definition IR -> GTIR, then run the full GTIR pipeline.
     lowered_gtir = DefIRToGTIR.apply(definition_ir)
     gtir = GtirPipeline(lowered_gtir).full()

     # GTIR -> OIR, then run the full OIR pipeline with NoFieldAccessPruning
     # skipped.
     lowered_oir = gtir_to_oir.GTIRToOIR().visit(gtir)
     oir = OirPipeline(lowered_oir).full(skip=[NoFieldAccessPruning])

     # OIR -> CUIR, then apply the CUIR passes in order.
     cuir = oir_to_cuir.OIRToCUIR().visit(oir)
     cuir_passes = (
         kernel_fusion.FuseKernels,
         extent_analysis.ComputeExtents,
         extent_analysis.CacheExtents,
     )
     for cuir_pass in cuir_passes:
         cuir = cuir_pass().visit(cuir)

     # Code generation.
     computation_src = cuir_codegen.CUIRCodegen.apply(cuir)
     bindings_src = GTCCudaBindingsCodegen.apply(
         cuir, module_name=self.module_name, backend=self.backend
     )
     return {
         "computation": {"computation.hpp": computation_src},
         "bindings": {"bindings.cu": bindings_src},
     }