def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    platform = kwargs['platform']
    compiler = kwargs['compiler']
    sregistry = kwargs['sregistry']

    # Flush denormal numbers
    avoid_denormals(graph, platform=platform)

    # Distributed-memory parallelism
    mpiize(graph, sregistry=sregistry, options=options)

    # Lower BlockDimensions so that blocks of arbitrary shape may be used
    relax_incr_dimensions(graph)

    # Parallelism
    parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
    parizer.make_simd(graph)
    parizer.make_parallel(graph)
    parizer.initialize(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    cls._Target.DataManager(sregistry).process(graph)

    # Linearize n-dimensional Indexeds
    linearize(graph, mode=options['linearize'], sregistry=sregistry)

    return graph
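# All of the variants in this section follow the same shape: a bag of kwargs is
# unpacked, a fixed sequence of passes rewrites `graph` in place, and the graph
# is returned. A minimal, self-contained sketch of that pass-pipeline pattern
# follows; the stub passes and the list-as-graph are illustrative stand-ins,
# not Devito's actual API.

def stub_mpiize(graph, **kwargs):
    graph.append('mpiize')

def stub_hoist_prodders(graph, **kwargs):
    graph.append('hoist_prodders')

def specialize(graph, **kwargs):
    options = kwargs['options']
    # Passes run in a fixed order; each mutates the graph in place
    if options.get('mpi'):
        stub_mpiize(graph, mode=options['mpi'])
    stub_hoist_prodders(graph)
    return graph

assert specialize([], options={'mpi': 'basic'}) == ['mpiize', 'hoist_prodders']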
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    platform = kwargs['platform']
    sregistry = kwargs['sregistry']

    # Flush denormal numbers
    avoid_denormals(graph)

    # Distributed-memory parallelism
    optimize_halospots(graph)
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # Lower IncrDimensions so that blocks of arbitrary shape may be used
    relax_incr_dimensions(graph, sregistry=sregistry)

    # Parallelism
    parizer = cls._Target.Parizer(sregistry, options, platform)
    parizer.make_simd(graph)
    parizer.make_parallel(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    cls._Target.DataManager(sregistry).process(graph)

    # Initialize the target-language runtime
    parizer.initialize(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    platform = kwargs['platform']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    optimize_halospots(graph)
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # GPU parallelism
    parizer = cls._Target.Parizer(sregistry, options, platform)
    parizer.make_parallel(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    cls._Target.DataManager(sregistry, options).process(graph)

    # Initialize the target-language runtime
    parizer.initialize(graph)

    # TODO: This should be moved right below the `mpiize` pass, but currently
    # calling `make_gpudirect` before the `Symbol definitions` block would create
    # Blocks before creating C variables. That would lead to MPI_Request variables
    # being local to their Blocks, and hence to incorrect C code being generated.
    if options['gpu-direct']:
        parizer.make_gpudirect(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    platform = kwargs['platform']

    # Flush denormal numbers
    avoid_denormals(graph)

    # Distributed-memory parallelism
    optimize_halospots(graph)
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # Lower IncrDimensions so that blocks of arbitrary shape may be used
    relax_incr_dimensions(graph, counter=generator())

    # SIMD-level parallelism
    ompizer = Ompizer()
    ompizer.make_simd(graph, simd_reg_size=platform.simd_reg_size)

    # Shared-memory parallelism
    ompizer.make_parallel(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    data_manager = DataManager()
    data_manager.place_definitions(graph)
    data_manager.place_casts(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    platform = kwargs['platform']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    optimize_halospots(graph)
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # Lower IncrDimensions so that blocks of arbitrary shape may be used
    relax_incr_dimensions(graph, sregistry=sregistry)

    # SIMD-level parallelism
    ompizer = Ompizer(sregistry, options)
    ompizer.make_simd(graph, simd_reg_size=platform.simd_reg_size)

    # Shared-memory parallelism
    ompizer.make_parallel(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    DataManager(sregistry).process(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    platform = kwargs['platform']
    compiler = kwargs['compiler']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    mpiize(graph, sregistry=sregistry, options=options)

    # Loop tiling
    relax_incr_dimensions(graph)

    # GPU parallelism
    parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
    parizer.make_parallel(graph)
    parizer.initialize(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    cls._Target.DataManager(sregistry, options).process(graph)

    # Linearize n-dimensional Indexeds
    linearize(graph, mode=options['linearize'], sregistry=sregistry)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    optimize_halospots(graph)
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # GPU parallelism via OpenMP offloading
    DeviceOmpizer(sregistry, options).make_parallel(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    data_manager = DeviceOpenMPDataManager(sregistry)
    data_manager.place_ondevice(graph)
    data_manager.place_definitions(graph)
    data_manager.place_casts(graph)

    # Initialize OpenMP environment
    initialize(graph)

    # TODO: This should be moved right below the `mpiize` pass, but currently
    # calling `mpi_gpu_direct` before the `Symbol definitions` block would create
    # Blocks before creating C variables. That would lead to MPI_Request variables
    # being local to their Blocks, and hence to incorrect C code being generated.
    if options['gpu-direct']:
        mpi_gpu_direct(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    language = kwargs['language']
    platform = kwargs['platform']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    optimize_halospots(graph)
    if options['mpi']:
        mpiize(graph, mode=options['mpi'], language=language, sregistry=sregistry)

    # GPU parallelism
    parizer = cls._Target.Parizer(sregistry, options, platform)
    parizer.make_parallel(graph)
    parizer.initialize(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    cls._Target.DataManager(sregistry, options).process(graph)

    # Linearize n-dimensional Indexeds
    if options['linearize']:
        linearize(graph, sregistry=sregistry)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    optimize_halospots(graph)
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # GPU parallelism via OpenACC offloading
    DeviceAccizer(sregistry).make_parallel(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    data_manager = DeviceOpenACCDataManager(sregistry)
    data_manager.place_ondevice(graph)
    data_manager.place_definitions(graph)
    data_manager.place_casts(graph)

    # Initialize OpenACC environment
    if options['mpi']:
        initialize(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    platform = kwargs['platform']

    # Flush denormal numbers
    avoid_denormals(graph)

    # Distributed-memory parallelism
    optimize_halospots(graph)
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # Tiling
    blocker = Blocker(options['blockinner'],
                      options['blocklevels'] or cls.BLOCK_LEVELS)
    blocker.make_blocking(graph)

    # Shared-memory and SIMD-level parallelism
    ompizer = Ompizer()
    ompizer.make_simd(graph, simd_reg_size=platform.simd_reg_size)
    if options['openmp']:
        ompizer.make_parallel(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    data_manager = DataManager()
    data_manager.place_definitions(graph)
    data_manager.place_casts(graph)

    return graph
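# The `Blocker` pass above implements loop tiling: a loop over a large iteration
# space is split into an outer loop over blocks plus an inner loop within each
# block, improving cache locality. A hypothetical sketch of the transformation's
# effect on iteration order (plain Python for illustration, not the IET-level
# rewrite itself):

def visit_flat(n):
    return [i for i in range(n)]

def visit_blocked(n, block):
    out = []
    for b in range(0, n, block):               # outer loop over blocks
        for i in range(b, min(b + block, n)):  # inner loop within a block
            out.append(i)
    return out

# Tiling regroups work into cache-sized chunks but visits the same points
assert visit_blocked(10, 4) == visit_flat(10)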
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']

    # Distributed-memory parallelism
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # GPU parallelism via OpenMP offloading
    DeviceOmpizer().make_parallel(graph)

    # Symbol definitions
    data_manager = DeviceOpenMPDataManager()
    data_manager.place_ondevice(graph)
    data_manager.place_definitions(graph)
    data_manager.place_casts(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # Shared-memory parallelism
    if options['openmp']:
        ompizer = Ompizer(sregistry, options)
        ompizer.make_parallel(graph)

    # Symbol definitions
    DataManager(sregistry).process(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    platform = kwargs['platform']
    compiler = kwargs['compiler']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    mpiize(graph, sregistry=sregistry, options=options)

    # GPU parallelism
    parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
    parizer.make_parallel(graph)
    parizer.initialize(graph)

    # Symbol definitions
    cls._Target.DataManager(sregistry, options).process(graph)

    return graph
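# The `cls._Target` indirection above is what lets a single `_specialize_iet`
# body serve several target languages: `_Target` bundles the language-specific
# Parizer (and DataManager), and subclasses merely swap the bundle. A
# hypothetical, self-contained sketch of that dispatch pattern (none of the
# class names below are Devito's own; a list stands in for the graph):

class OmpParizer:
    def __init__(self, sregistry, options, platform, compiler):
        pass
    def make_parallel(self, graph):
        graph.append('omp-parallel')
    def initialize(self, graph):
        graph.append('omp-init')

class AccParizer(OmpParizer):
    def make_parallel(self, graph):
        graph.append('acc-parallel')
    def initialize(self, graph):
        graph.append('acc-init')

class OmpTarget:
    Parizer = OmpParizer

class AccTarget:
    Parizer = AccParizer

class OmpOperator:
    _Target = OmpTarget

class AccOperator(OmpOperator):
    _Target = AccTarget  # swapping the bundle retargets the shared pipeline

def specialize(cls, graph):
    # Shared body: the concrete Parizer is resolved through `cls._Target`
    parizer = cls._Target.Parizer(None, None, None, None)
    parizer.make_parallel(graph)
    parizer.initialize(graph)
    return graph

assert specialize(OmpOperator, []) == ['omp-parallel', 'omp-init']
assert specialize(AccOperator, []) == ['acc-parallel', 'acc-init']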
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']

    # Distributed-memory parallelism
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # Shared-memory parallelism
    if options['openmp']:
        ompizer = Ompizer()
        ompizer.make_parallel(graph)

    # Symbol definitions
    data_manager = DataManager()
    data_manager.place_definitions(graph)
    data_manager.place_casts(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # GPU parallelism via OpenMP offloading
    DeviceOmpizer(sregistry, options).make_parallel(graph)

    # Symbol definitions
    DeviceOpenMPDataManager(sregistry, options).process(graph)

    # Initialize OpenMP environment
    initialize(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    platform = kwargs['platform']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # Shared-memory parallelism
    if options['openmp']:
        parizer = cls._Target.Parizer(sregistry, options, platform)
        parizer.make_parallel(graph)
        parizer.initialize(graph)

    # Symbol definitions
    cls._Target.DataManager(sregistry).process(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']
    sregistry = kwargs['sregistry']

    # Distributed-memory parallelism
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # Device and host parallelism via OpenACC offloading
    accizer = DeviceAccizer(sregistry, options)
    accizer.make_parallel(graph)

    # Symbol definitions
    DeviceOpenACCDataManager(sregistry, options).process(graph)

    # Initialize OpenACC environment
    if options['mpi']:
        initialize(graph)

    return graph
def _specialize_iet(cls, graph, **kwargs):
    options = kwargs['options']

    # Distributed-memory parallelism
    optimize_halospots(graph)
    if options['mpi']:
        mpiize(graph, mode=options['mpi'])

    # GPU parallelism via OpenMP offloading
    DeviceOmpizer().make_parallel(graph)

    # Misc optimizations
    hoist_prodders(graph)

    # Symbol definitions
    data_manager = DeviceDataManager()
    data_manager.place_ondevice(graph, efuncs=list(graph.efuncs.values()))
    data_manager.place_definitions(graph)
    data_manager.place_casts(graph)

    return graph
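# Several of the older variants above drive the DataManager step by step
# (`place_ondevice`, then `place_definitions`, then `place_casts`), while the
# newer ones call a single `process(graph)`. A hypothetical sketch of that
# consolidation (method names mirror the excerpts; the class body is
# illustrative, with a list standing in for the graph):

class SketchDataManager:
    def __init__(self, sregistry, options=None):
        self.sregistry = sregistry
        self.options = options

    def place_ondevice(self, graph):
        graph.append('ondevice')

    def place_definitions(self, graph):
        graph.append('definitions')

    def place_casts(self, graph):
        graph.append('casts')

    def process(self, graph):
        # One entry point running the sub-passes in their required order
        self.place_ondevice(graph)
        self.place_definitions(graph)
        self.place_casts(graph)
        return graph

assert SketchDataManager(None).process([]) == ['ondevice', 'definitions', 'casts']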