from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda, GpuCAReduceCPY) from .subtensor import (GpuIncSubtensor, GpuSubtensor, GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20) from .opt_util import alpha_merge, output_merge _logger = logging.getLogger("theano.gpuarray.opt") gpu_optimizer = EquilibriumDB() gpu_cut_copies = EquilibriumDB() gpu_seqopt = SequenceDB() # Don't register this right now conv_groupopt = LocalGroupDB() conv_groupopt.__name__ = "gpua_conv_opts" gpu_seqopt.register('gpuarray_local_optimiziations', gpu_optimizer, 1, 'fast_compile', 'fast_run', 'gpuarray') gpu_seqopt.register('gpuarray_cut_transfers', gpu_cut_copies, 2, 'fast_compile', 'fast_run', 'gpuarray') # do not add 'fast_run' to these two as this would always enable gpuarray mode optdb.register('gpuarray_opt', gpu_seqopt, optdb.__position__.get('add_destroy_handler', 49.5) - 1, 'gpuarray') def register_opt(*tags, **kwargs): def f(local_opt):
GpuSubtensor, GpuAdvancedSubtensor1, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20, ) from .opt_util import alpha_merge, output_merge _logger = logging.getLogger("theano.sandbox.gpuarray.opt") gpu_optimizer = EquilibriumDB() gpu_cut_copies = EquilibriumDB() gpu_seqopt = SequenceDB() # Don't register this right now conv_groupopt = LocalGroupDB() conv_groupopt.__name__ = "gpua_conv_opts" gpu_seqopt.register("gpuarray_local_optimiziations", gpu_optimizer, 1, "fast_compile", "fast_run", "gpuarray") gpu_seqopt.register("gpuarray_cut_transfers", gpu_cut_copies, 2, "fast_compile", "fast_run", "gpuarray") # do not add 'fast_run' to these two as this would always enable gpuarray mode optdb.register("gpuarray_opt", gpu_seqopt, optdb.__position__.get("add_destroy_handler", 49.5) - 1, "gpuarray") def register_opt(*tags, **kwargs): def f(local_opt): name = (kwargs and kwargs.pop("name")) or local_opt.__name__ gpu_optimizer.register(name, local_opt, "fast_run", "gpuarray", *tags) return local_opt
# Optimizer database for the GPU copy-cutting rewrites.
gpu_cut_copies = EquilibriumDB()

# This database is not run as an EquilibriumOptimizer; it exists because it
# carries the "tracks" that GraphToGPUDB needs.
gpu_optimizer2 = EquilibriumDB()

# Sequence of GPU optimizations that gets registered into the global optdb.
gpu_seqopt = SequenceDB()

# Register under the 'gpuarray' tag only: adding 'fast_run' here would always
# enable gpuarray mode.  The position is one slot before the one recorded for
# 'add_destroy_handler' (49.5 is the fallback when it is not registered).
optdb.register(
    "gpuarray_opt",
    gpu_seqopt,
    optdb.__position__.get("add_destroy_handler", 49.5) - 1,
    "gpuarray",
)

# Each family of rewrites below has a plain LocalGroupDB plus a "2" variant
# built with GraphToGPULocalOptGroup; only the "2" variants get an explicit
# __name__.

# Pooling.
pool_db = LocalGroupDB()
pool_db2 = LocalGroupDB(local_opt=GraphToGPULocalOptGroup)
pool_db2.__name__ = "pool_db2"

# Matrix operations.
matrix_ops_db = LocalGroupDB()
matrix_ops_db2 = LocalGroupDB(local_opt=GraphToGPULocalOptGroup)
matrix_ops_db2.__name__ = "matrix_ops_db2"

# Abstract batch normalization.
abstract_batch_norm_db = LocalGroupDB()
abstract_batch_norm_db2 = LocalGroupDB(local_opt=GraphToGPULocalOptGroup)
abstract_batch_norm_db2.__name__ = "abstract_batch_norm_db2"

# Separate group for the batch-norm optimizations, with its own name.
abstract_batch_norm_groupopt = LocalGroupDB()
abstract_batch_norm_groupopt.__name__ = "gpuarray_batchnorm_opts"