def __init__(self, pool_shape, inplace, BCHW_grad_output):
    pool_shape = tuple(pool_shape)
    super(PoolHWBCOpGrad, self).__init__()
    assert len(pool_shape) == 2, len(pool_shape)
    assert pool_shape[0] > 0, pool_shape[0]
    assert pool_shape[1] > 0, pool_shape[1]
    if BCHW_grad_output:
        assert inplace
    self.pool_shape = pool_shape
    self.inplace = inplace
    self.BCHW_grad_output = BCHW_grad_output
    if inplace:
        self.destroy_map = {0: [0]}
    else:
        # register optimization for this pool_shape
        if not hasattr(optdb, 'PoolHWBCOpGradInplaceOpt_registered'):
            optdb.PoolHWBCOpGradInplaceOpt_registered = []
        if pool_shape not in optdb.PoolHWBCOpGradInplaceOpt_registered:
            PoolHWBCOpGradInplaceOpt = OpSub(
                self,
                PoolHWBCOpGrad(self.pool_shape, inplace=True, BCHW_grad_output=False))
            optdb.PoolHWBCOpGradInplaceOpt_registered.append(pool_shape)
            optdb.register(
                'PoolHWBCOpGradInplaceOpt' + str(pool_shape),
                theano.gof.TopoOptimizer(
                    PoolHWBCOpGradInplaceOpt,
                    failure_callback=gof.TopoOptimizer.warn_inplace),
                50.0, 'fast_run', 'inplace', 'gpuarray')
def register_func(recurrent_transform):
    """
    :type recurrent_transform: RecurrentTransform.RecurrentTransformBase
    """
    fn = recurrent_transform.name
    key = (fn, id(recurrent_transform))
    if key in function_ops:
        return function_ops[key]

    # register op
    no_inpl = LSTMCustomOp(fun_name=fn, inplace=False, recurrent_transform=recurrent_transform)
    inpl = LSTMCustomOp(fun_name=fn, inplace=True, recurrent_transform=recurrent_transform)
    function_ops[key] = no_inpl

    # hack to avoid being called twice
    attr = 'LSTMCustomMOpInplaceOpt_%s_%i' % (fn, id(recurrent_transform))
    if not hasattr(optdb, attr):
        opt = OpSub(no_inpl, inpl)
        optdb.register(attr, theano.gof.TopoOptimizer(opt),
                       50.0, 'fast_run', 'inplace', 'gpuarray')
        setattr(optdb, attr, True)

    # the same for grad
    no_inpl = LSTMCustomOpGrad(fun_name=fn, inplace=False, recurrent_transform=recurrent_transform)
    inpl = LSTMCustomOpGrad(fun_name=fn, inplace=True, recurrent_transform=recurrent_transform)
    grad_ops[key] = no_inpl

    # hack to avoid being called twice
    attr = 'LSTMCustomMOpGradInplaceOpt_%s_%i' % (fn, id(recurrent_transform))
    if not hasattr(optdb, attr):
        opt = OpSub(no_inpl, inpl)
        optdb.register(attr, theano.gof.TopoOptimizer(opt),
                       50.0, 'fast_run', 'inplace', 'gpuarray')
        setattr(optdb, attr, True)

    return function_ops[key]
def OpSubOptimizer(op1, op2, fail=NavigatorOptimizer.warn_ignore, ign=True):
    return TopoOptimizer(OpSub(op1, op2),
                         ignore_newtrees=ign,
                         failure_callback=fail)
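# A minimal usage sketch of the helper above. MyOp and the 'MyOpInplaceOpt' name
# are hypothetical placeholders; the optdb.register call mirrors the pattern used
# in the other snippets in this collection.
non_inplace_op = MyOp(inplace=False)   # hypothetical op instance
inplace_op = MyOp(inplace=True)        # hypothetical inplace variant

inplace_opt = OpSubOptimizer(non_inplace_op, inplace_op)
optdb.register('MyOpInplaceOpt', inplace_opt,
               50.0, 'fast_run', 'inplace')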
        Py_XDECREF(epsilon2);
    """ % locals()

    #!!! change this when changing the code!
    #def c_code_cache_version(self):
    #  return 3, 1


BidirectionalTwoDLSTMOpGradNoInplaceInstance = BidirectionalTwoDLSTMOpGrad(inplace=False)
BidirectionalTwoDLSTMOpGradInplaceInstance = BidirectionalTwoDLSTMOpGrad(inplace=True)
BidirectionalTwoDLSTMOpInplaceOpt = OpSub(
    BidirectionalTwoDLSTMOpGradNoInplaceInstance,
    BidirectionalTwoDLSTMOpGradInplaceInstance)

#hack to avoid being called twice
if not hasattr(optdb, 'BidirectionalTwoDLSTMOpInplaceOpt_registered'):
    optdb.register(
        'BidirectionalTwoDLSTMOpInplaceOpt',
        theano.gof.TopoOptimizer(
            BidirectionalTwoDLSTMOpInplaceOpt,
            failure_callback=gof.TopoOptimizer.warn_inplace),
        50.0, 'fast_run', 'inplace', 'gpuarray')
    optdb.BidirectionalTwoDLSTMOpInplaceOpt_registered = True


class BidirectionalTwoDLSTMOp(theano.sandbox.cuda.GpuOp):
    __props__ = ()
    def c_code_cache_version(self):
        return 3, 3


CuDNNConvHWBCOpGradValidNoInplaceInstance = CuDNNConvHWBCOpGrad("valid", inplace=False)
CuDNNConvHWBCOpGradValidInplaceInstance = CuDNNConvHWBCOpGrad("valid", inplace=True)
CuDNNConvHWBCOpGradFullNoInplaceInstance = CuDNNConvHWBCOpGrad("full", inplace=False)
CuDNNConvHWBCOpGradFullInplaceInstance = CuDNNConvHWBCOpGrad("full", inplace=True)

CuDNNConvHWBCOpGradValidInplaceOpt = OpSub(
    CuDNNConvHWBCOpGradValidNoInplaceInstance,
    CuDNNConvHWBCOpGradValidInplaceInstance)

#hack to avoid being called twice
if not hasattr(optdb, 'CuDNNConvHWBCOpGradValidInplaceOpt_registered'):
    optdb.register(
        'CuDNNConvHWBCOpGradValidInplaceOpt',
        theano.gof.TopoOptimizer(
            CuDNNConvHWBCOpGradValidInplaceOpt,
            failure_callback=gof.TopoOptimizer.warn_inplace),
        50.0, 'fast_run', 'inplace', 'gpuarray')
    optdb.CuDNNConvHWBCOpGradValidInplaceOpt_registered = True

#TODO: maybe this optimization causes problems
#CuDNNConvHWBCOpGradFullInplaceOpt = OpSub(CuDNNConvHWBCOpGradFullNoInplaceInstance, CuDNNConvHWBCOpGradFullInplaceInstance)
##hack to avoid being called twice
#if not hasattr(optdb, 'CuDNNConvHWBCOpGradFullInplaceOpt_registered'):
        {
          Py_XDECREF(epsilon_f);
          Py_XDECREF(epsilon_b);
        }
    """ % locals()

    #!!! change this when changing the code!
    def c_code_cache_version(self):
        return 1, 7


BLSTMOpGradNoInplaceInstance = BLSTMOpGrad(inplace=False)
BLSTMOpGradInplaceInstance = BLSTMOpGrad(inplace=True)
BLSTMOpGradInplaceOpt = OpSub(BLSTMOpGradNoInplaceInstance, BLSTMOpGradInplaceInstance)

#hack to avoid being called twice
if not hasattr(optdb, 'BLSTMOpGradInplaceOpt_registered'):
    optdb.register('BLSTMOpGradInplaceOpt',
                   theano.gof.TopoOptimizer(BLSTMOpGradInplaceOpt),
                   50.0, 'fast_run', 'inplace', 'gpuarray')
    optdb.BLSTMOpGradInplaceOpt_registered = True

#------------------------

class BLSTMOp(theano.sandbox.cuda.GpuOp):
    def __init__(self, inplace):
        self.inplace = inplace
        if inplace:
def OpSubOptimizer(op1, op2):
    return OpKeyOptimizer(OpSub(op1, op2))
from scalmulop import ScalMulV1
from doubleop import DoubleOp

from theano.gof import local_optimizer
from theano.tensor.opt import register_specialize


@register_specialize
@local_optimizer([ScalMulV1])
def local_scalmul_double_v1(node):
    if not (isinstance(node.op, ScalMulV1) and node.op.scal == 2):
        return False
    return [DoubleOp()(node.inputs[0])]


from theano.gof.opt import OpSub
local_scalmul_double_v2 = OpSub(ScalMulV1(2), DoubleOp())
register_specialize(local_scalmul_double_v2, name='local_scalmul_double_v2')
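# Hedged usage sketch for the two substitutions above: assuming scalmulop.py and
# doubleop.py from the same tutorial are importable, compiling a graph that applies
# ScalMulV1(2) should show DoubleOp in the optimized graph once the specialize-time
# substitution has run.
import theano
import theano.tensor as T

x = T.vector('x')
f = theano.function([x], ScalMulV1(2)(x))
theano.printing.debugprint(f)  # the optimized graph is expected to contain DoubleOp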
        if(!%(inplace)s)
        {
          Py_XDECREF(epsilon);
        }
    """ % locals()

    #!!! change this when changing the code!
    def c_code_cache_version(self):
        return 1, 5


LSTMOpGradNoInplaceInstance = LSTMOpGrad(inplace=False)
LSTMOpGradInplaceInstance = LSTMOpGrad(inplace=True)
LSTMOpGradInplaceOpt = OpSub(LSTMOpGradNoInplaceInstance, LSTMOpGradInplaceInstance)

#hack to avoid being called twice
if not hasattr(optdb, 'LSTMOpGradInplaceOpt_registered'):
    optdb.register('LSTMOpGradInplaceOpt',
                   theano.gof.TopoOptimizer(LSTMOpGradInplaceOpt),
                   50.0, 'fast_run', 'inplace', 'gpuarray')
    optdb.LSTMOpGradInplaceOpt_registered = True

#------------------------

class LSTMOp(theano.sandbox.cuda.GpuOp):
    def __init__(self, inplace):
        self.inplace = inplace
        if inplace:
        }
        Py_XDECREF(epsilon1);
        Py_XDECREF(epsilon2);
        Py_XDECREF(epsilon3);
        Py_XDECREF(epsilon4);
    """ % locals()

    #!!! change this when changing the code!
    def c_code_cache_version(self):
        return 2, 10


MultiDirectionalTwoDLSTMOpGradNoInplaceInstance = MultiDirectionalTwoDLSTMOpGrad(inplace=False)
MultiDirectionalTwoDLSTMOpGradInplaceInstance = MultiDirectionalTwoDLSTMOpGrad(inplace=True)
MultiDirectionalTwoDLSTMOpInplaceOpt = OpSub(MultiDirectionalTwoDLSTMOpGradNoInplaceInstance,
                                             MultiDirectionalTwoDLSTMOpGradInplaceInstance)

#hack to avoid being called twice
if not hasattr(optdb, 'MultiDirectionalTwoDLSTMOpInplaceOpt_registered'):
    optdb.register('MultiDirectionalTwoDLSTMOpInplaceOpt',
                   theano.gof.TopoOptimizer(MultiDirectionalTwoDLSTMOpInplaceOpt,
                                            failure_callback=gof.TopoOptimizer.warn_inplace),
                   50.0, 'fast_run', 'inplace', 'gpuarray')
    optdb.MultiDirectionalTwoDLSTMOpInplaceOpt_registered = True


class MultiDirectionalTwoDLSTMOp(theano.sandbox.cuda.GpuOp):
    __props__ = ()

    def __init__(self):
        super(MultiDirectionalTwoDLSTMOp, self).__init__()
        if(!%(inplace)s)
        {
          Py_XDECREF(epsilon);
        }
    """ % locals()

    #!!! change this when changing the code!
    #def c_code_cache_version(self):
    #  return 1, 2


LSTMOpCellGradNoInplaceInstance = LSTMOpCellGrad(inplace=False)
LSTMOpCellGradInplaceInstance = LSTMOpCellGrad(inplace=True)
LSTMOpCellGradInplaceOpt = OpSub(LSTMOpCellGradNoInplaceInstance, LSTMOpCellGradInplaceInstance)

#hack to avoid being called twice
if not hasattr(optdb, 'LSTMOpCellGradInplaceOpt_registered'):
    optdb.register('LSTMOpCellGradInplaceOpt',
                   theano.gof.TopoOptimizer(LSTMOpCellGradInplaceOpt),
                   50.0, 'fast_run', 'inplace', 'gpuarray')
    optdb.LSTMOpCellGradInplaceOpt_registered = True

#------------------------

class LSTMOpCell(theano.sandbox.cuda.GpuOp):
    def __init__(self, inplace):
        self.inplace = inplace
        if inplace:
            # all outputs operate inplace on input 0 (which is Z)