def local_abstractconv_gemm(node):
    """Build a ``CorrMM`` application from an ``AbstractConv2d`` node.

    Parameters
    ----------
    node
        Graph node whose ``op`` is inspected; its two inputs are taken as
        the image and the kernel.

    Returns
    -------
    list or None
        A one-element list holding the ``CorrMM`` output, or ``None`` when
        no C++ compiler is configured, the first input is float16, the op
        is not an ``AbstractConv2d``, or either input is not a
        ``TensorType``.
    """
    # If theano.config.blas.ldflags is empty, Theano will use
    # a NumPy C implementation of [sd]gemm_.
    if theano.config.cxx == "":
        return None
    if node.inputs[0].dtype == "float16":
        return None

    op = node.op
    if not isinstance(op, AbstractConv2d):
        return None

    img, kern = node.inputs
    both_tensors = (isinstance(img.type, TensorType) and
                    isinstance(kern.type, TensorType))
    if not both_tensors:
        return None

    if op.filter_flip:
        # Reverse the kernel along its last two axes; every leading axis
        # is taken unchanged.
        keep = slice(None)
        reverse = slice(None, None, -1)
        kern = kern[(keep, ) * (kern.ndim - 2) + (reverse, reverse)]

    corr_op = CorrMM(
        border_mode=op.border_mode,
        subsample=op.subsample,
        filter_dilation=op.filter_dilation,
        num_groups=op.num_groups,
        unshared=op.unshared,
    )
    rval = corr_op(img, kern)
    # Keep the stack trace of the original output on the new variable.
    copy_stack_trace(node.outputs[0], rval)
    return [rval]
def run_conv_valid(self, inputs_shape, filters_shape, border_mode='valid',
                   filter_dilation=(1, 1), subsample=(1, 1),
                   verify_grad=False):
    """Check that ``GpuCorrMM`` matches ``CorrMM`` on random data.

    Both shapes are reordered by picking axes ``(0, 3, 1, 2)`` before the
    random values are drawn (presumably channels-last to channels-first;
    confirm against callers).  The reference graph is compiled with
    ``mode_without_gpu`` and the tested graph with ``mode_with_gpu``; their
    results are compared with ``utt.assert_allclose``.

    Parameters
    ----------
    inputs_shape, filters_shape
        Indexable shapes with at least four entries.
    border_mode, filter_dilation, subsample
        Forwarded unchanged to both ``CorrMM`` and ``GpuCorrMM``.
    verify_grad : bool
        When true, additionally run ``utt.verify_grad`` on a fresh
        ``GpuCorrMM`` op with the generated values.
    """
    axis_order = (0, 3, 1, 2)
    inputs_shape = [inputs_shape[axis] for axis in axis_order]
    filters_shape = [filters_shape[axis] for axis in axis_order]

    # Draw the inputs before the filters so the RNG is consumed in the
    # same order on every run.
    inputs_val = np.random.random(inputs_shape).astype(config.floatX)
    filters_val = np.random.random(filters_shape).astype(config.floatX)

    inputs = gpuarray_shared_constructor(inputs_val)
    filters = gpuarray_shared_constructor(filters_val)

    # Identical op configuration for the reference and the GPU op.
    op_kwargs = dict(border_mode=border_mode,
                     filter_dilation=filter_dilation,
                     subsample=subsample)

    conv_ref = CorrMM(**op_kwargs)(ref_cast(inputs), ref_cast(filters))
    f_ref = theano.function([], conv_ref, mode=mode_without_gpu)

    conv = GpuCorrMM(**op_kwargs)(inputs, filters)
    f = theano.function([], conv, mode=mode_with_gpu)

    expected = f_ref()
    actual = f()
    utt.assert_allclose(expected, actual)

    if verify_grad:
        utt.verify_grad(GpuCorrMM(**op_kwargs), [inputs_val, filters_val])
def local_abstractconv_gemm(node):
    """Build a ``CorrMM`` application from an ``AbstractConv2d`` node.

    Returns a one-element list holding the ``CorrMM`` output, or ``None``
    when the node's op is not an ``AbstractConv2d`` or when either of its
    two inputs is not a ``TensorType``.  Only ``border_mode`` and
    ``subsample`` are forwarded from the original op.
    """
    op = node.op
    if not isinstance(op, AbstractConv2d):
        return None

    img, kern = node.inputs
    both_tensors = (isinstance(img.type, TensorType) and
                    isinstance(kern.type, TensorType))
    if not both_tensors:
        return None

    if op.filter_flip:
        # Reverse the kernel along its third and fourth axes.
        keep = slice(None)
        reverse = slice(None, None, -1)
        kern = kern[keep, keep, reverse, reverse]

    corr_op = CorrMM(border_mode=op.border_mode, subsample=op.subsample)
    return [corr_op(img, kern)]
def local_abstractconv_gemm(node):
    """Build a ``CorrMM`` application from an ``AbstractConv2d`` node.

    Returns a one-element list holding the ``CorrMM`` output.  ``None`` is
    returned instead when ``theano.config.cxx`` is the empty string, when
    ``theano.config.blas.ldflags`` is falsy, when the node's op is not an
    ``AbstractConv2d``, or when either input is not a ``TensorType``.
    """
    # Both a C++ compiler and BLAS link flags must be configured.
    if theano.config.cxx == "":
        return None
    if not theano.config.blas.ldflags:
        return None

    op = node.op
    if not isinstance(op, AbstractConv2d):
        return None

    img, kern = node.inputs
    both_tensors = (isinstance(img.type, TensorType) and
                    isinstance(kern.type, TensorType))
    if not both_tensors:
        return None

    if op.filter_flip:
        # Reverse the kernel along its third and fourth axes.
        keep = slice(None)
        reverse = slice(None, None, -1)
        kern = kern[keep, keep, reverse, reverse]

    corr_op = CorrMM(border_mode=op.border_mode, subsample=op.subsample)
    rval = corr_op(img, kern)
    # Keep the stack trace of the original output on the new variable.
    copy_stack_trace(node.outputs[0], rval)
    return [rval]
def local_abstractconv_gemm(node):
    """Build a ``CorrMM`` application from an ``AbstractConv2d`` node.

    Returns a one-element list holding the ``CorrMM`` output.  ``None`` is
    returned instead when ``theano.config.cxx`` is the empty string, when
    the first input's dtype is float16, when the node's op is not an
    ``AbstractConv2d``, or when either input is not a ``TensorType``.
    ``border_mode``, ``subsample`` and ``filter_dilation`` are forwarded
    from the original op.
    """
    # If theano.config.blas.ldflags is empty, Theano will use
    # a NumPy C implementation of [sd]gemm_.
    if theano.config.cxx == "":
        return None
    if node.inputs[0].dtype == 'float16':
        return None

    op = node.op
    if not isinstance(op, AbstractConv2d):
        return None

    img, kern = node.inputs
    both_tensors = (isinstance(img.type, TensorType) and
                    isinstance(kern.type, TensorType))
    if not both_tensors:
        return None

    if op.filter_flip:
        # Reverse the kernel along its third and fourth axes.
        keep = slice(None)
        reverse = slice(None, None, -1)
        kern = kern[keep, keep, reverse, reverse]

    corr_op = CorrMM(border_mode=op.border_mode,
                     subsample=op.subsample,
                     filter_dilation=op.filter_dilation)
    rval = corr_op(img, kern)
    # Keep the stack trace of the original output on the new variable.
    copy_stack_trace(node.outputs[0], rval)
    return [rval]