def make_node(self, A):
    ctx_name = infer_context_name(A)
    A = as_gpuarray_variable(A, ctx_name)
    A = gpu_contiguous(A)
    if A.ndim != 2:
        raise LinAlgError("A must be a matrix (2-dimensional)")
    if A.dtype != "float32":
        raise TypeError("only `float32` is supported for now")
    if self.compute_uv:
        return Apply(
            self,
            [A],
            # return S, U, VT
            [
                GpuArrayType(A.dtype, broadcastable=[False], context_name=ctx_name)(),
                A.type(),
                A.type(),
            ],
        )
    else:
        return Apply(
            self,
            [A],
            # return only S
            [GpuArrayType(A.dtype, broadcastable=[False], context_name=ctx_name)()],
        )
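# Illustrative CPU reference (not from the source): the two output signatures
# above mirror numpy.linalg.svd on a float32 matrix, except that this op lists
# the singular values first (S, U, VT) whereas numpy returns (U, S, VT).
import numpy as np

a = np.random.rand(4, 3).astype("float32")
s_only = np.linalg.svd(a, compute_uv=False)       # S only: shape (3,)
u, s, vt = np.linalg.svd(a, full_matrices=False)  # U, S, VT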
def make_node(self, inp1, inp2):
    if not cublas_available:
        raise RuntimeError(
            "CUBLAS is not available and GpuCublasTriangularSolve Op "
            "cannot be constructed."
        )
    context_name = infer_context_name(inp1, inp2)
    inp1 = as_gpuarray_variable(inp1, context_name)
    inp2 = as_gpuarray_variable(inp2, context_name)
    inp1 = gpu_contiguous(inp1)
    inp2 = gpu_contiguous(inp2)
    assert inp1.ndim == 2
    assert inp2.ndim in [1, 2]
    assert inp1.dtype == inp2.dtype
    return Apply(
        self,
        [inp1, inp2],
        [
            GpuArrayType(
                inp1.dtype,
                broadcastable=inp2.broadcastable,
                context_name=context_name,
            )()
        ],
    )
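# Hedged CPU reference (not from the source): the op solves A x = b for a
# triangular A, as scipy.linalg.solve_triangular does; b may be a vector or a
# matrix, matching the `inp2.ndim in [1, 2]` assertion above.
import numpy as np
from scipy.linalg import solve_triangular

A = np.tril(np.random.rand(3, 3).astype("float32") + 1.0)  # nonzero diagonal
b = np.random.rand(3).astype("float32")
x = solve_triangular(A, b, lower=True)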
def make_node(self, inp, ws, stride=None, pad=None):
    ctx_name = infer_context_name(inp)
    inp = as_gpuarray_variable(inp, ctx_name)
    nd = self.ndim
    assert inp.ndim == nd + 2
    if stride is None:
        stride = ws
    if pad is None:
        pad = (0,) * nd
    elif isinstance(pad, (tuple, list)):
        if max(pad) != 0 and not self.ignore_border:
            raise ValueError("Padding works only with ignore_border=True")
        if isinstance(ws, (tuple, list)):
            if any(pad[i] >= ws[i] for i in range(nd)):
                raise ValueError("Padding must be smaller than the window shape")
    ws = as_tensor_variable(ws)
    stride = as_tensor_variable(stride)
    pad = as_tensor_variable(pad)
    assert ws.ndim == stride.ndim and ws.ndim == pad.ndim
    assert ws.ndim == 1
    if ws.dtype not in int_dtypes:
        raise TypeError("Window shape parameters must be ints.")
    if stride.dtype not in int_dtypes:
        raise TypeError("Stride parameters must be ints.")
    if pad.dtype not in int_dtypes:
        raise TypeError("Padding parameters must be ints.")
    ws = aesara.tensor.cast(ws, "int64")
    stride = aesara.tensor.cast(stride, "int64")
    pad = aesara.tensor.cast(pad, "int64")
    return Apply(self, [inp, ws, stride, pad], [inp.type()])
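# Hedged sketch (not from the source): the standard pooling output-shape rule
# that motivates the pad < ws check above; with pad >= ws a window could cover
# only padding, so the op rejects it.
def pool_out_shape(in_size, ws, stride, pad):
    # Output length along one spatial dimension, with ignore_border=True.
    return (in_size + 2 * pad - ws) // stride + 1

assert pool_out_shape(32, ws=2, stride=2, pad=0) == 16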
def make_node(self, inp, out_grad, ws, stride=None, pad=None):
    ctx_name = infer_context_name(inp, out_grad)
    nd = self.ndim
    inp = as_gpuarray_variable(inp, ctx_name)
    assert inp.ndim == nd + 2
    out_grad = as_gpuarray_variable(out_grad, ctx_name)
    assert out_grad.ndim == nd + 2
    assert out_grad.ndim == inp.ndim
    if stride is None:
        stride = ws
    if pad is None:
        pad = (0,) * nd
    elif isinstance(pad, (tuple, list)):
        if max(pad) != 0 and self.mode != "average_exc_pad":
            raise ValueError(
                "Nonzero padding is only supported with mode='average_exc_pad'"
            )
    ws = as_tensor_variable(ws)
    stride = as_tensor_variable(stride)
    pad = as_tensor_variable(pad)
    assert ws.ndim == stride.ndim and ws.ndim == pad.ndim
    assert ws.ndim == 1
    if ws.dtype not in int_dtypes:
        raise TypeError("Window shape parameters must be ints.")
    if stride.dtype not in int_dtypes:
        raise TypeError("Stride parameters must be ints.")
    if pad.dtype not in int_dtypes:
        raise TypeError("Padding parameters must be ints.")
    ws = aesara.tensor.cast(ws, "int64")
    stride = aesara.tensor.cast(stride, "int64")
    pad = aesara.tensor.cast(pad, "int64")
    return Apply(self, [inp, out_grad, ws, stride, pad], [inp.type()])
def make_node(self, inp, out, out_grad, ws, stride=None, pad=None):
    ctx_name = infer_context_name(inp, out, out_grad)
    nd = self.ndim
    inp = as_gpuarray_variable(inp, ctx_name)
    assert inp.ndim == nd + 2
    out = as_gpuarray_variable(out, ctx_name)
    assert out.ndim == nd + 2
    out_grad = as_gpuarray_variable(out_grad, ctx_name)
    assert out_grad.ndim == nd + 2
    assert out_grad.ndim == inp.ndim
    assert inp.ndim == out.ndim
    if stride is None:
        stride = ws
    if pad is None:
        pad = (0,) * nd
    ws = as_tensor_variable(ws)
    stride = as_tensor_variable(stride)
    pad = as_tensor_variable(pad)
    assert ws.ndim == stride.ndim and ws.ndim == pad.ndim
    assert ws.ndim == 1
    if ws.dtype not in int_dtypes:
        raise TypeError("Window shape parameters must be ints.")
    if stride.dtype not in int_dtypes:
        raise TypeError("Stride parameters must be ints.")
    if pad.dtype not in int_dtypes:
        raise TypeError("Padding parameters must be ints.")
    ws = aesara.tensor.cast(ws, "int64")
    stride = aesara.tensor.cast(stride, "int64")
    pad = aesara.tensor.cast(pad, "int64")
    return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
def make_node(self, ten4, neib_shape, neib_step=None):
    ten4 = as_gpuarray_variable(ten4, infer_context_name(ten4))
    neib_shape = tt.as_tensor_variable(neib_shape)
    if neib_step is None:
        neib_step = neib_shape
    else:
        neib_step = tt.as_tensor_variable(neib_step)
    assert ten4.ndim == 4
    assert neib_shape.ndim == 1
    assert neib_step.ndim == 1
    assert neib_shape.dtype in tt.integer_dtypes
    assert neib_step.dtype in tt.integer_dtypes
    return Apply(
        self,
        [ten4, neib_shape, neib_step],
        [
            GpuArrayType(
                broadcastable=(False, False),
                dtype=ten4.type.dtype,
                context_name=ten4.type.context_name,
            )()
        ],
    )
def make_node(self, inp, s=None):
    # A shape parameter s can be provided as an input. For now this is used to
    # manage odd transform sizes.
    # Later this could be extended to handle padding and truncation,
    # following numpy's interface. However, cuFFT expects arrays that match
    # the shape given to the plan, so padding will have to be done in the op.
    # The effect of padding on gradients has yet to be investigated.

    if not skcuda_available:
        raise RuntimeError("skcuda is needed for CuFFTOp")
    if not pygpu_available:
        raise RuntimeError("pygpu is needed for CuFFTOp")
    if not pycuda_available:
        raise RuntimeError("pycuda is needed for CuFFTOp")

    inp = gpu_contiguous(as_gpuarray_variable(inp, infer_context_name(inp)))

    # If no shape is provided as input, default to input data shape.
    if s is None:
        s = inp.shape[1:]
    s = as_tensor_variable(s)

    assert inp.dtype == "float32"
    assert s.ndim == 1
    assert s.dtype in integer_dtypes

    return Apply(self, [inp, s], [self.output_type(inp)()])
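# Illustrative sketch (not from the source): the numpy interface the comment
# above refers to, where a shape parameter `s` covers the transformed axes
# (everything but the leading batch axis here) and may pad or truncate, which
# this op does not implement yet.
import numpy as np

x = np.random.rand(2, 10).astype("float32")
y = np.fft.rfftn(x, s=(10,))  # s restates the input size; output shape (2, 6)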
def make_node(self, inp1, inp2):
    if not cusolver_available:
        raise RuntimeError(
            "CUSOLVER is not available and GpuCusolverSolve Op "
            "cannot be constructed."
        )
    # NOTE: this is a lexicographic string comparison, so it is only reliable
    # within the 0.5.x release series of scikit-cuda.
    if skcuda.__version__ <= "0.5.1":
        warnings.warn(
            "The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8"
        )
    context_name = infer_context_name(inp1, inp2)
    inp1 = as_gpuarray_variable(inp1, context_name)
    inp2 = as_gpuarray_variable(inp2, context_name)
    inp1 = gpu_contiguous(inp1)
    inp2 = gpu_contiguous(inp2)
    assert inp1.ndim == 2
    assert inp2.ndim == 2
    assert inp1.dtype == inp2.dtype
    return Apply(
        self,
        [inp1, inp2],
        [
            GpuArrayType(
                inp1.dtype,
                broadcastable=inp1.broadcastable,
                context_name=context_name,
            )()
        ],
    )
def make_node(self, A):
    ctx_name = infer_context_name(A)
    A = as_gpuarray_variable(A, ctx_name)
    A = gpu_contiguous(A)
    if A.ndim != 2:
        raise LinAlgError("A must be a matrix (2-dimensional)")
    if A.dtype != "float32":
        raise TypeError("only `float32` is supported for now")
    return Apply(self, [A], [A.type()])
def make_node(self, rstate, size):
    # Error checking is slightly redundant here, since this op should not be
    # called directly; call it through MRG_RandomStream instead.
    broad = []
    for i in range(self.output_type.ndim):
        broad.append(aet.extract_constant(size[i]) == 1)
    output_type = self.output_type.clone(broadcastable=broad)()
    rstate = as_gpuarray_variable(rstate, infer_context_name(rstate))
    return Apply(self, [rstate, size], [rstate.type(), output_type])
def make_node(self, x, b, y_idx):
    ctx_name = infer_context_name(x, b, y_idx)
    x = as_gpuarray_variable(x, ctx_name)
    b = as_gpuarray_variable(b, ctx_name)
    y_idx = as_gpuarray_variable(y_idx, ctx_name)
    nll = GpuArrayType(
        x.type.dtype, y_idx.type.broadcastable, context_name=ctx_name
    )()
    sm = x.type()
    am = y_idx.type()
    return Apply(self, [x, b, y_idx], [nll, sm, am])
def make_node(self, o, x, y, xIdx, yIdx, alpha=None):
    ctx = infer_context_name(o, x, y)
    one = aet.constant(np.asarray(1.0, dtype="float32"))
    o = as_gpuarray_variable(o, ctx)
    x = as_gpuarray_variable(x, ctx)
    y = as_gpuarray_variable(y, ctx)
    xIdx = as_tensor_variable(xIdx)
    yIdx = as_tensor_variable(yIdx)
    if alpha is None:
        alpha = one
    return Apply(self, [o, x, y, xIdx, yIdx, alpha], [o.type()])
def make_node(self, x):
    assert x.type.dtype == "float32", "Only float32 supported for GpuCumOp"
    context_name = infer_context_name(x)
    x = as_gpuarray_variable(x, context_name)
    if x.ndim > GpuCumOp.SUPPORTED_NDIMS:
        raise NotImplementedError(
            "Only cum ops on 1D, 2D and 3D arrays are supported right now!"
        )
    if self.axis >= x.ndim or self.axis < -x.ndim:
        raise ValueError(f"axis(={self.axis}) out of bounds")
    return Apply(self, [x], [x.type()])
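# Hedged sketch (not from the source): the axis bounds check above accepts the
# same range as numpy, i.e. -ndim <= axis < ndim.
import numpy as np

x = np.arange(6, dtype="float32").reshape(2, 3)
np.cumsum(x, axis=-1)  # fine: -2 <= -1 < 2
# np.cumsum(x, axis=2) would raise, matching the ValueError above.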
def make_node(self, activations, labels, input_lengths):
    context_name = infer_context_name(activations)
    t_activations = as_gpuarray_variable(activations, context_name=context_name)
    # Ensure activations array is C-contiguous
    t_activations = gpu_contiguous(t_activations)

    # Labels and input lengths are always on the CPU
    t_labels = as_tensor_variable(labels)
    t_input_lengths = as_tensor_variable(input_lengths)

    if t_activations.type.dtype != "float32":
        raise TypeError("activations must use the float32 type.")
    if t_activations.ndim != 3:
        raise ValueError("activations must have 3 dimensions.")
    if t_labels.type.dtype != "int32":
        raise TypeError("labels must use the int32 type.")
    if t_labels.ndim != 2:
        raise ValueError("labels must have 2 dimensions.")
    if t_input_lengths.type.dtype != "int32":
        raise TypeError("input_lengths must use the int32 type.")
    if t_input_lengths.ndim != 1:
        raise ValueError("input_lengths must have 1 dimension.")

    costs = GpuArrayType(
        dtype="float32", broadcastable=(False,), context_name=context_name
    )()
    outputs = [costs]

    if self.compute_grad:
        gradients = GpuArrayType(
            dtype="float32",
            broadcastable=(False, False, False),
            context_name=context_name,
        )()
        outputs += [gradients]

    return Apply(
        self, inputs=[t_activations, t_labels, t_input_lengths], outputs=outputs
    )
def make_node(self, o, W, h, inputIdx, outputIdx):
    ctx = infer_context_name(o, W, h)
    o = as_gpuarray_variable(o, ctx)
    W = as_gpuarray_variable(W, ctx)
    h = as_gpuarray_variable(h, ctx)
    inputIdx = as_tensor_variable(inputIdx)
    outputIdx = as_tensor_variable(outputIdx)
    assert o.ndim == 3
    assert W.ndim == 4
    assert h.ndim == 3
    assert inputIdx.ndim == 2
    assert outputIdx.ndim == 2
    assert inputIdx.type.dtype in discrete_dtypes
    assert outputIdx.type.dtype in discrete_dtypes
    return Apply(self, [o, W, h, inputIdx, outputIdx], [o.type()])
def local_abstractconv_cudnn(fgraph, node):
    ctx = infer_context_name(*node.inputs)
    if not isinstance(node.inputs[0].type, GpuArrayType):
        return
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(
        isinstance(p, tuple) for p in node.op.border_mode
    ):
        # Asymmetric padding not yet supported
        return None
    if isinstance(node.op, AbstractConv2d):
        with inherit_stack_trace(node.outputs):
            return local_abstractconv_cudnn_graph(
                node.op, ctx, node.inputs, node.outputs
            )
    elif isinstance(node.op, AbstractConv3d):
        with inherit_stack_trace(node.outputs):
            return local_abstractconv3d_cudnn_graph(
                node.op, ctx, node.inputs, node.outputs
            )
def make_node(self, pvals, unis):
    ctx_name = infer_context_name(pvals, unis)
    pvals = as_gpuarray_variable(pvals, ctx_name)
    unis = as_gpuarray_variable(unis, ctx_name)
    assert pvals.dtype in ["float32", "float16", "float64"]
    assert unis.dtype in ["float32", "float16", "float64"]
    if pvals.ndim != 2:
        raise NotImplementedError("pvals ndim should be 2", pvals.ndim)
    if unis.ndim != 1:
        raise NotImplementedError("unis ndim should be 1", unis.ndim)
    if self.odtype == "auto":
        odtype = pvals.dtype
    else:
        odtype = self.odtype
    br = (pvals.broadcastable[1], pvals.broadcastable[0])
    out = GpuArrayType(broadcastable=br, dtype=odtype, context_name=ctx_name)()
    return Apply(self, [pvals, unis], [out])
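# Hedged CPU reference (not from the source): the swapped broadcast pattern
# above indicates the output is laid out transposed, (n_outcomes, n_rows),
# relative to pvals, which is (n_rows, n_outcomes). A plain-numpy sketch of
# sampling one outcome per row from a single uniform draw:
import numpy as np

def multinomial_from_uniform(pvals, unis):
    cum = np.cumsum(pvals, axis=1)
    idx = (cum < unis[:, None]).sum(axis=1)    # first index with cum >= draw
    idx = np.minimum(idx, pvals.shape[1] - 1)  # guard against rounding error
    out = np.zeros_like(pvals).T               # transposed layout, as above
    out[idx, np.arange(pvals.shape[0])] = 1
    return out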
def make_node(self, inp):
    if not cusolver_available:
        raise RuntimeError(
            "CUSOLVER is not available and GpuCholesky Op cannot be constructed."
        )
    if skcuda.__version__ <= "0.5.1":
        warnings.warn(
            "The GpuCholesky op requires scikit-cuda > 0.5.1 to work with CUDA 8"
        )
    if not pygpu_available:
        raise RuntimeError(
            "Missing pygpu or triu/tril functions. "
            "Install or update libgpuarray."
        )
    context_name = infer_context_name(inp)
    inp = as_gpuarray_variable(inp, context_name)
    inp = gpu_contiguous(inp)
    assert inp.ndim == 2
    return Apply(self, [inp], [inp.type()])
def local_abstractconv_cudnn_alt(fgraph, node):
    if not isinstance(
        node.op,
        (AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs),
    ):
        return

    if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1):
        return None
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(
        isinstance(p, tuple) for p in node.op.border_mode
    ):
        # Asymmetric padding not yet supported
        return None

    inp1 = node.inputs[0]
    inp2 = node.inputs[1]

    if not dnn_available(inp1.type.context_name):
        return

    op = node.op
    border_mode = node.op.border_mode
    subsample = node.op.subsample
    filter_dilation = node.op.filter_dilation
    num_groups = node.op.num_groups
    precision, _ = get_precision(None, [inp1, inp2])

    if node.op.filter_flip:
        conv_mode = "conv"
    else:
        conv_mode = "cross"

    if isinstance(op, AbstractConv2d):
        if border_mode == "half" or subsample != (1, 1) or num_groups != 1:
            return None
        if border_mode == "full":
            direction_hint = "bprop inputs"
        elif border_mode == "valid" and filter_dilation == (1, 1):
            direction_hint = "bprop weights"
        else:
            return None

        rval = dnn_conv(
            inp1,
            inp2,
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            direction_hint=direction_hint,
            conv_mode=conv_mode,
            num_groups=num_groups,
        )

    elif isinstance(op, AbstractConv2d_gradWeights):
        if (
            border_mode == "valid"
            and subsample == (1, 1)
            and filter_dilation == (1, 1)
            and num_groups == 1
        ):
            img = gpu_contiguous(inp1)
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(img, topgrad)
            img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))
            ishape = [shape_i_op(i)(img) for i in range(img.ndim)]
            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]

            out_shp = get_conv_output_shape(
                ishape,
                tshape,
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            out_shp = assert_conv_shape(out_shp)
            out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
            desc = GpuDnnConvDesc(
                border_mode=border_mode,
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode="cross",
                precision=precision,
            )(out.shape)

            conv = GpuDnnConv(algo=None, num_groups=num_groups)(
                img, topgrad, out, desc
            )
            if conv_mode == "conv":
                conv = conv[:, :, ::-1, ::-1]

            rval = as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
        else:
            return None

    elif isinstance(op, AbstractConv2d_gradInputs):
        if border_mode == "valid" and subsample == (1, 1) and num_groups == 1:
            kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(kerns, topgrad)
            conv_mode = "cross" if conv_mode == "conv" else "conv"
            desc = GpuDnnConvDesc(
                border_mode="full",
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode=conv_mode,
                precision=precision,
            )(kerns.shape)

            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
            shape = get_conv_output_shape(
                tshape,
                kshape,
                border_mode="full",
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            shape = assert_conv_shape(shape)
            out = GpuAllocEmpty(dtype=topgrad.dtype, context_name=ctx_name)(*shape)
            rval = GpuDnnConv(algo=None, num_groups=num_groups)(
                topgrad, kerns, out, desc
            )
        else:
            return None

    return [rval]
def make_node(self, dnll, sm, y_idx):
    ctx_name = infer_context_name(dnll, sm, y_idx)
    dnll = as_gpuarray_variable(dnll, ctx_name)
    sm = as_gpuarray_variable(sm, ctx_name)
    y_idx = as_gpuarray_variable(y_idx, ctx_name)
    return Apply(self, [dnll, sm, y_idx], [sm.type()])
def make_node(self, x):
    x = as_gpuarray_variable(x, infer_context_name(x))
    return Apply(self, [x], [x.type()])
def make_node(self, x, b):
    ctx_name = infer_context_name(x, b)
    x = as_gpuarray_variable(x, ctx_name)
    b = as_gpuarray_variable(b, ctx_name)
    return Apply(self, [x, b], [x.type()])
def local_gpua_mrg(fgraph, node):
    context_name = infer_context_name(*node.inputs)
    return local_gpua_mrg_graph(
        fgraph, node.op, context_name, node.inputs, node.outputs
    )