def __init__(self, fun, outp_shape, inp_shape, par_slices, overlap, nslice,
             queue, num_dev, reverse=False, lhs=None,
             DTYPE=np.complex64, DTYPE_real=np.float32):
    """Store the configuration and build one difference-norm kernel per queue.

    :param fun: sized collection of functions; its length is stored as
        ``num_fun``
    :param outp_shape: forwarded to ``_alloctmparrays`` as the output shape
    :param inp_shape: forwarded to ``_alloctmparrays`` as the input shape
    :param par_slices: stored as ``slices``
    :param overlap: stored as ``overlap``
    :param nslice: stored as ``nslice``
    :param queue: iterable of objects exposing a ``context`` attribute; one
        reduction kernel is compiled for each entry
    :param num_dev: stored as ``num_dev``
    :param reverse: stored as ``reverse``
    :param lhs: stored as ``lhs``
    :param DTYPE: complex data type. Anything NumPy resolves to complex64 or
        complex128 selects the ``float2`` or ``double2`` kernel signature;
        any other value leaves ``normkrnldiff`` empty
    :param DTYPE_real: output data type of the reduction kernels
    """
    self.fun = fun
    self.num_dev = num_dev
    self.slices = par_slices
    self.overlap = overlap
    self.queue = queue
    self.reverse = reverse
    self.nslice = nslice
    self.num_fun = len(self.fun)
    self.dtype = DTYPE
    self.lhs = lhs
    self.at_end = False

    self.idx_todev_start = 0
    self.idx_todev_stop = 0
    self.idx_tohost_start = 0
    self.idx_tohost_stop = 0
    self._resetindex()

    self.inp = []
    self.outp = []
    self._alloctmparrays(inp_shape, outp_shape)

    # Compare by normalised dtype instead of object identity, so that
    # np.dtype("complex64") or "complex64" select the same kernel as the
    # np.complex64 scalar type does.
    try:
        complex_dtype = np.dtype(DTYPE)
    except TypeError:
        # Not a dtype specification at all: treated like any other
        # unsupported value, i.e. no kernels are created.
        complex_dtype = None
    vector_ctype = {
        np.dtype(np.complex64): "float2",
        np.dtype(np.complex128): "double2",
    }.get(complex_dtype)

    self.normkrnldiff = []
    if vector_ctype is not None:
        # The signature is the same for every queue, so build it once.
        arguments = "__global {0} *x, __global {0} *y".format(vector_ctype)
        for q in queue:
            # Sums the squared component-wise differences of x and y.
            self.normkrnldiff.append(clred.ReductionKernel(
                q.context,
                DTYPE_real,
                0,
                reduce_expr="a+b",
                map_expr="pown(x[i].s0-y[i].s0,2)+pown(x[i].s1-y[i].s1,2)",
                arguments=arguments))
def get_reduction_kernel(self, reduce_expr, map_expr, neutral, *args):
    """Build a reduction kernel and return a callable that evaluates it.

    See the PyOpenCL documentation of
    pyopencl.reduction.ReductionKernel for a detailed description of the
    expressions. The function expects buffers that are in device address
    space, stored in gpu_* variables.

    :param reduce_expr: expression used to reduce two values into one,
        must use a and b as value names, e.g. 'a+b'
    :param map_expr: expression used to map values from the input arrays,
        which are named x0, x1, etc., e.g. 'x0[i]*x1[i]'
    :param neutral: neutral value of reduce_expr, e.g. '0'
    :param args: keys into ``self.arrays`` naming the buffers on which to
        calculate the reduction, e.g. backend.gpu_rho
    :return: zero-argument callable that runs the kernel on the selected
        buffers and returns ``.get()`` of the result
    """
    buffers = []
    signature = []
    for position, key in enumerate(args):
        buf = self.arrays[key]
        buffers.append(buf)
        ctype = pyopencl.tools.dtype_to_ctype(buf.dtype)
        signature.append('const {0} *x{1}'.format(ctype, position))

    # The first buffer's dtype is used as the kernel's result dtype.
    kernel = reduction.ReductionKernel(
        buffers[0].dtype,
        neutral=neutral,
        reduce_expr=reduce_expr,
        map_expr=map_expr,
        arguments=', '.join(signature))

    def run():
        return kernel(*buffers).get()

    return run
def __init__(
    self,
    par,
    ipiano_par,
    queue,
    fval,
    prg,
    coil,
    model,
    DTYPE=np.complex64,
    DTYPE_real=np.float32,
):
    """Read solver settings and compile the OpenCL helper kernels.

    :param par: dict read for "unknowns_TGV", "unknowns", "num_dev", "dz",
        "par_slices", "overlap", "dimY", "dimX" and "ctx"
    :param ipiano_par: dict read for "delta", "omega", "lambd", "tol",
        "stag" and "display_iterations" (required), plus "alpha",
        "alpha_max", "beta" and "max_iters" (optional, with defaults)
    :param queue: stored as ``_queue``
    :param fval: stored as ``_fval_init``
    :param prg: stored as ``_prg``
    :param coil: stored as ``_coils``
    :param model: stored as ``model``
    :param DTYPE: stored as ``_DTYPE``
    :param DTYPE_real: stored as ``_DTYPE_real``; np.float64 selects
        "double" kernel arguments, anything else selects "float"
    """
    self._DTYPE = DTYPE
    self._DTYPE_real = DTYPE_real

    # Solver parameters; plain indexing means the key is mandatory.
    self.delta = ipiano_par["delta"]
    self.omega = ipiano_par["omega"]
    self.lambd = ipiano_par["lambd"]
    self.alpha = ipiano_par.get("alpha", 0.0)
    self.alpha_max = ipiano_par.get("alpha_max", 0.0)
    self.beta = ipiano_par.get("beta", 1.0)
    self.iters = ipiano_par.get("max_iters", 10)
    self.tol = ipiano_par["tol"]
    self.stag = ipiano_par["stag"]
    self.display_iterations = ipiano_par["display_iterations"]

    self.beta_line = 1e3
    self.theta_line = DTYPE_real(1.0)

    # Problem description taken from the global parameter dict.
    self.unknowns_TGV = par["unknowns_TGV"]
    self.unknowns = par["unknowns"]
    self.num_dev = len(par["num_dev"])
    self.dz = par["dz"]

    self._fval_init = fval
    self._prg = prg
    self._queue = queue
    self.model = model
    self._coils = coil

    # Left unset here; nothing in this constructor assigns them a value.
    self.modelgrad = None
    self.min_const = None
    self.max_const = None
    self.real_const = None

    self._kernelsize = (
        par["par_slices"] + par["overlap"],
        par["dimY"],
        par["dimX"],
    )

    if self._DTYPE_real == np.float64:
        var_size = "double"
    else:
        var_size = "float"

    # NOTE(review): the output argument of both element-wise kernels is
    # declared as `float` even when var_size is "double" - confirm that
    # this is intended.
    elementwise_args = "float *out, {0}2 *x".format(var_size)

    self.normkernl = elwis.ElementwiseKernel(
        context=par["ctx"][0],
        arguments=elementwise_args,
        operation="out[i]=pown(x[i].s0,2) + pown(x[i].s1,2)",
        name="norm_kernel",
    )
    self.abskernl = elwis.ElementwiseKernel(
        context=par["ctx"][0],
        arguments=elementwise_args,
        operation="out[i]=x[i].s0+x[i].s1",
        name="abs_kernel",
    )
    # Sums the squared component-wise differences of x and y.
    self.normkrnldiff = clred.ReductionKernel(
        par["ctx"][0],
        self._DTYPE_real,
        0,
        reduce_expr="a+b",
        map_expr="pown(x[i].s0-y[i].s0,2)+pown(x[i].s1-y[i].s1,2)",
        arguments="__global {0}2 *x, __global {0}2 *y".format(var_size),
    )