import ThrustRTC as trtc

# Predicate functor: evaluates to non-zero when x is odd.
is_odd = trtc.Functor({}, ['x'], '''
    return x % 2;
''')

host_values = [-5, 0, 2, -3, 2, 4, 0, -1, 2, 8]

# Transform: negate in place.
darr = trtc.device_vector_from_list(host_values, 'int32_t')
trtc.Transform(darr, darr, trtc.Negate())
print(darr.to_host())

# Transform_Binary: combine two inputs with Plus into a separate output.
darr_in1 = trtc.device_vector_from_list([-5, 0, 2, 3, 2, 4], 'int32_t')
darr_in2 = trtc.device_vector_from_list([3, 6, -2, 1, 2, 3], 'int32_t')
darr_out = trtc.device_vector('int32_t', 6)
trtc.Transform_Binary(darr_in1, darr_in2, darr_out, trtc.Plus())
print(darr_out.to_host())

# Transform_If: Negate in place, with is_odd passed as the predicate.
darr = trtc.device_vector_from_list(host_values, 'int32_t')
trtc.Transform_If(darr, darr, trtc.Negate(), is_odd)
print(darr.to_host())

# Transform_If_Stencil: Negate in place, with a stencil vector and Identity
# passed as the predicate.
darr_data = trtc.device_vector_from_list(host_values, 'int32_t')
darr_stencil = trtc.device_vector_from_list(
    [1, 0, 1, 0, 1, 0, 1, 0, 1, 0], 'int32_t')
trtc.Transform_If_Stencil(
    darr_data, darr_stencil, darr_data, trtc.Negate(), trtc.Identity())
print(darr_data.to_host())

darr_in1 = trtc.device_vector_from_list([-5, 0, 2, 3, 2, 4], 'int32_t')
import ThrustRTC as trtc

# Allocate ten int32 slots, run Sequence on them, then Tabulate with Negate,
# and print the host copy of the result.
n_items = 10
darr = trtc.device_vector('int32_t', n_items)
trtc.Sequence(darr)
negate_op = trtc.Negate()
trtc.Tabulate(darr, negate_op)
print(darr.to_host())
import ThrustRTC as trtc

# --- 1. Zipped copy between concrete device vectors ---------------------
d_int_in = trtc.device_vector_from_list([0, 1, 2, 3, 4], 'int32_t')
d_float_in = trtc.device_vector_from_list(
    [0.0, 10.0, 20.0, 30.0, 40.0], 'float')
d_int_out = trtc.device_vector('int32_t', 5)
d_float_out = trtc.device_vector('float', 5)

zipped_in = trtc.DVZipped([d_int_in, d_float_in], ['a', 'b'])
zipped_out = trtc.DVZipped([d_int_out, d_float_out], ['a', 'b'])
trtc.Copy(zipped_in, zipped_out)
print(d_int_out.to_host())
print(d_float_out.to_host())

# --- 2. Same copy, but the inputs are a counter and a transform of it ---
d_int_in = trtc.DVCounter(trtc.DVInt32(0), 5)
scale_and_shift = trtc.Functor(
    {}, ['i'], ' return (float)i*10.0f +10.0f;\n')
d_float_in = trtc.DVTransform(d_int_in, "float", scale_and_shift)
zipped_in = trtc.DVZipped([d_int_in, d_float_in], ['a', 'b'])
trtc.Copy(zipped_in, zipped_out)
print(d_int_out.to_host())
print(d_float_out.to_host())

# --- 3. Copy from a constant tuple-valued source ------------------------
const_tuple = trtc.DVTuple({
    'a': trtc.DVInt32(123),
    'b': trtc.DVFloat(456.0),
})
const_in = trtc.DVConstant(const_tuple, 5)
trtc.Copy(const_in, zipped_out)
import ThrustRTC as trtc

# Print the results of the three extremum queries on one small vector.
darr = trtc.device_vector_from_list([1, 0, 2, 2, 1, 3], 'int32_t')
for query in (trtc.Min_Element, trtc.Max_Element, trtc.MinMax_Element):
    print(query(darr))
import ThrustRTC as trtc

# Two vectors that differ only at position 2.
d1 = trtc.device_vector_from_list([0, 5, 3, 7], 'int32_t')
d2 = trtc.device_vector_from_list([0, 5, 8, 7], 'int32_t')

# Mismatch without, then with, an explicit EqualTo comparator.
default_result = trtc.Mismatch(d1, d2)
print(default_result)
explicit_result = trtc.Mismatch(d1, d2, trtc.EqualTo())
print(explicit_result)
def find_pairs(cell_start, is_first_in_pair, cell_id, idx, length):
    """Launch the find-pairs kernel over the first ``length - 1`` positions.

    ``cell_id`` is viewed through ``idx`` as a ``DVPermutation``; the kernel
    receives ``cell_start``, that permuted view and ``is_first_in_pair``.
    Nothing is launched when ``length`` is 1 or less.
    """
    permuted_ids = trtc.DVPermutation(cell_id, idx)
    if length <= 1:
        return
    kernel_args = [cell_start, permuted_ids, is_first_in_pair]
    AlgorithmicStepMethods.__find_pairs_body.launch_n(length - 1, kernel_args)
def max_pair(data_out, data_in, is_first_in_pair, idx, length):
    """Launch the max-pair kernel over the first ``length - 1`` positions.

    ``data_in`` is read through ``idx`` as a ``DVPermutation``. Nothing is
    launched when ``length`` is 1 or less.
    """
    # note: silently assumes that data_out is not permuted
    # (i.e. not part of state)
    permuted_input = trtc.DVPermutation(data_in, idx)
    if length <= 1:
        return
    kernel_args = [data_out, permuted_input, is_first_in_pair]
    AlgorithmicStepMethods.__max_pair_body.launch_n(length - 1, kernel_args)
def _sort_by_cell_id_and_update_cell_start(cell_id, cell_start, idx, length):
    """Sort ``idx`` by ``cell_id``, reset ``cell_start`` and rebuild it.

    Steps, in order: key-sort ``idx.data`` by ``cell_id.data``, fill
    ``cell_start.data`` with ``length``, then launch the cell-start kernel
    for ``length - 1`` threads. Returns ``idx``.
    """
    keys = cell_id.data
    order = idx.data
    starts = cell_start.data

    trtc.Sort_By_Key(keys, order)
    trtc.Fill(starts, trtc.DVInt64(length))

    kernel = AlgorithmicMethods.___sort_by_cell_id_and_update_cell_start_body
    kernel.launch_n(length - 1, [keys, starts, order])
    return idx
class AlgorithmicMethods:
    """ThrustRTC implementations of particle-level algorithms.

    Every ``__*_body`` class attribute is a ``trtc.For`` kernel (C++ source
    held in a string); the static method that follows it converts the Python
    arguments into device values and launches that kernel.
    """

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def calculate_displacement(dim, scheme, displacement, courant, cell_origin, position_in_cell):
        """Compute per-droplet displacement along dimension ``dim``.

        The kernel is built on every call because the right-hand side of the
        final assignment is generated by ``scheme(None, None, None)`` and
        interpolated into the kernel source; inside that expression the names
        ``omega``, ``c_l`` and ``c_r`` are in scope.

        One thread is launched per column of ``displacement``
        (``displacement.shape[1]``).

        NOTE(review): the output index assumes ``displacement`` uses the same
        flattened layout as ``position_in_cell`` (row stride
        ``position_in_cell.shape[1]``) - confirm against the caller.
        """
        dim = trtc.DVInt64(dim)
        idx_length = trtc.DVInt64(position_in_cell.shape[1])
        courant_length = trtc.DVInt64(courant.shape[0])
        loop = trtc.For(
            ['dim', 'idx_length', 'displacement', 'courant', 'courant_length',
             'cell_origin', 'position_in_cell'],
            "droplet",
            f'''
            // Arakawa-C grid
            int _l_0 = cell_origin[droplet + 0];
            int _l_1 = cell_origin[droplet + idx_length];
            int _l = _l_0 + _l_1 * courant_length;
            int _r_0 = cell_origin[droplet + 0] + 1 * (dim == 0);
            int _r_1 = cell_origin[droplet + idx_length] + 1 * (dim == 1);
            int _r = _r_0 + _r_1 * courant_length;
            // `auto` keeps the element type of the source arrays; declaring
            // these as `int` would truncate whatever is stored there.
            auto omega = position_in_cell[droplet + idx_length * dim];
            auto c_r = courant[_r];
            auto c_l = courant[_l];
            // Flattened 2-D index. `displacement[droplet, dim]` would be the
            // C++ comma operator, i.e. `displacement[dim]` for every thread.
            // The trailing `;` is harmless if the scheme already ends in one.
            displacement[droplet + idx_length * dim] = {scheme(None, None, None)};
            ''')
        loop.launch_n(
            displacement.shape[1],
            [dim, idx_length, displacement, courant, courant_length, cell_origin, position_in_cell])

    # One thread per candidate pair (idx[i], idx[i + 1]). j is re-pointed at
    # the member with the larger n; g = n[j] / n[k] capped at gamma[i].
    # healthy[0] is cleared when either multiplicity ends up zero.
    __coalescence_body = trtc.For(
        ['n', 'volume', 'idx', 'idx_length', 'intensive', 'intensive_length',
         'extensive', 'extensive_length', 'gamma', 'healthy', 'adaptive',
         'subs', 'adaptive_memory'],
        "i",
        '''
        if (gamma[i] == 0) {
            adaptive_memory[i] = 1;
            return;
        }

        int j = idx[i];
        int k = idx[i + 1];

        if (n[j] < n[k]) {
            j = idx[i + 1];
            k = idx[i];
        }
        int g = n[j] / n[k];
        if (adaptive)
            adaptive_memory[i] = (int)(gamma[i] * subs / g);
        if (g > gamma[i])
            g = gamma[i];
        if (g == 0)
            return;

        int new_n = n[j] - g * n[k];
        if (new_n > 0) {
            n[j] = new_n;
            for (int attr = 0; attr < intensive_length; attr+=idx_length) {
                intensive[attr + k] = (intensive[attr + k] * volume[k] + intensive[attr + j] * g * volume[j]) / (volume[k] + g * volume[j]);
            }
            for (int attr = 0; attr < extensive_length; attr+=idx_length) {
                extensive[attr + k] += g * extensive[attr + j];
            }
        }
        else {  // new_n == 0
            n[j] = (int)(n[k] / 2);
            n[k] = n[k] - n[j];
            for (int attr = 0; attr < intensive_length; attr+=idx_length) {
                intensive[attr + j] = (intensive[attr + k] * volume[k] + intensive[attr + j] * g * volume[j]) / (volume[k] + g * volume[j]);
                intensive[attr + k] = intensive[attr + j];
            }
            for (int attr = 0; attr < extensive_length; attr+=idx_length) {
                extensive[attr + j] = g * extensive[attr + j] + extensive[attr + k];
                extensive[attr + k] = extensive[attr + j];
            }
        }
        if (n[k] == 0 || n[j] == 0) {
            healthy[0] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def coalescence(n, volume, idx, length, intensive, extensive, gamma, healthy, adaptive, subs, adaptive_memory):
        """Run the coalescence kernel and reduce ``adaptive_memory``.

        Launches ``length - 1`` threads, then returns the ``Maximum``
        reduction (initial value 0) of the first ``length - 1`` entries of
        ``adaptive_memory``.
        """
        idx_length = trtc.DVInt64(len(idx))
        intensive_length = trtc.DVInt64(len(intensive))
        extensive_length = trtc.DVInt64(len(extensive))
        adaptive_device = trtc.DVBool(adaptive)
        subs_device = trtc.DVInt64(subs)
        AlgorithmicMethods.__coalescence_body.launch_n(
            length - 1,
            [n.data, volume.data, idx.data, idx_length, intensive.data, intensive_length,
             extensive.data, extensive_length, gamma.data, healthy.data, adaptive_device,
             subs_device, adaptive_memory.data])
        return trtc.Reduce(adaptive_memory.data.range(0, length - 1), trtc.DVInt64(0), trtc.Maximum())

    # Overwrites prob[i] in place; consecutive pairs of threads read the same
    # rand entry (index i / 2).
    __compute_gamma_body = trtc.For(['prob', 'rand'], "i", '''
        prob[i] = -floor(-prob[i] + rand[int(i / 2)]);
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def compute_gamma(prob, rand):
        """Replace ``prob`` in place using the compute-gamma kernel."""
        AlgorithmicMethods.__compute_gamma_body.launch_n(len(prob), [prob.data, rand.data])

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def condensation(
            solver,
            n_cell, cell_start_arg,
            v, particle_temperatures, n, vdry, idx, rhod, thd, qv, dv, prhod, pthd, pqv, kappa,
            rtol_x, rtol_thd, dt, substeps, cell_order
    ):
        """Not available in this backend; always raises NotImplementedError."""
        raise NotImplementedError()

    # Marks idx[i] with the sentinel idx_length and clears healthy[0] when the
    # last-dimension cell origin is 0 and the in-cell position is negative.
    __flag_precipitated_body = trtc.For(
        ['idx', 'idx_length', 'n_dims', 'healthy', 'cell_origin', 'position_in_cell'],
        "i",
        '''
        if (cell_origin[idx_length * (n_dims-1) + i] == 0 && position_in_cell[idx_length * (n_dims-1) + i] < 0) {
            idx[i] = idx_length;
            healthy[0] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def flag_precipitated(cell_origin, position_in_cell, idx, length, healthy):
        """Flag entries of ``idx`` via the flag-precipitated kernel.

        NOTE(review): ``n_dims`` is taken as ``len(cell_origin.shape)`` (the
        array rank), not ``cell_origin.shape[0]`` - confirm this is intended.
        """
        idx_length = trtc.DVInt64(idx.size())
        n_dims = trtc.DVInt64(len(cell_origin.shape))
        AlgorithmicMethods.__flag_precipitated_body.launch_n(
            length, [idx, idx_length, n_dims, healthy, cell_origin, position_in_cell])

    # output[i] is zero unless i is first in a pair; otherwise it is computed
    # from the radius ratio p = radii[i + 1] / radii[i] and clamped at zero.
    __linear_collection_efficiency_body = trtc.For(
        ['A', 'B', 'D1', 'D2', 'E1', 'E2', 'F1', 'F2', 'G1', 'G2', 'G3', 'Mf', 'Mg',
         'output', 'radii', 'is_first_in_pair', 'unit'],
        "i",
        '''
        output[i] = 0;
        if (is_first_in_pair[i]) {
            double r = radii[i] / unit;
            double r_s = radii[i + 1] / unit;
            double p = r_s / r;
            if (p != 0 && p != 1) {
                double G = pow((G1 / r), Mg) + G2 + G3 * r;
                double Gp = pow((1 - p), G);
                if (Gp != 0) {
                    double D = D1 / pow(r, D2);
                    double E = E1 / pow(r, E2);
                    double F = pow((F1 / r), Mf) + F2;
                    output[i] = A + B * p + D / pow(p, F) + E / Gp;
                    if (output[i] < 0) {
                        output[i] = 0;
                    }
                }
            }
        }
        ''')

    @staticmethod
    def linear_collection_efficiency(params, output, radii, is_first_in_pair, unit):
        """Fill ``output`` using the linear collection-efficiency kernel.

        ``params`` must unpack into exactly thirteen scalars
        (A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg). The kernel is
        launched for ``len(is_first_in_pair) - 1`` threads.
        """
        A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg = params
        dA = trtc.DVDouble(A)
        dB = trtc.DVDouble(B)
        dD1 = trtc.DVDouble(D1)
        dD2 = trtc.DVDouble(D2)
        dE1 = trtc.DVDouble(E1)
        dE2 = trtc.DVDouble(E2)
        dF1 = trtc.DVDouble(F1)
        dF2 = trtc.DVDouble(F2)
        dG1 = trtc.DVDouble(G1)
        dG2 = trtc.DVDouble(G2)
        dG3 = trtc.DVDouble(G3)
        dMf = trtc.DVDouble(Mf)
        dMg = trtc.DVDouble(Mg)
        dunit = trtc.DVDouble(unit)
        AlgorithmicMethods.__linear_collection_efficiency_body.launch_n(
            len(is_first_in_pair) - 1,
            [dA, dB, dD1, dD2, dE1, dE2, dF1, dF2, dG1, dG2, dG3, dMf, dMg,
             output.data, radii.data, is_first_in_pair.data, dunit])

    # Piecewise-linear lookup: bin r_id = (int)(factor * radius[i]), then
    # a[r_id] plus the in-bin remainder times b[r_id].
    __interpolation_body = trtc.For(['output', 'radius', 'factor', 'a', 'b'], 'i', '''
        int r_id = (int)(factor * radius[i]);
        auto r_rest = (factor * radius[i] - r_id) / factor;
        output[i] = a[r_id] + r_rest * b[r_id];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def interpolation(output, radius, factor, b, c):
        """Fill ``output`` by table interpolation over ``radius``.

        The Python arguments ``b`` and ``c`` are bound to the kernel
        parameters named ``a`` and ``b`` respectively.
        """
        factor_device = trtc.DVInt64(factor)
        AlgorithmicMethods.__interpolation_body.launch_n(
            len(radius),
            [output.data, radius.data, factor_device, b.data, c.data])

    @staticmethod
    def make_cell_caretaker(idx, cell_start, scheme):
        """Return the sort-and-update-cell-start routine (arguments unused)."""
        return AlgorithmicMethods._sort_by_cell_id_and_update_cell_start

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def moments(moment_0, moments, n, attr, cell_id, idx, length, specs_idx, specs_rank, min_x, max_x, x_id):
        """Compute moments on the host via the Numba backend.

        All array arguments are downloaded with ``to_ndarray``,
        ``Numba.moments_body`` is run on them, and ``moment_0`` / ``moments``
        are uploaded back.
        """
        # TODO
        print("Numba import!: ThrustRTC.moments(...)")

        from PySDM.backends.numba.numba import Numba
        host_moment_0 = moment_0.to_ndarray()
        host_moments = moments.to_ndarray()
        host_n = n.to_ndarray()
        host_attr = attr.to_ndarray()
        host_cell_id = cell_id.to_ndarray()
        host_idx = idx.to_ndarray()
        host_specs_idx = specs_idx.to_ndarray()
        host_specs_rank = specs_rank.to_ndarray()
        Numba.moments_body(host_moment_0, host_moments, host_n, host_attr, host_cell_id, host_idx, length,
                           host_specs_idx, host_specs_rank, min_x, max_x, x_id)
        moment_0.upload(host_moment_0)
        moments.upload(host_moments)

    # Per cell: 0 when the cell holds fewer than two entries, otherwise
    # dt_div_dv * sd_num * (sd_num - 1) / 2 / (sd_num / 2).
    __normalize_body_0 = trtc.For(['cell_start', 'norm_factor', 'dt_div_dv'], "i", '''
        int sd_num = cell_start[i + 1] - cell_start[i];
        if (sd_num < 2) {
            norm_factor[i] = 0;
        }
        else {
            int half_sd_num = sd_num / 2;
            norm_factor[i] = dt_div_dv * sd_num * (sd_num - 1) / 2 / half_sd_num;
        }
        ''')

    # Per entry: scale prob by the factor of the cell it belongs to.
    __normalize_body_1 = trtc.For(['prob', 'cell_id', 'norm_factor'], "d", '''
        prob[d] *= norm_factor[cell_id[d]];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def normalize(prob, cell_id, cell_start, norm_factor, dt_div_dv):
        """Compute per-cell factors, then scale ``prob`` by them in place."""
        n_cell = cell_start.shape[0] - 1
        device_dt_div_dv = trtc.DVDouble(dt_div_dv)
        AlgorithmicMethods.__normalize_body_0.launch_n(
            n_cell, [cell_start.data, norm_factor.data, device_dt_div_dv])
        AlgorithmicMethods.__normalize_body_1.launch_n(
            prob.shape[0], [prob.data, cell_id.data, norm_factor.data])

    # Replaces idx[i] by the sentinel idx_length when data[idx[i]] is zero.
    __remove_zeros_body = trtc.For(['data', 'idx', 'idx_length'], "i", '''
        if (idx[i] < idx_length && data[idx[i]] == 0)
            idx[i] = idx_length;
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def remove_zeros(data, idx, length) -> int:
        """Move indices of zero-valued ``data`` to the end of ``idx``.

        Flagged entries become ``idx.size()``; ``idx`` is then sorted so they
        land last. Returns the number of entries that are not flagged.
        """
        idx_length = trtc.DVInt64(idx.size())

        # Warning: (potential bug source): reading from outside of array
        AlgorithmicMethods.__remove_zeros_body.launch_n(length, [data, idx, idx_length])

        trtc.Sort(idx)

        result = idx.size() - trtc.Count(idx, idx_length)
        return result

    # NOTE(review): thread 0 only writes cell_start for the first cell and
    # never examines the (0, 1) boundary, and the value stored is idx[i + 1]
    # rather than the position i + 1 - both look suspicious; verify against
    # the reference backend before relying on multi-cell results.
    ___sort_by_cell_id_and_update_cell_start_body = trtc.For(['cell_id', 'cell_start', 'idx'], "i", '''
        if (i == 0) {
            cell_start[cell_id[idx[0]]] = 0;
        }
        else {
            int cell_id_curr = cell_id[idx[i]];
            int cell_id_next = cell_id[idx[i + 1]];
            int diff = (cell_id_next - cell_id_curr);
            for (int j = 1; j <= diff; j++) {
                cell_start[cell_id_curr + j] = idx[i + 1];
            }
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def _sort_by_cell_id_and_update_cell_start(cell_id, cell_start, idx, length):
        """Key-sort ``idx`` by ``cell_id`` and rebuild ``cell_start``.

        ``cell_start`` is first filled with ``length``; the kernel is then
        launched for ``length - 1`` threads. Returns ``idx``.
        """
        trtc.Sort_By_Key(cell_id.data, idx.data)
        trtc.Fill(cell_start.data, trtc.DVInt64(length))
        AlgorithmicMethods.___sort_by_cell_id_and_update_cell_start_body.launch_n(
            length - 1, [cell_id.data, cell_start.data, idx.data])
        return idx
def interpolation(output, radius, factor, b, c):
    """Launch the interpolation kernel once per element of ``radius``.

    ``factor`` is passed as a 64-bit integer device value; ``b`` and ``c``
    are handed to the kernel after ``output`` and ``radius``.
    """
    kernel_args = [
        output.data,
        radius.data,
        trtc.DVInt64(factor),
        b.data,
        c.data,
    ]
    AlgorithmicMethods.__interpolation_body.launch_n(len(radius), kernel_args)
def normalize(prob, cell_id, cell_start, norm_factor, dt_div_dv):
    """Run the two normalisation kernels in sequence.

    The first is launched once per cell (``cell_start.shape[0] - 1``
    threads) and writes ``norm_factor``; the second is launched once per
    entry of ``prob`` and receives ``prob``, ``cell_id`` and ``norm_factor``.
    """
    cells = cell_start.shape[0] - 1
    scale = trtc.DVDouble(dt_div_dv)

    per_cell_args = [cell_start.data, norm_factor.data, scale]
    AlgorithmicMethods.__normalize_body_0.launch_n(cells, per_cell_args)

    per_entry_args = [prob.data, cell_id.data, norm_factor.data]
    AlgorithmicMethods.__normalize_body_1.launch_n(prob.shape[0], per_entry_args)
def linear_collection_efficiency(params, output, radii, is_first_in_pair, unit):
    """Launch the linear collection-efficiency kernel.

    ``params`` must unpack into exactly thirteen scalars, in the order
    A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg. Each one, and then
    ``unit``, is wrapped as a double device value. The kernel runs for
    ``len(is_first_in_pair) - 1`` threads.
    """
    # Unpacking keeps the "exactly thirteen values" check of the signature.
    A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg = params
    coefficients = [
        trtc.DVDouble(value)
        for value in (A, B, D1, D2, E1, E2, F1, F2, G1, G2, G3, Mf, Mg)
    ]
    device_unit = trtc.DVDouble(unit)

    kernel_args = coefficients + [
        output.data, radii.data, is_first_in_pair.data, device_unit]
    AlgorithmicMethods.__linear_collection_efficiency_body.launch_n(
        len(is_first_in_pair) - 1, kernel_args)
def flag_precipitated(cell_origin, position_in_cell, idx, length, healthy):
    """Launch the flag-precipitated kernel for ``length`` threads.

    The kernel receives ``idx.size()`` and ``len(cell_origin.shape)`` as
    64-bit integer device values alongside the arrays themselves.
    """
    sentinel = trtc.DVInt64(idx.size())
    rank = trtc.DVInt64(len(cell_origin.shape))
    kernel_args = [idx, sentinel, rank, healthy, cell_origin, position_in_cell]
    AlgorithmicMethods.__flag_precipitated_body.launch_n(length, kernel_args)
import ThrustRTC as trtc

host_keys = [1, 3, 3, 3, 2, 2, 1]
host_payload = [9, 8, 7, 6, 5, 4, 3]

# Unique, in place, default comparison.
d_value = trtc.device_vector_from_list(host_keys, 'int32_t')
count = trtc.Unique(d_value)
print(d_value.to_host(0, count))

# Unique, in place, explicit EqualTo.
d_value = trtc.device_vector_from_list(host_keys, 'int32_t')
count = trtc.Unique(d_value, trtc.EqualTo())
print(d_value.to_host(0, count))

# Unique_Copy into a separate output, default comparison.
d_in = trtc.device_vector_from_list(host_keys, 'int32_t')
d_out = trtc.device_vector('int32_t', 7)
count = trtc.Unique_Copy(d_in, d_out)
print(d_out.to_host(0, count))

# Unique_Copy into a separate output, explicit EqualTo.
d_in = trtc.device_vector_from_list(host_keys, 'int32_t')
d_out = trtc.device_vector('int32_t', 7)
count = trtc.Unique_Copy(d_in, d_out, trtc.EqualTo())
print(d_out.to_host(0, count))

# Unique_By_Key, default comparison.
d_keys = trtc.device_vector_from_list(host_keys, 'int32_t')
d_values = trtc.device_vector_from_list(host_payload, 'int32_t')
count = trtc.Unique_By_Key(d_keys, d_values)
print(d_keys.to_host(0, count))
print(d_values.to_host(0, count))

# Unique_By_Key, explicit EqualTo.
d_keys = trtc.device_vector_from_list(host_keys, 'int32_t')
d_values = trtc.device_vector_from_list(host_payload, 'int32_t')
count = trtc.Unique_By_Key(d_keys, d_values, trtc.EqualTo())
print(d_keys.to_host(0, count))
print(d_values.to_host(0, count))
def amin(row, idx):
    """Return the element of ``row`` selected by ``Min_Element``.

    ``row.data`` is viewed through ``idx.data`` as a ``DVPermutation``; the
    position returned by ``Min_Element`` over its first ``len(row)`` entries
    is mapped back through ``idx`` before indexing ``row``.
    """
    permuted = trtc.DVPermutation(row.data, idx.data)
    position = trtc.Min_Element(permuted.range(0, len(row)))
    return row[idx[position]]
import ThrustRTC as trtc

# Negate a DVReverse view of the input into a separate output vector.
dinput = trtc.device_vector_from_list([3, 7, 2, 5], 'int32_t')
doutput = trtc.device_vector('int32_t', 4)

dreverse = trtc.DVReverse(dinput)
trtc.Transform(dreverse, doutput, trtc.Negate())

host_result = doutput.to_host()
print(host_result)
def cell_id(cell_id, cell_origin, strides):
    """Launch the cell-id kernel once per element of ``cell_id``.

    The kernel additionally receives ``strides.shape[1]`` and
    ``cell_origin.shape[0]`` as 64-bit integer device values.
    """
    dims_device = trtc.DVInt64(strides.shape[1])
    size_device = trtc.DVInt64(cell_origin.shape[0])
    kernel_args = [cell_id, cell_origin, strides, dims_device, size_device]
    AlgorithmicStepMethods.__cell_id_body.launch_n(cell_id.size(), kernel_args)
class StorageMethods:
    """Allocation, transfer and slicing helpers for ThrustRTC device vectors.

    Device objects produced here are "equipped" with ``shape``, ``dtype`` and
    a ``get(index)`` accessor (see ``__equip``), since the underlying vectors
    are flat.
    """
    # TODO check static For
    storage = trtc.DVVector.DVVector
    integer = np.int64
    double = np.float64

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def array(shape, dtype):
        """Allocate a flat device vector holding ``prod(shape)`` elements.

        ``dtype`` may be ``float``/``StorageMethods.double`` (stored as
        ``double``) or ``int``/``StorageMethods.integer`` (stored as
        ``int64_t``); anything else raises ``NotImplementedError``.
        """
        if dtype in (float, StorageMethods.double):
            elem_cls = 'double'
            elem_dtype = StorageMethods.double
        elif dtype in (int, StorageMethods.integer):
            elem_cls = 'int64_t'
            elem_dtype = StorageMethods.integer
        else:
            raise NotImplementedError

        data = trtc.device_vector(elem_cls, int(np.prod(shape)))
        # TODO: trtc.Fill(data, trtc.DVConstant(np.nan))

        StorageMethods.__equip(data, shape, elem_dtype)
        return data

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def download(backend_data, numpy_target):
        """Copy ``backend_data`` into the existing array ``numpy_target``."""
        if isinstance(backend_data, StorageMethods.storage):
            numpy_target[:] = np.reshape(backend_data.to_host(), backend_data.shape)
        else:
            numpy_target[:] = StorageMethods.to_ndarray(backend_data)

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def from_ndarray(array):
        """Upload a NumPy array, converting it to int64 or float64.

        The dtype is chosen from the textual dtype name (``int*`` or
        ``float*``); other dtypes raise ``NotImplementedError``. The original
        shape is recorded on the returned device vector.
        """
        shape = array.shape

        if str(array.dtype).startswith('int'):
            dtype = StorageMethods.integer
        elif str(array.dtype).startswith('float'):
            dtype = StorageMethods.double
        else:
            raise NotImplementedError

        if array.ndim > 1:
            array = array.astype(dtype).flatten()
        else:
            array = array.astype(dtype)

        result = trtc.device_vector_from_numpy(array)

        StorageMethods.__equip(result, shape, dtype)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def range(array, start=0, stop=None):
        """Return rows ``start:stop`` of a 1-D or 2-D equipped array.

        ``stop`` defaults to ``array.shape[0]``. For 2-D input the flat range
        is scaled by the row length ``array.shape[1]``. Higher ranks raise
        ``NotImplementedError``.
        """
        if stop is None:
            stop = array.shape[0]
        dim = len(array.shape)
        if dim == 1:
            result = array.range(start, stop)
            new_shape = (stop - start, )
        elif dim == 2:
            result = array.range(array.shape[1] * start, array.shape[1] * stop)
            new_shape = (stop - start, array.shape[1])
        else:
            raise NotImplementedError(
                "Only 2 or less dimensions array is supported.")

        StorageMethods.__equip(result, shape=new_shape, dtype=array.dtype)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def read_row(array, i):
        """Return row ``i`` of a 2-D equipped array as a 1-D range."""
        row_length = array.shape[1]
        start = row_length * i
        stop = start + row_length

        result = array.range(start, stop)

        StorageMethods.__equip(result, shape=(row_length, ), dtype=array.dtype)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def shuffle_global(idx, length, u01):
        """Permute the first ``length`` entries of ``idx`` by key-sorting
        them with the first ``length`` entries of ``u01`` as keys."""
        # WARNING: ineffective implementation
        trtc.Sort_By_Key(u01.range(0, length), idx.range(0, length))

    # One thread per cell: walks the cell's slice of idx from its end towards
    # its start, swapping each position with one chosen from u01.
    __shuffle_local_body = trtc.For(['cell_start', 'u01', 'idx'], "c", '''
        for (int i=cell_start[c+1]-1; i > cell_start[c]; i--) {
            int j = cell_start[c] + u01[i] * (cell_start[c+1] - cell_start[c]);
            int tmp = idx[i];
            idx[i] = idx[j];
            idx[j] = tmp;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def shuffle_local(idx, u01, cell_start):
        """Shuffle ``idx`` within each cell, one kernel thread per cell."""
        StorageMethods.__shuffle_local_body.launch_n(cell_start.size() - 1, [cell_start, u01, idx])
        # TODO: print("Numba import!: ThrustRTC.shuffle_local(...)")
        # from PySDM.backends.numba.numba import Numba
        # host_idx = StorageMethods.to_ndarray(idx)
        # host_u01 = StorageMethods.to_ndarray(u01)
        # host_cell_start = StorageMethods.to_ndarray(cell_start)
        # Numba.shuffle_local(host_idx, host_u01, host_cell_start)
        # device_idx = StorageMethods.from_ndarray(host_idx)
        # trtc.Copy(device_idx, idx)

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def to_ndarray(data):
        """Download an equipped device vector or range as a shaped ndarray.

        A ``DVRange`` is first copied into a freshly allocated vector. That
        staging vector uses the dtype recorded on the range, so integer
        ranges stay int64 instead of being converted to float64; ``float``
        is used only when no dtype was recorded. Other input types raise
        ``NotImplementedError``.
        """
        # TODO: move to __equip??
        if isinstance(data, StorageMethods.storage):
            pass
        elif isinstance(data, trtc.DVVector.DVRange):
            staging_dtype = getattr(data, 'dtype', float)
            data_copy = StorageMethods.array(data.shape, staging_dtype)
            trtc.Copy(data, data_copy)
            data = data_copy
        else:
            raise NotImplementedError()

        result = data.to_host()
        result = np.reshape(result, data.shape)
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def upload(numpy_data, backend_target):
        """Upload flattened ``numpy_data`` and swap it with ``backend_target``."""
        tmp = trtc.device_vector_from_numpy(numpy_data.flatten())
        trtc.Swap(tmp, backend_target)

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def write_row(array, i, row):
        """Copy ``row`` into row ``i`` of a 2-D equipped array."""
        row_length = array.shape[1]
        start = row_length * i
        stop = start + row_length
        trtc.Copy(row, array.range(start, stop))

    @staticmethod
    def __equip(data, shape, dtype):
        """Attach ``shape``, ``dtype`` and a ``get(index)`` accessor to ``data``.

        An integer ``shape`` is normalised to a one-element tuple.
        """
        if isinstance(shape, int):
            shape = (shape, )
        data.shape = shape
        data.dtype = dtype

        def get(index):
            # Reducing a one-element range yields that element on the host.
            return trtc.Reduce(data.range(index, index + 1))

        data.get = get
class AlgorithmicStepMethods:
    """ThrustRTC kernels and launchers for per-step, pair-wise operations.

    The ``*_pair`` methods view their input through ``idx`` as a
    ``DVPermutation`` and launch ``length - 1`` threads, skipping the launch
    entirely when ``length`` is 1 or less.
    """

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def amax(row, idx):
        """Return the element of ``row`` selected by ``Max_Element`` over the
        first ``len(row)`` entries of ``row.data`` permuted by ``idx.data``."""
        perm_in = trtc.DVPermutation(row.data, idx.data)
        index = trtc.Max_Element(perm_in.range(0, len(row)))
        row_idx = idx[index]
        result = row[row_idx]
        return result

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def amin(row, idx):
        """Return the element of ``row`` selected by ``Min_Element`` over the
        first ``len(row)`` entries of ``row.data`` permuted by ``idx.data``."""
        perm_in = trtc.DVPermutation(row.data, idx.data)
        index = trtc.Min_Element(perm_in.range(0, len(row)))
        row_idx = idx[index]
        result = row[row_idx]
        return result

    # cell_id[i] = sum over j of cell_origin[size * i + j] * strides[j].
    # NOTE(review): this indexing treats cell_origin as having row stride
    # `size` (= cell_origin.shape[0]) - confirm the intended memory layout.
    __cell_id_body = trtc.For(
        ['cell_id', 'cell_origin', 'strides', 'n_dims', 'size'],
        "i",
        '''
        cell_id[i] = 0;
        for (int j = 0; j < n_dims; j++) {
            cell_id[i] += cell_origin[size * i + j] * strides[j];
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def cell_id(cell_id, cell_origin, strides):
        """Fill ``cell_id`` from ``cell_origin`` and ``strides``, one thread
        per element of ``cell_id``."""
        n_dims = trtc.DVInt64(strides.shape[1])
        size = trtc.DVInt64(cell_origin.shape[0])
        AlgorithmicStepMethods.__cell_id_body.launch_n(
            cell_id.size(), [cell_id, cell_origin, strides, n_dims, size])

    # data_out[i] = |data_in[i] - data_in[i + 1]| for first-in-pair i, else 0.
    __distance_pair_body = trtc.For(
        ['data_out', 'data_in', 'is_first_in_pair'],
        "i",
        '''
        if (is_first_in_pair[i]) {
            data_out[i] = abs(data_in[i] - data_in[i + 1]);
        } else {
            data_out[i] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def distance_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Write the absolute difference of each pair into ``data_out``."""
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        if length > 1:
            AlgorithmicStepMethods.__distance_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])

    # Position i starts a pair when i and i + 1 share a cell and i sits at an
    # even offset from that cell's start.
    __find_pairs_body = trtc.For(
        ['cell_start', 'perm_cell_id', 'is_first_in_pair'],
        "i",
        '''
        is_first_in_pair[i] = (
            perm_cell_id[i] == perm_cell_id[i+1] &&
            (i - cell_start[perm_cell_id[i]]) % 2 == 0
        );
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def find_pairs(cell_start, is_first_in_pair, cell_id, idx, length):
        """Fill ``is_first_in_pair`` for the first ``length - 1`` positions."""
        perm_cell_id = trtc.DVPermutation(cell_id, idx)
        if length > 1:
            AlgorithmicStepMethods.__find_pairs_body.launch_n(
                length - 1, [cell_start, perm_cell_id, is_first_in_pair])

    # data_out[i] = max(perm_in[i], perm_in[i + 1]) for first-in-pair i, else 0.
    __max_pair_body = trtc.For(['data_out', 'perm_in', 'is_first_in_pair'], "i", '''
        if (is_first_in_pair[i]) {
            data_out[i] = max(perm_in[i], perm_in[i + 1]);
        }
        else {
            data_out[i] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def max_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Write the larger member of each pair into ``data_out``."""
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        if length > 1:
            AlgorithmicStepMethods.__max_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])

    # For first-in-pair i, writes the pair into data_out[i], data_out[i + 1]
    # in descending order. There is deliberately no `else` branch: a thread
    # that is not first in a pair must not touch data_out[i], because the
    # preceding thread may be writing that very slot as the second member of
    # its pair. Untouched slots keep the zero that sort_pair fills in before
    # the launch.
    __sort_pair_body = trtc.For(['data_out', 'data_in', 'is_first_in_pair'], "i", '''
        if (is_first_in_pair[i]) {
            if (data_in[i] < data_in[i + 1]) {
                data_out[i] = data_in[i + 1];
                data_out[i + 1] = data_in[i];
            }
            else {
                data_out[i] = data_in[i];
                data_out[i + 1] = data_in[i + 1];
            }
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def sort_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Write each pair into ``data_out`` in descending order.

        ``data_out`` is zero-filled first, so every position that does not
        belong to a pair ends up as 0.
        """
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        trtc.Fill(data_out, trtc.DVDouble(0))
        if length > 1:
            AlgorithmicStepMethods.__sort_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])

    # data_out[i] = perm_in[i] + perm_in[i + 1] for first-in-pair i, else 0.
    __sum_pair_body = trtc.For(['data_out', 'perm_in', 'is_first_in_pair'], "i", '''
        if (is_first_in_pair[i]) {
            data_out[i] = perm_in[i] + perm_in[i + 1];
        }
        else {
            data_out[i] = 0;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def sum_pair(data_out, data_in, is_first_in_pair, idx, length):
        """Write the sum of each pair into ``data_out``."""
        # note: silently assumes that data_out is not permuted (i.e. not part of state)
        perm_in = trtc.DVPermutation(data_in, idx)
        if length > 1:
            AlgorithmicStepMethods.__sum_pair_body.launch_n(
                length - 1, [data_out, perm_in, is_first_in_pair])
def upload(numpy_data, backend_target):
    """Upload flattened ``numpy_data`` and swap it with ``backend_target``."""
    flat_host = numpy_data.flatten()
    staged = trtc.device_vector_from_numpy(flat_host)
    trtc.Swap(staged, backend_target)
import ThrustRTC as trtc

d_input = trtc.device_vector_from_list([0, 2, 5, 7, 8], 'int32_t')

# Values probed by every search below, in print order.
probes = (0, 1, 2, 3, 8, 9)

for probe in probes:
    print(trtc.Lower_Bound(d_input, trtc.DVInt32(probe)))
print()

for probe in probes:
    print(trtc.Upper_Bound(d_input, trtc.DVInt32(probe)))
print()

for probe in probes:
    print(trtc.Binary_Search(d_input, trtc.DVInt32(probe)))
def write_row(array, i, row):
    """Copy ``row`` into row ``i`` of ``array``.

    The destination is the flat range of ``array`` starting at
    ``array.shape[1] * i`` and spanning ``array.shape[1]`` elements.
    """
    width = array.shape[1]
    first = width * i
    destination = array.range(first, first + width)
    trtc.Copy(row, destination)
import ThrustRTC as trtc is_even = trtc.Functor({}, ['x'], ''' return ((x % 2) == 0); ''') dvalues = trtc.device_vector_from_list([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], 'int32_t') dmap = trtc.device_vector_from_list([0, 2, 4, 6, 8, 1, 3, 5, 7, 9], 'int32_t') doutput = trtc.device_vector('int32_t', 10) trtc.Gather(dmap, dvalues, doutput) print(doutput.to_host()) dvalues = trtc.device_vector_from_list([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'int32_t') dstencil = trtc.device_vector_from_list([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], 'int32_t') dmap = trtc.device_vector_from_list([0, 2, 4, 6, 8, 1, 3, 5, 7, 9], 'int32_t') doutput = trtc.device_vector_from_list([7, 7, 7, 7, 7, 7, 7, 7, 7, 7], 'int32_t') trtc.Gather_If(dmap, dstencil, dvalues, doutput) print(doutput.to_host()) dvalues = trtc.device_vector_from_list([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'int32_t') dstencil = trtc.device_vector_from_list([0, 3, 4, 1, 4, 1, 2, 7, 8, 9], 'int32_t') dmap = trtc.device_vector_from_list([0, 2, 4, 6, 8, 1, 3, 5, 7, 9], 'int32_t') doutput = trtc.device_vector_from_list([7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
def get(index):
    """Return the ``Reduce`` of the one-element range of ``data`` at ``index``."""
    single = data.range(index, index + 1)
    return trtc.Reduce(single)
import ThrustRTC as trtc

# User-defined negation functor (defined here but not used below; the
# built-in trtc.Negate() is what gets passed to Transform).
negate = trtc.Functor({}, ['x'], '''
    return -x;
''')

darr = trtc.device_vector('int32_t', 10)

# Negate a 10-element DVCounter that starts at 5 into darr.
counter = trtc.DVCounter(trtc.DVInt32(5), 10)
trtc.Transform(counter, darr, trtc.Negate())
print(darr.to_host())
def shuffle_global(idx, length, u01):
    """Reorder the first ``length`` entries of ``idx`` by key-sorting them
    with the first ``length`` entries of ``u01`` as the keys."""
    # WARNING: ineffective implementation
    keys = u01.range(0, length)
    values = idx.range(0, length)
    trtc.Sort_By_Key(keys, values)
import ThrustRTC as trtc

# Plain copy.
dIn = trtc.device_vector_from_list(
    [10, 20, 30, 40, 50, 60, 70, 80], 'int32_t')
dOut = trtc.device_vector('int32_t', 8)
trtc.Copy(dIn, dOut)
print(dOut.to_host())

# Predicate functor: true when x is even.
is_even = trtc.Functor({}, ['x'], '''
    return x % 2 == 0;
''')

# Copy_If with is_even as the predicate.
dIn = trtc.device_vector_from_list([-2, 0, -1, 0, 1, 2], 'int32_t')
dOut = trtc.device_vector('int32_t', 6)
count = trtc.Copy_If(dIn, dOut, is_even)
print(dOut.to_host(0, count))

# Copy_If_Stencil with a separate stencil vector and is_even as the predicate.
dIn = trtc.device_vector_from_list([0, 1, 2, 3, 4, 5], 'int32_t')
dStencil = trtc.device_vector_from_list([-2, 0, -1, 0, 1, 2], 'int32_t')
dOut = trtc.device_vector('int32_t', 6)
count = trtc.Copy_If_Stencil(dIn, dStencil, dOut, is_even)
print(dOut.to_host(0, count))
import ThrustRTC as trtc

host_values = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0]
host_indices = [2, 6, 1, 3]

dvalues = trtc.device_vector_from_list(host_values, 'float')
dindices = trtc.device_vector_from_list(host_indices, 'int32_t')
doutput = trtc.device_vector('float', 4)

# Negate a DVPermutation view (dvalues through dindices) into doutput.
perm = trtc.DVPermutation(dvalues, dindices)
trtc.Transform(perm, doutput, trtc.Negate())
print(doutput.to_host())
# example showing how to separate init and call

import ThrustRTC as trtc
import CURandRTC as rndrtc

N_STATES = 1024

rng = rndrtc.DVRNG()

# Kernel 1: initialise one generator state per index.
ker_init = trtc.For(['rng', 'states'], 'idx', '''
    rng.state_init(1234, idx, 0, states[idx]);
''')

# Kernel 2: draw one value from each state.
ker_call = trtc.For(['states', 'vec_rnd'], 'idx', '''
    vec_rnd[idx]=(float)states[idx].rand01();
''')

rng_states = trtc.device_vector('RNGState', N_STATES)
d_vec_rnd = trtc.device_vector('float', N_STATES)

ker_init.launch_n(N_STATES, [rng, rng_states])
ker_call.launch_n(N_STATES, [rng_states, d_vec_rnd])
print(d_vec_rnd.to_host())
import ThrustRTC as trtc

# Functor applying sqrtf to its argument.
square_root = trtc.Functor({}, ['x'], '''
    return sqrtf(x);
''')

dvalues = trtc.device_vector_from_list([1.0, 4.0, 9.0, 16.0], 'float')
doutput = trtc.device_vector('float', 4)

# Negate a DVTransform view (square_root applied to dvalues) into doutput.
dtrans = trtc.DVTransform(dvalues, 'float', square_root)
trtc.Transform(dtrans, doutput, trtc.Negate())
print(doutput.to_host())