def calculate_displacement(dim, scheme, displacement, courant, cell_origin, position_in_cell):
    """Launch a device loop writing one displacement value per droplet.

    Parameters
    ----------
    dim : int
        Dimension being processed; uploaded as a ``DVInt64`` and compared
        against 0 and 1 inside the kernel.
    scheme : callable
        Called as ``scheme(None, None, None)``; its result is interpolated
        into the kernel source as the right-hand side of the assignment.
        NOTE(review): presumably a C expression in terms of ``omega``,
        ``c_l`` and ``c_r`` -- confirm against the callers.
    displacement : device array
        Output, addressed flat as ``droplet + idx_length * dim``;
        ``displacement.shape[1]`` is the number of launched work items.
    courant : device array
        Read at the flat indices ``_l`` and ``_r``; ``courant.shape[0]`` is
        used as the stride of the second index.
    cell_origin, position_in_cell : device arrays
        Addressed flat with a stride of ``position_in_cell.shape[1]``.
    """
    dim = trtc.DVInt64(dim)
    idx_length = trtc.DVInt64(position_in_cell.shape[1])
    courant_length = trtc.DVInt64(courant.shape[0])
    # Fixes relative to the previous kernel text:
    #  * ``displacement[droplet, dim]`` was a C++ comma expression, i.e. every
    #    thread wrote ``displacement[dim]``; it now uses the same flat
    #    addressing as ``position_in_cell``.
    #  * ``omega``, ``c_r`` and ``c_l`` are ``auto`` so that a floating-point
    #    element type is no longer truncated to ``int``.
    #  * the generated assignment is terminated with ``;``.
    loop = trtc.For(
        ['dim', 'idx_length', 'displacement', 'courant', 'courant_length',
         'cell_origin', 'position_in_cell'],
        "droplet",
        f'''
        // Arakawa-C grid
        int _l_0 = cell_origin[droplet + 0];
        int _l_1 = cell_origin[droplet + idx_length];
        int _l = _l_0 + _l_1 * courant_length;
        int _r_0 = cell_origin[droplet + 0] + 1 * (dim == 0);
        int _r_1 = cell_origin[droplet + idx_length] + 1 * (dim == 1);
        int _r = _r_0 + _r_1 * courant_length;
        auto omega = position_in_cell[droplet + idx_length * dim];
        auto c_r = courant[_r];
        auto c_l = courant[_l];
        displacement[droplet + idx_length * dim] = {scheme(None, None, None)};
        ''')
    loop.launch_n(
        displacement.shape[1],
        [dim, idx_length, displacement, courant, courant_length,
         cell_origin, position_in_cell])
def coalescence(n, volume, idx, length, intensive, extensive, gamma, healthy, adaptive, subs, adaptive_memory):
    """Run the coalescence kernel and reduce ``adaptive_memory``.

    The kernel is launched over ``length - 1`` work items with the ``.data``
    members of the array arguments plus device scalars for the lengths of
    ``idx``, ``intensive`` and ``extensive``, the ``adaptive`` flag and
    ``subs``.

    Returns the ``trtc.Maximum`` reduction, seeded with 0, of the first
    ``length - 1`` entries of ``adaptive_memory.data``.
    """
    # Device-side scalars are created first, in the same order as before.
    idx_length, intensive_length, extensive_length = (
        trtc.DVInt64(len(container))
        for container in (idx, intensive, extensive)
    )
    adaptive_device = trtc.DVBool(adaptive)
    subs_device = trtc.DVInt64(subs)

    kernel_args = [
        n.data,
        volume.data,
        idx.data, idx_length,
        intensive.data, intensive_length,
        extensive.data, extensive_length,
        gamma.data,
        healthy.data,
        adaptive_device,
        subs_device,
        adaptive_memory.data,
    ]
    AlgorithmicMethods.__coalescence_body.launch_n(length - 1, kernel_args)

    reduced_view = adaptive_memory.data.range(0, length - 1)
    return trtc.Reduce(reduced_view, trtc.DVInt64(0), trtc.Maximum())
def coalescence(n, volume, idx, length, intensive, extensive, gamma, healthy):
    """Launch the coalescence kernel over ``length - 1`` work items.

    The sizes of ``idx``, ``intensive`` and ``extensive`` (as reported by
    their ``size()`` methods) are uploaded as ``DVInt64`` scalars and passed
    right after the vector they describe. Nothing is returned.
    """
    idx_length, intensive_length, extensive_length = (
        trtc.DVInt64(vector.size())
        for vector in (idx, intensive, extensive)
    )
    kernel_args = [
        n,
        volume,
        idx, idx_length,
        intensive, intensive_length,
        extensive, extensive_length,
        gamma,
        healthy,
    ]
    AlgorithmicMethods.__coalescence_body.launch_n(length - 1, kernel_args)
def __init__(self, size, seed=None):
    """Allocate ``size`` RNG states on the device and initialise them.

    Parameters
    ----------
    size : int
        Number of ``RNGState`` entries to allocate; also the launch count of
        the initialisation kernel.
    seed : int, optional
        Seed uploaded to the initialisation kernel. When ``None`` a seed is
        drawn from ``np.random.randint(0, 2 ** 16)``.
    """
    self.generator = trtc.device_vector('RNGState', size)
    self.size = size
    # ``seed or ...`` discarded an explicit ``seed=0`` and the fallback range
    # was ``2 * 16`` (= 32), a typo for ``2 ** 16``.
    if seed is None:
        seed = np.random.randint(0, 2 ** 16)
    dseed = trtc.DVInt64(seed)
    Random.__urand_init_rng_state_body.launch_n(
        size, [rndrtc.DVRNG(), self.generator, dseed])
def urand(data, seed=None):
    """Fill ``data`` with values produced by ``states[i].rand01()``.

    One work item is launched per element of ``data`` (``data.size()``),
    reading from the shared ``MathsMethods.states`` vector.

    NOTE(review): ``seed`` is resolved and uploaded but never reaches a
    kernel, because the re-seeding launch is disabled -- confirm whether that
    is intended.
    """
    # TODO: print("Numpy import!: ThrustRTC.urand(...)")
    if not seed:
        seed = np.random.randint(2**16)
    dseed = trtc.DVInt64(seed)
    # Disabled re-seeding step, kept for reference:
    # MathsMethods.__urand_init_rng_state_body.launch_n(MathsMethods.states.size(), [MathsMethods.__rng, MathsMethods.states, dseed])
    generator_states = MathsMethods.states
    MathsMethods.__urand_body.launch_n(data.size(), [generator_states, data])
def _sort_by_cell_id_and_update_cell_start(cell_id, cell_start, idx, length):
    """Sort ``idx`` by ``cell_id`` and refresh ``cell_start``.

    Steps: key-sort ``idx`` with ``cell_id`` as the key, fill ``cell_start``
    with ``length``, then launch the update kernel over ``length - 1`` work
    items. Returns ``idx``.
    """
    trtc.Sort_By_Key(cell_id, idx)

    fill_value = trtc.DVInt64(length)
    trtc.Fill(cell_start, fill_value)

    update_kernel = AlgorithmicMethods.___sort_by_cell_id_and_update_cell_start_body
    update_kernel.launch_n(length - 1, [cell_id, cell_start, idx])
    return idx
def remove_zeros(data, idx, length) -> int:
    """Run the remove-zeros kernel, sort ``idx`` and count surviving entries.

    Returns ``idx.size()`` minus the number of entries of ``idx`` equal to
    ``idx.size()``.
    NOTE(review): presumably the kernel overwrites removed entries with that
    value as a sentinel -- the kernel body is not visible here.
    """
    idx_length = trtc.DVInt64(idx.size())
    # Warning (potential bug source): reading from outside of array
    remove_kernel = AlgorithmicMethods.__remove_zeros_body
    remove_kernel.launch_n(length, [data, idx, idx_length])
    trtc.Sort(idx)

    total = idx.size()
    flagged = trtc.Count(idx, idx_length)
    return total - flagged
def thrust(obj):
    """Convert ``obj`` into something that can be passed to a device kernel.

    * ``list`` -> list of converted items (recursively);
    * any object exposing a ``data`` attribute -> that attribute;
    * ``float`` -> ``trtc.DVDouble``;
    * ``int`` -> ``trtc.DVInt64``.

    Raises
    ------
    ValueError
        If ``obj`` matches none of the cases above.
    """
    if isinstance(obj, list):
        return [thrust(item) for item in obj]
    if hasattr(obj, 'data'):
        return obj.data
    if isinstance(obj, float):
        return trtc.DVDouble(obj)
    if isinstance(obj, int):
        return trtc.DVInt64(obj)
    raise ValueError(f"Cannot upload {obj} to device.")
def __setitem__(self, key, value):
    """Overwrite the whole of ``self.data`` from ``value``.

    ``key`` is not consulted. An object with a ``data`` attribute is copied
    element-wise via ``trtc.Copy``; an ``int`` or ``float`` is broadcast via
    ``trtc.Fill``. Returns ``self``.

    Raises
    ------
    TypeError
        If ``value`` has no ``data`` attribute and is neither int nor float.
    """
    if hasattr(value, 'data'):
        trtc.Copy(value.data, self.data)
        return self

    # Scalar path: wrap the value in the matching device scalar type.
    if isinstance(value, int):
        dvalue = trtc.DVInt64(value)
    elif isinstance(value, float):
        dvalue = trtc.DVDouble(value)
    else:
        raise TypeError("Only Storage, int and float are supported.")
    trtc.Fill(self.data, dvalue)
    return self
def multiply(output, multiplier):
    """Multiply ``output`` in place by ``multiplier``.

    A ``StorageMethods.storage`` multiplier selects the element-wise kernel
    and is passed through unchanged; a ``float`` or ``int`` selects the
    scalar kernel and is uploaded as ``DVDouble`` / ``DVInt64``.

    Raises
    ------
    NotImplementedError
        For any other multiplier type.
    """
    if isinstance(multiplier, StorageMethods.storage):
        loop, device_multiplier = (
            MathsMethods.__multiply_elementwise_body, multiplier)
    elif isinstance(multiplier, float):
        loop, device_multiplier = (
            MathsMethods.__multiply_body, trtc.DVDouble(multiplier))
    elif isinstance(multiplier, int):
        loop, device_multiplier = (
            MathsMethods.__multiply_body, trtc.DVInt64(multiplier))
    else:
        raise NotImplementedError()

    kernel_args = [output, device_multiplier]
    loop.launch_n(output.size(), kernel_args)
def remove_zeros(data, idx, length) -> int:
    """Run the remove-zeros kernel, sort ``idx`` and count surviving entries.

    Parameters
    ----------
    data, idx : device vectors
        Passed to the kernel together with ``idx.size()`` as a device scalar.
    length : int
        Number of work items launched.

    Returns
    -------
    int
        ``idx.size()`` minus the number of entries of ``idx`` equal to
        ``idx.size()``.
        NOTE(review): presumably the kernel writes that value as a sentinel
        for removed entries -- the kernel body is not visible here.
    """
    idx_length = trtc.DVInt64(idx.size())
    # Warning (potential bug source): reading from outside of array
    AlgorithmicMethods.__remove_zeros_body.launch_n(
        length, [data, idx, idx_length])
    trtc.Sort(idx)
    # The leftover debug ``print("undertaker")`` and the commented-out
    # Find-based variant were removed; the returned value is unchanged.
    return idx.size() - trtc.Count(idx, idx_length)
def remove_zeros(data, idx, length) -> int:
    """Flag entries whose data is zero and return how many remain.

    For every ``i < length`` with ``data[idx[i]] == 0`` the kernel overwrites
    ``idx[i]`` with ``idx.size()``. The first ``length`` entries of ``idx``
    are then sorted, which moves the flagged entries to the end of that range.

    Returns
    -------
    int
        Position of the first flagged entry within the first ``length``
        entries, or ``length`` when nothing was flagged.
    """
    idx_length = trtc.DVInt64(idx.size())
    loop = trtc.For(['data', 'idx', 'idx_length'], "i", '''
        if (data[idx[i]] == 0)
            idx[i] = idx_length;
        ''')
    loop.launch_n(length, [data, idx, idx_length])
    trtc.Sort(idx.range(0, length))
    result = trtc.Find(idx.range(0, length), idx_length)
    # "Not found" is accepted both as -1 and as None, since the reported
    # value appears to differ between ThrustRTC versions.
    if result is None or result == -1:
        return length
    return result
def row_modulo(output, divisor):
    """Apply the row-modulo kernel to every element of ``output``.

    ``output.shape[1]`` is uploaded as the ``length`` kernel argument and one
    work item is launched per element (``output.size()``).
    """
    row_length = trtc.DVInt64(output.shape[1])
    kernel = MathsMethods.__row_modulo_body
    kernel.launch_n(output.size(), [output, divisor, row_length])
def cell_id(cell_id, cell_origin, strides):
    """Launch the cell-id kernel, one work item per element of ``cell_id``.

    Besides the three arrays, the kernel receives ``strides.shape[1]`` and
    ``cell_origin.shape[0]`` as ``DVInt64`` scalars, in that order.
    """
    dims_device = trtc.DVInt64(strides.shape[1])
    size_device = trtc.DVInt64(cell_origin.shape[0])
    kernel_args = [cell_id, cell_origin, strides, dims_device, size_device]
    AlgorithmicStepMethods.__cell_id_body.launch_n(cell_id.size(), kernel_args)
def flag_precipitated(cell_origin, position_in_cell, idx, length, healthy):
    """Launch the flag-precipitated kernel over ``length`` work items.

    The kernel receives ``idx``, its size, an ``n_dims`` scalar, ``healthy``,
    ``cell_origin`` and ``position_in_cell``, in that order.

    NOTE(review): ``n_dims`` is ``len(cell_origin.shape)``, i.e. the rank of
    the array, not the extent of one of its axes -- confirm this is what the
    kernel expects.
    """
    size_device = trtc.DVInt64(idx.size())
    rank_device = trtc.DVInt64(len(cell_origin.shape))
    kernel_args = [
        idx, size_device, rank_device,
        healthy, cell_origin, position_in_cell,
    ]
    AlgorithmicMethods.__flag_precipitated_body.launch_n(length, kernel_args)
class MathsMethods:
    """Element-wise maths helpers implemented as ThrustRTC device loops.

    Each public static method launches a pre-built ``trtc.For`` kernel (or a
    ThrustRTC transform) over the elements of ``output``.
    """

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def add(output, addend):
        """Add ``addend`` to ``output`` element-wise, in place."""
        trtc.Transform_Binary(addend, output, output, trtc.Plus())

    __row_modulo_body = trtc.For(['output', 'divisor', 'length'], "i", '''
        int d = i / length;
        output[i] %= divisor[d];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def row_modulo(output, divisor):
        """Reduce each flat element ``i`` modulo ``divisor[i / length]``.

        ``length`` is ``output.shape[1]``, so consecutive runs of that many
        elements share one divisor.
        """
        length = trtc.DVInt64(output.shape[1])
        MathsMethods.__row_modulo_body.launch_n(output.size(), [output, divisor, length])

    # Floor via truncation: a negative value that changed under truncation
    # is decremented by one.
    __floor_body = trtc.For(['arr'], "i", '''
        if (arr[i] >= 0)
            arr[i] = (long) arr[i];
        else {
            auto old = arr[i];
            arr[i] = (long) arr[i];
            if (old != arr[i])
                arr[i] -= 1;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def floor(output):
        """Round every element of ``output`` down, in place."""
        MathsMethods.__floor_body.launch_n(output.size(), [output])

    __floor_out_of_place_body = trtc.For(['output', 'input_data'], "i", '''
        if (input_data[i] >= 0)
            output[i] = (long) input_data[i];
        else {
            output[i] = (long) input_data[i];
            if (input_data[i] != output[i])
                output[i] -= 1;
        }
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def floor_out_of_place(output, input_data):
        """Write the rounded-down elements of ``input_data`` into ``output``."""
        MathsMethods.__floor_out_of_place_body.launch_n(output.size(), [output, input_data])

    __multiply_elementwise_body = trtc.For(['output', 'multiplier'], "i", '''
        output[i] *= multiplier[i];
        ''')

    __multiply_body = trtc.For(['output', 'multiplier'], "i", '''
        output[i] *= multiplier;
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def multiply(output, multiplier):
        """Multiply ``output`` in place by a storage, ``float`` or ``int``.

        Raises ``NotImplementedError`` for any other multiplier type.
        """
        if isinstance(multiplier, StorageMethods.storage):
            loop = MathsMethods.__multiply_elementwise_body
            device_multiplier = multiplier
        elif isinstance(multiplier, float):
            loop = MathsMethods.__multiply_body
            device_multiplier = trtc.DVDouble(multiplier)
        elif isinstance(multiplier, int):
            loop = MathsMethods.__multiply_body
            device_multiplier = trtc.DVInt64(multiplier)
        else:
            raise NotImplementedError()
        loop.launch_n(output.size(), [output, device_multiplier])

    __multiply_out_of_place_elementwise_body = trtc.For(['output', 'multiplicand', 'multiplier'], "i", '''
        output[i] = multiplicand[i] * multiplier[i];
        ''')

    __multiply_out_of_place_body = trtc.For(['output', 'multiplicand', 'multiplier'], "i", '''
        output[i] = multiplicand[i] * multiplier;
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def multiply_out_of_place(output, multiplicand, multiplier):
        """Store ``multiplicand * multiplier`` into ``output``.

        ``multiplier`` may be a storage (element-wise), a ``float`` or an
        ``int``. Raises ``NotImplementedError`` for any other type.
        """
        if isinstance(multiplier, StorageMethods.storage):
            loop = MathsMethods.__multiply_out_of_place_elementwise_body
            device_multiplier = multiplier
        elif isinstance(multiplier, float):
            loop = MathsMethods.__multiply_out_of_place_body
            device_multiplier = trtc.DVDouble(multiplier)
        elif isinstance(multiplier, int):
            # Integer scalars are accepted here as well, matching multiply().
            loop = MathsMethods.__multiply_out_of_place_body
            device_multiplier = trtc.DVInt64(multiplier)
        else:
            raise NotImplementedError()
        loop.launch_n(output.size(), [output, multiplicand, device_multiplier])

    __power_body = trtc.For(['output', 'exponent'], "i", '''
        output[i] = pow(output[i], exponent);
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def power(output, exponent):
        """Raise every element of ``output`` to ``exponent``, in place.

        An exponent equal to 1 is a no-op and launches nothing; otherwise the
        exponent is uploaded as a double.
        """
        if exponent == 1:
            return
        device_multiplier = trtc.DVDouble(exponent)
        MathsMethods.__power_body.launch_n(output.size(), [output, device_multiplier])

    __subtract_body = trtc.For(['output', 'subtrahend'], 'i', '''
        output[i] -= subtrahend[i];
        ''')

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def subtract(output, subtrahend):
        """Subtract ``subtrahend`` from ``output`` element-wise, in place."""
        MathsMethods.__subtract_body.launch_n(output.size(), [output, subtrahend])
        # Alternative left disabled by the original author:
        # trtc.Transform_Binary(output, subtrahend, output, trtc.Minus())

    # NOTE(review): the kernel declares ``seed`` but initialises every state
    # with the literal 1234, so the value passed at launch has no effect.
    __urand_init_rng_state_body = trtc.For(['rng', 'states', 'seed'], 'i', '''
        rng.state_init(1234, i, 0, states[i]);
        ''')

    __urand_body = trtc.For(['states', 'vec_rnd'], 'i', '''
        vec_rnd[i]=states[i].rand01();
        ''')

    # Shared generator states, allocated and initialised once when the class
    # body is executed.
    __rng = rndrtc.DVRNG()
    states = trtc.device_vector('RNGState', 2**19)
    __urand_init_rng_state_body.launch_n(states.size(), [__rng, states, trtc.DVInt64(12)])

    @staticmethod
    @nice_thrust(**NICE_THRUST_FLAGS)
    def urand(data, seed=None):
        """Fill ``data`` with values produced by ``states[i].rand01()``.

        One work item is launched per element of ``data``, reading from the
        shared ``states`` vector.

        NOTE(review): ``seed`` is resolved and uploaded but never reaches a
        kernel, because the re-seeding launch below is disabled. The seed
        draw is kept so the global NumPy generator is consumed as before.
        """
        # TODO: print("Numpy import!: ThrustRTC.urand(...)")
        seed = seed or np.random.randint(2**16)
        dseed = trtc.DVInt64(seed)
        # MathsMethods.__urand_init_rng_state_body.launch_n(MathsMethods.states.size(), [MathsMethods.__rng, MathsMethods.states, dseed])
        MathsMethods.__urand_body.launch_n(data.size(), [MathsMethods.states, data])
def interpolation(output, radius, factor, b, c):
    """Launch the interpolation kernel, one work item per entry of ``radius``.

    ``factor`` is uploaded as a ``DVInt64``; the remaining arguments are
    passed through their ``.data`` members.
    """
    factor_device = trtc.DVInt64(factor)
    work_items = len(radius)
    kernel_args = [output.data, radius.data, factor_device, b.data, c.data]
    AlgorithmicMethods.__interpolation_body.launch_n(work_items, kernel_args)