def concat_backward(gradient: Tensor, tensors: List[Tensor], axis: int = 0):
    """Route the gradient of a concatenation back to each concatenated input.

    The incoming gradient is cut along ``axis`` at the boundaries between the
    original inputs and each piece is passed to ``_set_grad`` for the
    matching tensor.

    Args:
        gradient: Gradient with respect to the concatenated result.
        tensors: The tensors that were concatenated, in concatenation order.
        axis: Axis along which the tensors were concatenated.
    """
    _check_tensors(*tensors)
    engine = _get_engine(*tensors)

    # Split at the cumulative extents of the inputs instead of into
    # ``len(tensors)`` equal sections: an equal split hands every input a
    # piece of the wrong size (or raises) as soon as the inputs differ in
    # length along ``axis``.  For equally sized inputs the boundaries below
    # coincide with the equal split, so that case is unchanged.
    boundaries = []
    offset = 0
    for tensor in tensors[:-1]:
        offset += tensor.data.shape[axis]
        boundaries.append(offset)
    grad_arrays = engine.split(gradient.data, boundaries, axis=axis)

    for tensor, grad_array in zip(tensors, grad_arrays):
        # Multiplication by ones is kept from the original implementation;
        # presumably it materialises a fresh array shaped like the input
        # rather than passing on a view of the gradient -- TODO confirm.
        _set_grad(tensor, data=grad_array * engine.ones_like(tensor.data))
def eps_greedy_hypothesis(q_func, env, state, n_actions, eps=.3):
    """Choose an action epsilon-greedily using rolled-out network outputs.

    With probability ``eps`` an action is sampled from ``env.action_space``.
    Otherwise ``q_func`` is evaluated on ``state``; columns
    ``n_actions:-1`` of every output are split into ``n_actions`` equal
    pieces which are fed back into ``q_func``.  This is repeated for four
    further rounds, the leading ``n_actions`` rows of every output are
    collected as columns, and the index of the column holding the overall
    maximum is returned.

    Args:
        q_func: Callable whose result exposes a ``.data`` array.
        env: Environment providing ``action_space.sample()``.
        state: Current observation; a leading batch axis is added here.
        n_actions: Number of actions / hypothesis pieces per output.
        eps: Exploration probability.

    Returns:
        The sampled action when exploring, otherwise the arg-max index.
    """
    # Exploration branch.
    if np.random.rand() < eps:
        return env.action_space.sample()

    def split_hypotheses(outputs):
        # Columns between the first n_actions and the last one, cut into
        # n_actions equal pieces.
        return cp.split(outputs[:, n_actions:-1], n_actions, axis=1)

    def evaluate(batches):
        return [q_func(batch).data for batch in batches]

    def as_columns(outputs):
        # One column per output, built from its leading n_actions rows.
        return np.hstack(
            [out[:n_actions].get().flatten()[None].T for out in outputs]
        )

    root = q_func(cp.array(state[None].astype(DTYPE))).data

    # NOTE(review): the original evaluated the first-level hypotheses twice
    # and discarded the first result; the extra pass is kept so q_func is
    # invoked exactly as often as before.
    evaluate(split_hypotheses(root))
    outputs = evaluate(split_hypotheses(root))

    scores = as_columns(outputs)
    for _ in range(4):
        stacked = [cp.vstack(split_hypotheses(out)) for out in outputs]
        outputs = evaluate(stacked)
        scores = np.vstack([scores, as_columns(outputs)])

    return scores.max(axis=0).argmax()
def backward(self, error):
    """Split ``error`` along the cached axis and pass a piece to each input.

    ``self.cache`` holds ``(x, axis)``.  ``error`` is cut at the running
    totals of ``x[i].value.shape[axis]`` for all inputs but the last, and
    piece ``i`` is given to ``x[i].accumulate``.
    """
    x, axis = self.cache
    section_sizes = [x[i].value.shape[axis] for i in range(len(x) - 1)]

    # numpy and cupy behave differently here:
    #   cupy.cumsum -> raises when the input is a Python list
    #   cupy.split  -> the index data has to live on the CPU
    # so the GPU path computes the split points with plain numpy.
    cumsum = numpy.cumsum if DEVICE == "gpu" else np.cumsum
    dxs = np.split(error, cumsum(section_sizes), axis=axis)

    for i, dx in enumerate(dxs):
        x[i].accumulate(dx)
def forward(self):
    """Derive the weight array from the phase mask stored in ``self.phi``.

    Steps, in the order performed below:

    1. inverse 2-D FFT of ``exp(1j * phi)`` over axes (0, 1), squared
       magnitude, normalised by its sum over those two axes;
    2. resize on the host with ``cv.resize`` (nearest neighbour) to
       ``(final_dim, final_dim)``;
    3. multiply by the transposed matrix loaded from
       ``filename_crosstalk``;
    4. crop ``unpad_1`` / ``unpad_2`` from both ends of axes 0 / 1;
    5. subtract the second half (along axis 0) from the first half;
    6. cut the result into ``rows`` x ``cols`` tiles, crop ``pad`` from each
       tile border, stack the tiles and move the tile axis last;
    7. scale by ``norm_factor``.

    Relies on module-level names not visible in this block: ``final_dim``,
    ``filename_crosstalk``, ``unpad_1``, ``unpad_2``, ``rows``, ``cols``,
    ``pad``, ``mempool`` and ``norm_factor``.

    Returns:
        The stacked, transposed tiles multiplied by ``norm_factor``.
    """
    # Original author's notes: input is the tensor phi:(1120,1120,3);
    # "clone phi?"
    phi = cp.asarray(self.phi)
    h = cp.fft.ifft2(cp.fft.ifftshift(cp.exp(1.0j*phi),axes=(0, 1)),axes=(0, 1))
    psf_new = cp.square(cp.abs(h))
    # Normalise by the sum over the first two axes; for inputs with extra
    # axes the sum is reshaped so it broadcasts over the trailing axes.
    if len(psf_new.shape)==2:
        norm = cp.sum(psf_new)
    else:
        norm = cp.reshape(cp.sum(psf_new,axis=(0,1)),(1,1)+psf_new.shape[2:])
    psf_new = psf_new/norm
    # cv.resize is given a host array, hence the device -> host copy.
    psf_new_rescaled = cv.resize(cp.asnumpy(psf_new), (final_dim, final_dim), interpolation=cv.INTER_NEAREST)
    psf_new_rescaled = cp.asarray(psf_new_rescaled)
    A = np.load(filename_crosstalk)
    A_t = np.transpose(A)
    # NOTE(review): the array was moved to the device just above and is
    # copied straight back to the host here.
    P = cp.asnumpy(psf_new_rescaled)
    # Batched matmul of A_t with every length-P.shape[-1] vector of P;
    # presumably P is 3-D with a trailing axis matching A_t -- TODO confirm.
    B = np.matmul(A_t.reshape((1,1)+A_t.shape),P.reshape(P.shape+(1,))).reshape(P.shape)
    # Original author's note: psf_new_unpadded (152, 228, 3)
    # NOTE(review): an unpad value of 0 makes the slice ``0:-0`` empty.
    psf_new_unpadded = B[unpad_1:-unpad_1, unpad_2:-unpad_2]
    psf_new_unpadded = cp.asarray(psf_new_unpadded)
    # Original author's note: tiled kernels (76, 228, 3)
    # First half minus second half along axis 0.
    tiled_kernels = cp.split(psf_new_unpadded,2)[0]-cp.split(psf_new_unpadded,2)[1]
    # Original author's note: (48, 3, 3, 3)
    weights_pm = []
    for i in range(rows):
        for j in range(cols):
            # Tile (i, j) of the rows x cols grid.
            padded_kernel = cp.split(cp.split(tiled_kernels,rows,axis=0)[i],cols,axis=1)[j]
            # NOTE(review): as above, pad == 0 would produce an empty tile.
            kernel = padded_kernel[pad:-pad, pad:-pad]
            weights_pm.append(kernel)
    # Original author's note: (3,3,3,48)
    # NOTE(review): weights_pm is a list of cupy arrays at this point;
    # confirm that np.asarray accepts them with the CuPy version in use.
    weights_pm = np.asarray(weights_pm)
    # Move the tile axis from first to last position.
    weights_pm = np.transpose(weights_pm, (1,2,3,0))
    mempool.free_all_blocks()
    return weights_pm*norm_factor
def converter(batch, device):
    """Place ``batch`` according to the ``device`` selector.

    * ``device == 0``: return ``np.array(batch)``.
    * ``device == 1``: build an ``xp`` array and send it through
      ``cuda.to_gpu``.
    * ``device >= 2``: bring the batch to the host, cast to ``int64``, split
      it into ``device`` equal parts and send part ``i`` to GPU ``i``;
      a list is returned.
    * anything else: ``batch`` is returned untouched.
    """
    if device == 0:
        return np.array(batch)

    if device == 1:
        return cuda.to_gpu(xp.array(batch))

    if device >= 2:
        host_batch = cuda.to_cpu(batch)
        shards = xp.split(xp.array(host_batch).astype(xp.int64), device)
        return [cuda.to_gpu(shards[gpu_id], gpu_id) for gpu_id in range(device)]

    return batch
def chunk(tensor: Tensor, chunks: int, dim: int = 0):
    """Split ``tensor`` into ``chunks`` pieces along ``dim``.

    Every piece is wrapped via ``_create_tensor`` with a backward function
    built from ``chunk_backward`` bound to the source tensor and the chunk
    count.

    Args:
        tensor: Tensor to split.
        chunks: Passed to the engine's ``split`` as the section count.
        dim: Axis along which to split.

    Returns:
        A list with one tensor per piece, in split order.
    """
    _check_tensors(tensor)
    pieces = _get_engine(tensor).split(tensor.data, chunks, dim)

    # NOTE(review): ``dim`` is not forwarded to chunk_backward -- confirm
    # the backward pass does not need it.
    return [
        _create_tensor(
            tensor,
            data=piece,
            func=wrapped_partial(chunk_backward, tensor=tensor, chunks=chunks),
        )
        for piece in pieces
    ]
def forward_gpu(self, inputs):
    """Run the fused gate update as a single elementwise GPU kernel.

    ``inputs`` is ``(gate_inputs, prev_c)``.  ``gate_inputs`` is split into
    four equal blocks along axis 1 (forget, input, tanh and output gate
    pre-activations).  The kernel computes the three gates as
    ``(tanh(x / 2) + 1) / 2`` (the logistic sigmoid written with tanh),
    the tanh gate, ``next_c = forget * prev_c + input * tanh_gate`` and
    ``next_h = output * tanh(next_c)``.  All intermediate results are stored
    on ``self``.

    Returns:
        ``(next_h, next_c)``.
    """
    gate_inputs, prev_c = inputs
    forget_in, input_in, candidate_in, output_in = cupy.split(
        gate_inputs, 4, axis=1)

    in_params = (
        "T forget_gate_input, T input_gate_input, "
        "T tanh_input, T output_gate_input, T prev_c"
    )
    out_params = (
        "T forget_gate, T input_gate, T tanh_gate, T output_gate, "
        "T tanh_next_c, T next_h, T next_c"
    )
    operation = (
        "forget_gate = (tanh(forget_gate_input * 0.5f) + 1.0f) * 0.5f;"
        "input_gate = (tanh(input_gate_input * 0.5f) + 1.0f) * 0.5f;"
        "tanh_gate = tanh(tanh_input);"
        "next_c = forget_gate * prev_c + input_gate * tanh_gate;"
        "output_gate = (tanh(output_gate_input * 0.5f) + 1.0f) * 0.5f;"
        "tanh_next_c = tanh(next_c);"
        "next_h = output_gate * tanh(next_c)"
    )

    fused_kernel = cuda.elementwise(
        in_params, out_params, operation, 'gqn_core_fwd')
    results = fused_kernel(
        forget_in, input_in, candidate_in, output_in, prev_c)

    # Outputs arrive in the order declared in ``out_params``.
    (self.forget_gate, self.input_gate, self.tanh_gate, self.output_gate,
     self.tanh_next_c, self.next_h, self.next_c) = results

    return self.next_h, self.next_c
def _stratify_split(X, stratify, labels, n_train, n_test, x_numba, y_numba,
                    random_state):
    """
    Function to perform a stratified split based on stratify column.
    Based on scikit-learn stratified split implementation.

    Parameters
    ----------
    X: Shuffled input data (cudf.DataFrame or an object exposing
       ``__cuda_array_interface__``)
    stratify: column to be stratified on (cudf.Series, single-column
              cudf.DataFrame or an object exposing
              ``__cuda_array_interface__``)
    labels: Shuffled labels (same accepted kinds as ``stratify``)
    n_train: Number of samples in train set
    n_test: Number of samples in test set
    x_numba: Determines whether the data should be converted to numba
    y_numba: Determines whether the labels should be converted to numba
    random_state: ``cupy.random.RandomState`` or ``numpy.random.RandomState``
                  (the latter is converted to a cupy one using its seed)

    Returns
    -------
    X_train, X_test: Data X divided into train and test sets
    y_train, y_test: Labels divided into train and test sets

    Raises
    ------
    ValueError
        If ``labels`` or ``stratify`` is a DataFrame with more than one
        column, if any class has fewer than two members, or if ``n_train``
        is smaller than the number of classes.
    """
    x_cudf = False
    labels_cudf = False

    if isinstance(X, cudf.DataFrame):
        x_cudf = True
    elif hasattr(X, "__cuda_array_interface__"):
        X = cp.asarray(X)
        x_order = _strides_to_order(X.__cuda_array_interface__['strides'],
                                    cp.dtype(X.dtype))

    # labels and stratify will be only cp arrays
    if isinstance(labels, cudf.Series):
        labels_cudf = True
        labels = labels.values
    elif hasattr(labels, "__cuda_array_interface__"):
        labels = cp.asarray(labels)
    # Fixed: this branch used to test ``stratify`` instead of ``labels``,
    # so a DataFrame of labels was only handled when ``stratify`` happened
    # to be a DataFrame too.
    elif isinstance(labels, cudf.DataFrame):
        # ensuring it has just one column
        if labels.shape[1] != 1:
            # Fixed: '%d' with the 2-tuple shape raised TypeError instead of
            # this ValueError, and the literals were joined without a space.
            raise ValueError('Expected one column for labels, but found df '
                             'with shape = %s' % (labels.shape,))
        labels_cudf = True
        # Select the only column by position so that its name does not have
        # to be the integer 0.
        labels = labels[labels.columns[0]].values

    labels_order = _strides_to_order(
        labels.__cuda_array_interface__['strides'],
        cp.dtype(labels.dtype))

    # Converting to cupy array removes the need to add an if-else block
    # for stratify column
    if isinstance(stratify, cudf.Series):
        stratify = stratify.values
    elif hasattr(stratify, "__cuda_array_interface__"):
        stratify = cp.asarray(stratify)
    elif isinstance(stratify, cudf.DataFrame):
        # ensuring it has just one column
        if stratify.shape[1] != 1:
            # Same format-string fix as for ``labels`` above.
            raise ValueError('Expected one column, but found column '
                             'with shape = %s' % (stratify.shape,))
        stratify = stratify[stratify.columns[0]].values

    classes, stratify_indices = cp.unique(stratify, return_inverse=True)

    n_classes = classes.shape[0]
    class_counts = cp.bincount(stratify_indices)
    if cp.min(class_counts) < 2:
        raise ValueError("The least populated class in y has only 1"
                         " member, which is too few. The minimum"
                         " number of groups for any class cannot"
                         " be less than 2.")

    if n_train < n_classes:
        raise ValueError('The train_size = %d should be greater or '
                         'equal to the number of classes = %d' %
                         (n_train, n_classes))

    # Row indices grouped by class: sort by class id, then cut the sorted
    # index list at the cumulative class counts.
    class_indices = cp.split(cp.argsort(stratify_indices),
                             cp.cumsum(class_counts)[:-1].tolist())

    X_train = None

    # random_state won't be None or int, that's handled earlier
    if isinstance(random_state, np.random.RandomState):
        random_state = cp.random.RandomState(seed=random_state.get_state()[1])

    # Break ties
    n_i = _approximate_mode(class_counts, n_train, random_state)
    class_counts_remaining = class_counts - n_i
    t_i = _approximate_mode(class_counts_remaining, n_test, random_state)

    for i in range(n_classes):
        # Shuffle the members of class i, then take the first n_i[i] for
        # training and the following t_i[i] for testing.
        permutation = random_state.permutation(class_counts[i].item())
        perm_indices_class_i = class_indices[i].take(permutation)

        y_train_i = cp.array(labels[perm_indices_class_i[:n_i[i]]],
                             order=labels_order)
        y_test_i = cp.array(labels[perm_indices_class_i[n_i[i]:n_i[i] +
                                                        t_i[i]]],
                            order=labels_order)
        if hasattr(X, "__cuda_array_interface__") or \
                isinstance(X, cupyx.scipy.sparse.csr_matrix):
            # NOTE(review): ``x_order`` is only assigned for inputs exposing
            # ``__cuda_array_interface__``; confirm the csr_matrix case.
            X_train_i = cp.array(X[perm_indices_class_i[:n_i[i]]],
                                 order=x_order)
            X_test_i = cp.array(X[perm_indices_class_i[n_i[i]:n_i[i] +
                                                       t_i[i]]],
                                order=x_order)

            if X_train is None:
                X_train = cp.array(X_train_i, order=x_order)
                y_train = cp.array(y_train_i, order=labels_order)
                X_test = cp.array(X_test_i, order=x_order)
                y_test = cp.array(y_test_i, order=labels_order)
            else:
                X_train = cp.concatenate([X_train, X_train_i], axis=0)
                X_test = cp.concatenate([X_test, X_test_i], axis=0)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

        elif x_cudf:
            X_train_i = X.iloc[perm_indices_class_i[:n_i[i]]]
            X_test_i = X.iloc[perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]]]

            if X_train is None:
                X_train = X_train_i
                y_train = y_train_i
                X_test = X_test_i
                y_test = y_test_i
            else:
                X_train = cudf.concat([X_train, X_train_i],
                                      ignore_index=False)
                X_test = cudf.concat([X_test, X_test_i], ignore_index=False)
                y_train = cp.concatenate([y_train, y_train_i], axis=0)
                y_test = cp.concatenate([y_test, y_test_i], axis=0)

    # Convert the results back to the container kinds requested/received.
    if x_numba:
        X_train = cuda.as_cuda_array(X_train)
        X_test = cuda.as_cuda_array(X_test)
    elif x_cudf:
        X_train = cudf.DataFrame(X_train)
        X_test = cudf.DataFrame(X_test)

    if y_numba:
        y_train = cuda.as_cuda_array(y_train)
        y_test = cuda.as_cuda_array(y_test)
    elif labels_cudf:
        y_train = cudf.Series(y_train)
        y_test = cudf.Series(y_test)

    return X_train, X_test, y_train, y_test
def oaconvolve(in1, in2, mode="full", axes=None):
    """Convolve two N-dimensional arrays using the overlap-add method.

    Convolve ``in1`` and ``in2`` using the overlap-add method, with the output
    size determined by the ``mode`` argument. This is generally faster than
    ``convolve`` for large arrays, and generally faster than ``fftconvolve``
    when one array is much larger than the other, but can be slower when only
    a few output values are needed or when the arrays are very similar in
    shape, and can only output float arrays (int or object array inputs will
    be cast to float).

    Args:
        in1 (cupy.ndarray): First input.
        in2 (cupy.ndarray): Second input. Should have the same number of
            dimensions as ``in1``.
        mode (str): Indicates the size of the output:

            - ``'full'``: output is the full discrete linear
              convolution of the inputs (default)
            - ``'valid'``: output consists only of those elements that do
              not rely on the zero-padding. Either ``in1`` or
              ``in2`` must be at least as large as the other in
              every dimension.
            - ``'same'``: output is the same size as ``in1``, centered
              with respect to the ``'full'`` output

        axes (scalar or tuple of scalar or None): Axes over which to compute
            the convolution. The default is over all axes.

    Returns:
        cupy.ndarray: the result of convolution

    .. seealso:: :func:`cupyx.scipy.signal.convolve`
    .. seealso:: :func:`cupyx.scipy.signal.fftconvolve`
    .. seealso:: :func:`cupyx.scipy.ndimage.convolve`
    .. seealso:: :func:`scipy.signal.oaconvolve`
    """
    # The input check may already produce the final answer; return it then.
    out = _st_core._check_conv_inputs(in1, in2, mode)
    if out is not None:
        return out
    if in1.shape == in2.shape:  # Equivalent to fftconvolve
        return fftconvolve(in1, in2, mode=mode, axes=axes)

    in1, in2, axes = _st_core._init_freq_conv_axes(in1, in2, mode, axes,
                                                   sorted_axes=True)
    s1, s2 = in1.shape, in2.shape
    # No axes left to convolve over: fall back to an elementwise product.
    if not axes:
        return _st_core._apply_conv_mode(in1 * in2, s1, s2, mode, axes)

    # Calculate the block sizes for the output, steps, first and second inputs.
    # It is simpler to calculate them all together than doing them in separate
    # loops due to all the special cases that need to be handled.
    # Axes that are not convolved get placeholder block size / overlap (-1)
    # and keep their full extent as the step.
    optimal_sizes = (_st_core._calc_oa_lens(s1[i], s2[i]) if i in axes else
                     (-1, -1, s1[i], s2[i]) for i in range(in1.ndim))
    block_size, overlaps, in1_step, in2_step = zip(*optimal_sizes)

    # Fall back to fftconvolve if there is only one block in every dimension
    if in1_step == s1 and in2_step == s2:
        return fftconvolve(in1, in2, mode=mode, axes=axes)

    # Pad and reshape the inputs for overlapping and adding
    shape_final = [s1[i] + s2[i] - 1 if i in axes else None
                   for i in range(in1.ndim)]
    in1, in2 = _st_core._oa_reshape_inputs(in1, in2, axes, shape_final,
                                           block_size, overlaps,
                                           in1_step, in2_step)

    # Reshape the overlap-add parts to input block sizes
    # Each convolved axis is shifted by the number of convolved axes before
    # it (``axes`` was requested sorted above); the matching FFT axis is the
    # one right after the split axis.
    split_axes = [iax + i for i, iax in enumerate(axes)]
    fft_axes = [iax + 1 for iax in split_axes]

    # Do the convolution
    fft_shape = [block_size[i] for i in axes]
    ret = _st_core._freq_domain_conv(in1, in2, fft_axes, fft_shape,
                                     calc_fast_len=False)

    # Do the overlap-add
    for ax, ax_fft, ax_split in zip(axes, fft_axes, split_axes):
        overlap = overlaps[ax]
        # Presumably ``_calc_oa_lens`` reports ``None`` when an axis needs
        # no overlap handling -- confirm against that helper.
        if overlap is None:
            continue

        # Cut the trailing ``overlap`` samples off every block, and drop the
        # tail belonging to the last block along the split axis.
        ret, overpart = cupy.split(ret, [-overlap], ax_fft)
        overpart = cupy.split(overpart, [-1], ax_split)[0]

        # Leading ``overlap`` samples of every block except the first one.
        ret_overpart = cupy.split(ret, [overlap], ax_fft)[0]
        ret_overpart = cupy.split(ret_overpart, [1], ax_split)[1]
        # ``ret_overpart`` is never read again, so this in-place add only
        # takes effect because the split pieces alias ``ret``.  Do not
        # replace it with a rebinding ``ret_overpart = ret_overpart + ...``.
        ret_overpart += overpart

    # Reshape back to the correct dimensionality
    # Each (split axis, fft axis) pair is merged back into a single axis.
    shape_ret = [ret.shape[i] if i not in fft_axes else
                 ret.shape[i] * ret.shape[i - 1]
                 for i in range(ret.ndim) if i not in split_axes]
    ret = ret.reshape(*shape_ret)

    # Slice to the correct size
    # ``slice(None)`` keeps the untouched axes whole.
    ret = ret[tuple([slice(islice) for islice in shape_final])]

    return _st_core._apply_conv_mode(ret, s1, s2, mode, axes)
def compress_by_chunk(self, cupy_bool_tensor, num_chunks):
    """Bit-pack a boolean array and cut the packed result into chunks.

    Args:
        cupy_bool_tensor: Array handed to ``cupy.packbits``.
        num_chunks: Number of equal sections requested from ``cupy.split``.

    Returns:
        The list of packed chunks, in order.
    """
    packed_chunks = cupy.split(cupy.packbits(cupy_bool_tensor), num_chunks)
    # Wait for the current CUDA stream before handing the chunks back.
    cupy.cuda.get_current_stream().synchronize()
    return packed_chunks
def _init_households(self, household_data: dict):
    """
    Assign every individual to a household and record who meets whom.

    Individuals aged 60 or more are selected with the probability stored in
    ``household_data['elderly'][2]`` and paired up within their city; these
    pairs are kept in ``self._household_meetings_elderly``.  Everybody gets
    a household size drawn from the ratios in ``household_data['young']``;
    for each size from 2 upwards the members are grouped per city and stored
    in ``self._household_meetings``.  Finally the household and age
    distributions are plotted into the ``check_plots`` directory.
    """

    def meetings_per_slot(member_indexes, group_size):
        # For each city, trim the members to a multiple of ``group_size``
        # and deal them into ``group_size`` equally long slots; slot k of
        # all cities is concatenated.
        member_cities = self.city_id[member_indexes]
        slots = [[] for _ in range(group_size)]
        for city_id in self.city_ids:
            in_city = member_indexes[member_cities == city_id]
            usable = len(in_city) // group_size * group_size
            parts = cp.split(in_city[:usable], group_size)
            for slot, part in zip(slots, parts):
                slot.append(part)
        return [cp.hstack(slot) for slot in slots]

    # --- elderly couples -------------------------------------------------
    dice = cp.random.random(self._size)
    is_paired_senior = (self.age >= 60) * (
        dice <= household_data['elderly'][2])
    first_partner, second_partner = meetings_per_slot(
        self._indices[is_paired_senior], 2)
    self._household_meetings_elderly = {
        'first': first_partner,
        'second': second_partner,
    }

    # === split the rest of the population into households:
    self._household_sizes = cp.ones(self._size)
    splitting_dice = cp.random.random(self._size)
    young_ratios = household_data['young']
    self._max_household_size = max(young_ratios.keys())

    # Consecutive bands of the unit interval, one per household size.
    lower = 0
    for size, ratio in young_ratios.items():
        upper = lower + ratio
        in_band = (lower <= splitting_dice) * (splitting_dice < upper)
        self._household_sizes[in_band] = size
        lower = upper

    # Temporarily mark the elderly couples so the loop below skips them.
    paired_seniors = cp.hstack([
        self._household_meetings_elderly['first'],
        self._household_meetings_elderly['second'],
    ])
    self._household_sizes[paired_seniors] = -1

    for household_size in range(2, self._max_household_size + 1):
        members = self._indices[self._household_sizes == household_size]
        if len(members) == 0:
            continue
        self._household_meetings[household_size] = meetings_per_slot(
            members, household_size)

    # The marked couples live in households of two.
    self._household_sizes[self._household_sizes == -1] = 2

    # === plot the distribution of households:
    check_plot_dir = self.config.get('check_plots')
    ensure_dir(check_plot_dir)
    if cuda:
        ages = cp.asnumpy(self.age)
        household_sizes = cp.asnumpy(self._household_sizes)
    else:
        ages, household_sizes = self.age, self._household_sizes

    household_age_distribution(
        ages=ages,
        household_sizes=household_sizes,
        filepath=f'{check_plot_dir}/household-distributions.png')
    age_distribution(
        ages, filepath=f'{check_plot_dir}/age-distributions.png')