def image_complexity(self, X, masks):
    # complexity threshold
    threshold = 25.0
    # reshape
    # masks = cp.reshape(masks, masks.shape[:-1])
    R, G, B = X[:, :, :, 0:1], X[:, :, :, 1:2], X[:, :, :, 2:]
    # compute rg = R - G
    rg = cp.absolute(R - G)
    # compute yb = 0.5 * (R + G) - B
    yb = cp.absolute(0.5 * (R + G) - B)
    _masks = cp.logical_not(masks)
    # compute the mean and standard deviation of both `rg` and `yb`
    # (no masked_where on cupy)
    rb_masked = np.ma.masked_where(_masks, rg)
    (rb_mean, rb_std) = (cp.mean(rb_masked, axis=(1, 2, 3)),
                         cp.std(rb_masked, axis=(1, 2, 3)))
    yb_masked = np.ma.masked_where(_masks, yb)
    (yb_mean, yb_std) = (cp.mean(yb_masked, axis=(1, 2, 3)),
                         cp.std(yb_masked, axis=(1, 2, 3)))
    # combine the means and standard deviations
    std_root = cp.sqrt((rb_std**2) + (yb_std**2))
    mean_root = cp.sqrt((rb_mean**2) + (yb_mean**2))
    # derive the "colorfulness" metric and return it
    complexity = std_root + (0.3 * mean_root)  # coefficient adjusted
    print('image_complexity Done.')
    return (complexity >= threshold).astype(cp.uint8)
def sortBatches2(ccb0):
    # takes as input a matrix of nBatches by nBatches containing
    # dissimilarities.
    # outputs a matrix of sorted batches, and the sorting order, such that
    # ccb1 = ccb0(isort, isort)

    # put this matrix on the GPU
    ccb0 = cp.asarray(ccb0, order='F')

    # compute its svd on the GPU (this might also be fast enough on CPU)
    u, s, v = svdecon(ccb0)
    # HACK: consistency with MATLAB
    u = u * cp.sign(u[0, 0])
    v = v * cp.sign(u[0, 0])

    # initialize the positions xs of the batch embeddings to be very small but
    # proportional to the first PC
    xs = .01 * u[:, 0] / cp.std(u[:, 0], ddof=1)

    # 200 iterations of gradient descent should be enough
    # TODO: move_to_config
    niB = 200

    # this learning rate should usually work fine, since it scales with the
    # average gradient and ccb0 is z-scored
    # TODO: move_to_config
    eta = 1
    for k in tqdm(range(niB), desc="Sorting %d batches" % ccb0.shape[0]):
        # Euclidean distances between 1D embedding positions
        ds = (xs - xs[:, np.newaxis])**2
        # the transformed distances go through this function
        W = cp.log(1 + ds)

        # the error is the difference between ccb0 and W
        err = ccb0 - W

        # ignore the mean value of ccb0
        err = err - cp.mean(err, axis=0)

        # backpropagate the gradients
        err = err / (1 + ds)
        err2 = err * (xs[:, np.newaxis] - xs)
        D = cp.mean(err2, axis=1)  # one half of the gradients is along this direction
        E = cp.mean(err2, axis=0)  # the other half is along this direction
        # we don't need to worry about the gradients for the diagonal because those are 0

        # final gradients for the embedding variable
        dx = -D + E.T

        # take a gradient step
        xs = xs - eta * dx

    # sort the embedding positions xs
    isort = cp.argsort(xs, axis=0)

    # sort the matrix of dissimilarities
    ccb1 = ccb0[isort, :][:, isort]

    return ccb1, isort
def noiser(self, pitch_track, SNR):
    self.clean = cp.empty((self.size))
    self.clean[:] = self.data
    RMS = cp.std(self.data[pitch_track > 0])
    noise = cp.random.normal(0, RMS / (10**(SNR / 20)), self.size)
    self.data += noise
def normalize_array(A):
    mean, std = np.mean(A), np.std(A)
    A_normed = (A - mean) / std

    def restore_function(X):
        return X * std + mean

    return A_normed.astype(np.float32), restore_function
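# Usage sketch for normalize_array above (added for illustration, not part of
# the original source): checks that restore_function inverts the z-scoring up
# to float32 rounding.
import numpy as np

A = np.random.default_rng(0).normal(loc=3.0, scale=2.0, size=1000)
A_normed, restore = normalize_array(A)
print(np.mean(A_normed), np.std(A_normed))           # ~0 and ~1
print(np.allclose(restore(A_normed), A, atol=1e-4))  # True: round-trip recovers A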
def mass2_gpu(ts, query):
    """
    Compute the distance profile for the given query over the given time
    series. This requires cupy to be installed.

    Parameters
    ----------
    ts : array_like
        The array to create a rolling window on.
    query : array_like
        The query.

    Returns
    -------
    An array of distances.

    Raises
    ------
    ValueError
        If ts is not a list or np.array.
        If query is not a list or np.array.
        If ts or query is not one dimensional.
    """
    def moving_mean_std_gpu(a, w):
        s = cp.concatenate([cp.array([0]), cp.cumsum(a)])
        sSq = cp.concatenate([cp.array([0]), cp.cumsum(a**2)])
        segSum = s[w:] - s[:-w]
        segSumSq = sSq[w:] - sSq[:-w]
        movmean = segSum / w
        movstd = cp.sqrt(segSumSq / w - (segSum / w)**2)
        return (movmean, movstd)

    x = cp.asarray(ts)
    y = cp.asarray(query)
    n = x.size
    m = y.size

    meany = cp.mean(y)
    sigmay = cp.std(y)

    meanx, sigmax = moving_mean_std_gpu(x, m)
    meanx = cp.concatenate([cp.ones(n - meanx.size), meanx])
    sigmax = cp.concatenate([cp.zeros(n - sigmax.size), sigmax])

    y = cp.concatenate((cp.flip(y, axis=0), cp.zeros(n - m)))

    X = cp.fft.fft(x)
    Y = cp.fft.fft(y)
    Z = X * Y
    z = cp.fft.ifft(Z)

    dist = 2 * (m - (z[m - 1:n] - m * meanx[m - 1:n] * meany) /
                (sigmax[m - 1:n] * sigmay))
    dist = cp.sqrt(dist)

    return cp.asnumpy(dist)
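# Illustrative call for mass2_gpu (an added sketch, not from the original
# module; requires cupy and a CUDA device). The query is cut from the series
# itself, so the distance profile should be ~0 at that offset.
import numpy as np

ts = np.sin(np.linspace(0, 20 * np.pi, 1000))
query = ts[100:150]
profile = mass2_gpu(ts, query)
print(profile.shape)      # (len(ts) - len(query) + 1,)
print(abs(profile[100]))  # ~0 (the profile is complex-typed from the inverse FFT)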
def fix(self):
    if self.PITCH_HALF > 0:
        nz_pitch = self.samp_values[self.samp_values > 0]
        idx = self.samp_values < (cp.mean(nz_pitch) -
                                  self.PITCH_HALF_SENS * cp.std(nz_pitch))
        if self.PITCH_HALF == 1:
            self.samp_values[idx] = 0
        elif self.PITCH_HALF == 2:
            self.samp_values[idx] = 2 * self.samp_values[idx]

    if self.PITCH_DOUBLE > 0:
        nz_pitch = self.samp_values[self.samp_values > 0]
        idx = self.samp_values > (cp.mean(nz_pitch) +
                                  self.PITCH_DOUBLE_SENS * cp.std(nz_pitch))
        if self.PITCH_DOUBLE == 1:
            self.samp_values[idx] = 0
        elif self.PITCH_DOUBLE == 2:
            self.samp_values[idx] = 0.5 * self.samp_values[idx]
def std_data(X):
    mu = cp.mean(X, axis=1, keepdims=True)
    sigma = cp.std(X, axis=1, keepdims=True)
    X = (X - mu) / sigma
    stdval_cache = {"mu": mu, "sigma": sigma}
    return X, stdval_cache
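# Assumed usage of std_data (a sketch added for illustration): each row of a
# CuPy matrix is standardized, and the cached mu/sigma undo the scaling.
import cupy as cp

X = cp.random.normal(5.0, 3.0, size=(4, 256))
X_std, cache = std_data(X)
print(float(cp.abs(cp.mean(X_std, axis=1)).max()))   # ~0 per row
print(float(cp.std(X_std, axis=1).mean()))           # ~1 per row
X_back = X_std * cache["sigma"] + cache["mu"]        # invert with the cache
print(bool(cp.allclose(X_back, X)))                  # True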
def std(inp) -> 'Tensor':
    _check_tensors(inp)
    engine = _get_engine(inp)

    return _create_tensor(
        inp,
        data=engine.std(inp.data),
        func=wrapped_partial(std_backward, inp=inp)
    )
def stablesoftmax(x):
    """Compute the softmax of vector x in a numerically stable way."""
    assert x.ndim == 2
    classes = x.shape[1]
    x = x - np.mean(x, keepdims=True)
    x = x / np.std(x, keepdims=True)
    x = np.exp(x)
    x = x / np.sum(x, -1, keepdims=True)
    return x
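# Added sketch (not from the source): rows of the output sum to one even for
# large input magnitudes, since the input is standardized before exponentiation.
import numpy as np

logits = np.array([[1.0, 2.0, 3.0], [1000.0, 1001.0, 1002.0]])
probs = stablesoftmax(logits)
print(probs.sum(axis=-1))   # [1. 1.]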
def norm_percentile(signals, p1=5, p2=95, pcnt=True):
    # n signals array are of dim (n, T)
    out = cp.zeros(signals.shape)
    for i in range(signals.shape[0]):
        s = signals[i, :]
        if pcnt == True:
            [n, m] = cp.percentile(s, [p1, p2])
            out[i, :] = (s - n) / (m - n)
        else:
            out[i, :] = (s - cp.mean(s)) / cp.std(s)
    return out
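# Hypothetical call pattern for norm_percentile (added for illustration):
# percentile scaling maps the 5th/95th percentiles of each signal to ~0 and ~1,
# while pcnt=False falls back to per-signal z-scoring.
import cupy as cp

signals = cp.random.normal(0.0, 1.0, size=(3, 1000))
scaled = norm_percentile(signals, p1=5, p2=95, pcnt=True)
zscored = norm_percentile(signals, pcnt=False)
print(scaled.shape, zscored.shape)   # (3, 1000) (3, 1000)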
def softmax_derivative(x):
    assert x.ndim == 2
    classes = x.shape[1]
    x = x - np.mean(x, keepdims=True)
    x = x / np.std(x, keepdims=True)
    out = np.zeros((x.shape[0], classes, classes))
    for i in range(classes):
        for j in range(classes):
            out[:, i, j] = x[:, i] * (1 - x[:, i]) if (i == j) else -x[:, i] * x[:, j]
    return out
def evolve(organism, organism_loss, scale, size, learning_rate=0.001):
    flat_organism = flatten_organism(organism)
    loc = flat_organism.mean(axis=0, keepdims=True)
    n_organisms = flat_organism.shape[0]
    N = flat_organism - loc
    A = (organism_loss - cp.mean(organism_loss)) / (1e-6 + cp.std(organism_loss))
    loc = loc - learning_rate / (n_organisms * scale) * cp.dot(N.T, A)
    N = cp.random.normal(loc=0, scale=scale,
                         size=(int(n_organisms / 2), flat_organism.shape[1]))
    new_flat_organism = cp.concatenate((loc + N, loc - N), axis=0)
    return reform_organism(new_flat_organism, size), loc, N
def normalize(data, mask=None, padding=0, return_space='cpu'):
    """ Apply normalization on GPU

    Applies normalisation (data - mean) / stdev

    Args:
        data (np/cp.array): Data to preprocess
        mask (np/cp.array): 1D channel mask for RFI flagging
        padding (int): Size of edge region to discard (e.g. coarse channel edges)
        return_space ('cpu' or 'gpu'): Returns array in CPU or GPU space

    Returns:
        d_gpu (cp.array): Normalized data
    """
    # Copy over to GPU if required
    d_gpu = cp.asarray(data.astype('float32', copy=False))
    d_gpu_flagged = cp.asarray(data.astype('float32', copy=True))

    paddingu = None if padding == 0 else -padding

    # Need to correct stats
    N_flagged = 0
    N_tot = np.product(d_gpu[..., padding:paddingu].shape)
    if mask is not None:
        # Convert 1D mask to match data dimensions
        mask_gpu = cp.repeat(cp.asarray(mask.reshape((1, 1, len(mask)))),
                             d_gpu.shape[0], axis=0)
        cp.putmask(d_gpu_flagged, mask_gpu, 0)
        N_flagged = mask_gpu[..., padding:paddingu].sum()

    # Normalise
    t0 = time.time()
    # Compute stats based off flagged arrays
    d_mean = cp.mean(d_gpu_flagged[..., padding:paddingu])
    d_std = cp.std(d_gpu_flagged[..., padding:paddingu])
    flag_correction = N_tot / (N_tot - N_flagged)
    d_mean = d_mean * flag_correction
    d_std = d_std * np.sqrt(flag_correction)
    logger.debug(f"flag fraction correction factor: {flag_correction}")

    # Apply to original data
    d_gpu = (d_gpu - d_mean) / d_std
    t1 = time.time()
    logger.info(f"Normalisation time: {(t1-t0)*1e3:2.2f}ms")

    if return_space == 'cpu':
        return cp.asnumpy(d_gpu)
    else:
        return d_gpu
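# Added usage sketch (assumes a CUDA device; array layout follows the
# docstring: (time, beam, channel)). Masked channels are excluded from the
# statistics via the flag-fraction correction.
import numpy as np

data = np.random.normal(10.0, 2.0, size=(16, 1, 256)).astype('float32')
mask = np.zeros(256, dtype=bool)
mask[100:110] = True                # pretend these channels contain RFI
d_norm = normalize(data, mask=mask, padding=4)
print(d_norm.mean(), d_norm.std())  # approximately 0 and 1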
def load_mc(self, t_ij):
    for j, (l, h) in enumerate(
            zip(self.observables.lows, self.observables.highs)):
        in_bounds = np.logical_and(t_ij[:, j] > l, t_ij[:, j] < h)
        t_ij = t_ij[in_bounds]
    self.t_ij = cp.asarray(t_ij)
    self.w_i = cp.ones(t_ij.shape[0])
    if self.bootstrap_binning is not None:
        counts, _ = cp.histogramdd(cp.asarray(self.t_ij),
                                   bins=self.bin_edges,
                                   weights=cp.asarray(self.w_i))
        self.counts = (cp.asarray(counts).flatten() / self.bin_vol /
                       cp.sum(cp.asarray(counts))).reshape(counts.shape)
    self.sigma_j = cp.std(self.t_ij, axis=0)
    self.h_ij = self._adapt_bandwidth()
    for j, (l, h, refl) in enumerate(
            zip(self.observables.lows, self.observables.highs,
                self.reflect_axes)):
        if not refl:
            continue
        if type(refl) == tuple:
            low, high = refl

            mask = self.t_ij[:, j] < low
            t_ij_reflected_low = cp.copy(self.t_ij[mask, :])
            h_ij_reflected_low = self.h_ij[mask, :]
            w_i_reflected_low = self.w_i[mask]  # w_i is 1D; indexing with [mask, :] would fail
            t_ij_reflected_low[:, j] = 2 * l - t_ij_reflected_low[:, j]

            mask = self.t_ij[:, j] > high
            t_ij_reflected_high = cp.copy(self.t_ij[mask, :])
            h_ij_reflected_high = self.h_ij[mask, :]
            w_i_reflected_high = self.w_i[mask]
            t_ij_reflected_high[:, j] = 2 * h - t_ij_reflected_high[:, j]
        else:
            t_ij_reflected_low = cp.copy(self.t_ij)
            h_ij_reflected_low = self.h_ij
            w_i_reflected_low = self.w_i
            t_ij_reflected_low[:, j] = 2 * l - self.t_ij[:, j]

            t_ij_reflected_high = cp.copy(self.t_ij)
            h_ij_reflected_high = self.h_ij
            w_i_reflected_high = self.w_i
            t_ij_reflected_high[:, j] = 2 * h - self.t_ij[:, j]

        self.t_ij = cp.concatenate(
            [self.t_ij, t_ij_reflected_low, t_ij_reflected_high])
        self.h_ij = cp.concatenate(
            [self.h_ij, h_ij_reflected_low, h_ij_reflected_high])
        self.w_i = cp.concatenate(
            [self.w_i, w_i_reflected_low, w_i_reflected_high])
    self.t_ij = cp.ascontiguousarray(self.t_ij)
    self.h_ij = cp.ascontiguousarray(self.h_ij)
    self.w_i = cp.ascontiguousarray(self.w_i)
def get_spike_amps(s, return_counts=False):
    sig = cupy.asarray(s[:1024, :20000])
    peaks = cusignal.peak_finding.peak_finding.argrelmin(sig, order=20, axis=1)
    mean_std = cupy.mean(cupy.std(sig, axis=1))
    significant_peaks = sig[peaks[0], peaks[1]] < (-10 * mean_std)
    amps = np.median(cupy.asnumpy(sig[:, peaks[1][significant_peaks]] * -1), axis=1)
    if return_counts:
        sig_peak_chans = peaks[0][significant_peaks]
        chan_count = np.array(
            [len(sig_peak_chans[sig_peak_chans == i]) for i in range(1024)])
        return amps, chan_count
    else:
        return amps
def sk_flag(data, metadata, n_sigma_upper=5, n_sigma_lower=5,
            flag_upper=True, flag_lower=True):
    """ Apply spectral kurtosis flagging

    Args:
        data (np.array): Numpy array with shape (N_timestep, N_beam, N_channel)
        metadata (dict): Metadata dictionary, should contain 'df' and 'dt'
                         (frequency and time resolution)
        boxcar_mode (str): Boxcar mode to apply. mean/sum/gaussian.
        n_sigma_upper (float): Number of stdev above SK estimate to flag (upper bound)
        n_sigma_lower (float): Number of stdev below SK estimate to flag (lower bound)
        flag_upper (bool): Flag channels with large SK (highly variable signals)
        flag_lower (bool): Flag channels with small SK (very stable signals)
        return_space ('cpu' or 'gpu'): Returns array in CPU or GPU space

    Returns:
        mask (np.array, bool): Array of True/False flags per channel

    Notes:
        sk_flag upper and lower stdev is computed on log2(sk), as the minimum
        spectral kurtosis (for a CW signal) approaches 0.
    """
    Fs = (1.0 / metadata['frequency_step'] / 2)
    samps_per_sec = np.abs(Fs.to('s').value)  # Nyquist sample rate for channel
    N_acc = int(metadata['time_step'].to('s').value / samps_per_sec)

    sk = spectral_kurtosis(data, metadata)

    # var_theoretical = 2.0 / np.sqrt(N_acc)
    # std_theoretical = np.sqrt(var_theoretical)
    log_sk = cp.log2(sk)
    std_log = cp.std(log_sk)
    mean_log = cp.mean(log_sk)

    if flag_upper and flag_lower:
        mask = log_sk < mean_log + (std_log * n_sigma_upper)
        mask &= log_sk > mean_log - (std_log * n_sigma_lower)
    elif flag_upper and not flag_lower:
        mask = log_sk > mean_log + (std_log * n_sigma_upper)
    elif flag_lower and not flag_upper:
        mask = log_sk < mean_log - (std_log * n_sigma_lower)
    else:
        raise RuntimeError("No flags to process: need to flag upper and/or lower!")

    return ~mask
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu-id', '-g', default=0, type=int, help='GPU ID')
    parser.add_argument('--n-options', default=1000, type=int)
    parser.add_argument('--n-samples-per-thread', default=1000, type=int)
    parser.add_argument('--n-threads-per-option', default=100000, type=int)
    args = parser.parse_args()

    cupy.cuda.Device(args.gpu_id).use()

    def rand_range(m, M):
        samples = cupy.random.rand(args.n_options)
        return (m + (M - m) * samples).astype(numpy.float64)

    print('initializing...')
    stock_price = rand_range(5, 30)
    option_strike = rand_range(1, 100)
    option_years = rand_range(0.25, 10)
    risk_free = 0.02
    volatility = 0.3

    @contextlib.contextmanager
    def timer(message):
        cupy.cuda.Stream.null.synchronize()
        start = time.time()
        yield
        cupy.cuda.Stream.null.synchronize()
        end = time.time()
        print('%s:\t%f sec' % (message, end - start))

    print('start computation')
    print('    # of options: {}'.format(args.n_options))
    print('    # of samples per option: {}'.format(
        args.n_samples_per_thread * args.n_threads_per_option))
    with timer('GPU (CuPy, Monte Carlo method)'):
        call_mc = compute_option_prices(stock_price, option_strike,
                                        option_years, risk_free, volatility,
                                        args.n_threads_per_option,
                                        args.n_samples_per_thread)

    # Compute the error between the value of the exact solution
    # and that of the Monte-Carlo simulation
    call_bs, _ = black_scholes_kernel(stock_price, option_strike,
                                      option_years, risk_free, volatility)
    error = cupy.std(call_mc - call_bs)
    print('Error: %f' % error)

    return 0
def fit(self, X, y, Xt=None, yt=None):
    if self.params['norm']:
        Xmean, Xstd = cp.mean(X, axis=0, keepdims=True), cp.std(X, axis=0, keepdims=True)
        X = (X - Xmean) / (Xstd + 1e-5)
        self.Xmean, self.Xstd = Xmean, Xstd

    if Xt is None:
        test_size = self.params['validation_fraction']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
        train_dataset = MyDataSet(X_train, y_train, task=self.task)
        valid_dataset = MyDataSet(X_test, y_test, task=self.task)
        Ntr, Nva = X_train.shape[0], X_test.shape[0]
    else:
        if self.params['norm']:
            Xt = (Xt - Xmean) / (Xstd + 1e-5)
        train_dataset = MyDataSet(X, y, task=self.task)
        valid_dataset = MyDataSet(Xt, yt, task=self.task)
        Ntr, Nva = X.shape[0], Xt.shape[0]

    batch_size = self.get_batch_size(N=Ntr, mb=1024)
    train_dataloader = MyDataLoader(train_dataset, batch_size=batch_size,
                                    shuffle=self.params['shuffle'], drop_last=True)
    batch_size = self.get_batch_size(N=Nva, mb=1024)
    valid_dataloader = MyDataLoader(valid_dataset, batch_size=batch_size,
                                    shuffle=False, drop_last=False)

    C = 1 if self.task == 'regression' else cp.unique(y).shape[0]
    model = MLP(X.shape[1], self.params['hidden_layer_sizes'], C, **self.params)

    if self.task == 'regression':
        loss_func = torch.nn.functional.mse_loss
    elif self.task == 'classification':
        loss_func = torch.nn.functional.cross_entropy
    else:
        assert 0, "Unknown task: " + self.task

    learner = Learner(model, loss_func, **self.params)
    learner.fit(train_dl=train_dataloader, valid_dl=valid_dataloader)
    self.learner = learner
def zdist(Q, S, mode="fft", epsilon=1e-6): """ Rolling mean- and amplitude-adjusted Euclidean Distance Arguments: ------- Q: cupy.core.core.ndarray or numba.cuda.DeviceNDArray or cudf.Series or numpy.ndarray the input query of length m to be aligned S: cupy.core.core.ndarray or numba.cuda.DeviceNDArray or cudf.Series or numpy.ndarray the input stream of length n>=m to be scanned epsilon: float non-negative number for regularizing zero stdev mode: str either "naive" or "fft" Returns ------- cupy.core.core.ndarray the computed distance array of length n-m+1 """ if not isinstance(Q, cp.core.core.ndarray): Q = cp.asarray(Q) if not isinstance(S, cp.core.core.ndarray): S = cp.asarray(S) assert (epsilon > 0) assert (Q.dtype == S.dtype) assert ((len(Q.shape) == len(S.shape) == 1 and Q.shape[0] <= S.shape[0])) assert (cp.std(Q, ddof=0) > 0) if mode == "fft": Z = fft_zdist(Q, S, epsilon) else: stream = cuda.stream() Z = cp.empty(len(S) - len(Q) + 1, dtype=Q.dtype) zdist_kernel[80 * 32, 64, stream](znorm(Q, epsilon), S, Z, epsilon) stream.synchronize() return Z
def znorm(x, epsilon):
    """
    Mean- and amplitude-adjustment of a given time series.
    Afterwards the time series has vanishing mean, i.e. sum_i x[i] = 0,
    and unit standard deviation, i.e. sum_i x[i]*x[i] = n,
    where n is the length of the sequence x.

    Arguments:
    -------
    x: cupy.core.core.ndarray
        the input array of length n to be normalized

    Returns
    -------
    cupy.core.core.ndarray
        the mean-adjusted array of length n
    """
    return (x - cp.mean(x)) / max(cp.std(x, ddof=0), epsilon)
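# Small added check for znorm (not part of the original docs): the normalized
# series has ~zero mean and ~unit standard deviation.
import cupy as cp

x = cp.asarray([1.0, 2.0, 3.0, 4.0, 5.0])
z = znorm(x, epsilon=1e-6)
print(float(cp.mean(z)), float(cp.std(z)))   # ~0.0 and ~1.0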
def collect_test_losses(num_folds):
    # Run this after CV results are in. e.g.:
    # python -c "from deepmolecule.util import collect_test_losses; collect_test_losses(10)"
    results = {}
    for net_type in ['conv', 'morgan']:
        results[net_type] = []
        for expt_ix in range(num_folds):
            fname = "Final_test_loss_{0}_{1}.pkl.save".format(expt_ix, net_type)
            try:
                with open(fname) as f:
                    results[net_type].append(pickle.load(f))
            except IOError:
                print("Couldn't find file {0}".format(fname))

    print("Results are:")
    print(results)
    print("Means:")
    print({k: np.mean(v) for k, v in results.items()})
    print("Std errors:")
    print({k: np.std(v) / np.sqrt(len(v) - 1) for k, v in results.items()})
def run_gpu(X, eps, min_samples):
    # Transfer inputs to GPU
    X = cp.array(X)

    # Begin computation
    t0 = time.time()
    mean = cp.mean(X, axis=0)
    std = cp.std(X, axis=0)
    cp.subtract(X, mean, out=X)
    cp.divide(X, std, out=X)
    print('Preprocessing:', time.time() - t0)

    # Run DBSCAN
    db = cuml.DBSCAN(eps=eps, min_samples=min_samples)
    db = db.fit(X)
    labels = db.labels_

    # Transfer outputs to CPU
    # labels = labels.to_pandas().to_numpy()
    labels = cp.asnumpy(labels)

    return labels
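# Hypothetical driver for run_gpu (added sketch; assumes cuML and a GPU are
# available). Input stays a NumPy array; the function moves it to the GPU.
import numpy as np

X = np.random.rand(1000, 2).astype(np.float32)
labels = run_gpu(X, eps=0.3, min_samples=10)
print(np.unique(labels))   # cluster ids, with -1 marking noise points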
def filter_and_whiten(raw_traces, params, probe, whitening_matrix):
    sample_rate = params.fs
    high_pass_freq = params.fshigh
    low_pass_freq = params.fslow
    scaleproc = params.scaleproc

    whitening_matrix_np = cp.asarray(whitening_matrix, dtype=np.float32) / float(scaleproc)

    filtered_data = gpufilter(buff=cp.asarray(raw_traces, dtype=np.float32),
                              chanMap=probe.chanMap,
                              fs=sample_rate,
                              fslow=low_pass_freq,
                              fshigh=high_pass_freq)
    whitened_data = cp.dot(filtered_data, whitening_matrix_np)

    array_means = cp.mean(whitened_data, axis=0)
    array_stds = cp.std(whitened_data, axis=0)
    whitened_array = (whitened_data - array_means) / array_stds

    return whitened_array.get()
def to_periodogram(signal):
    """
    Returns periodogram of signal for finding frequencies that have high energy.

    :param signal: signal (time domain)
    :type signal: cudf.Series
    :return: CuPy array representing periodogram
    :rtype: cupy.core.core.ndarray
    """
    # convert cudf series to cupy array
    signal_cp = cp.fromDlpack(signal.to_dlpack())

    # standardize the signal
    signal_cp_std = (signal_cp - cp.mean(signal_cp)) / cp.std(signal_cp)

    # take fourier transform of signal
    FFT_data = cp.fft.fft(signal_cp_std)

    # create periodogram
    prdg = (1 / len(signal)) * ((cp.absolute(FFT_data))**2)

    return prdg
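# Added sketch (requires cudf + cupy on a GPU; not from the original source):
# the periodogram of a noisy sinusoid peaks near its frequency bin.
import cupy as cp
import cudf

t = cp.arange(1024, dtype=cp.float64)
sig = cudf.Series(cp.sin(2 * cp.pi * 0.05 * t) +
                  0.1 * cp.random.standard_normal(1024))
prdg = to_periodogram(sig)
print(int(cp.argmax(prdg[:512])))   # ~51, i.e. 0.05 * 1024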
def estimate_stats(voltages, stats_calc_num_samples=10000):
    """
    Estimate mean and standard deviation, truncating to at most
    `stats_calc_num_samples` samples to reduce computation.

    Parameters
    ----------
    voltages : array
        Array of voltages
    stats_calc_num_samples : int, optional
        Maximum number of samples for use in estimating noise statistics

    Returns
    -------
    data_mean : float
        Mean of voltages
    data_sigma : float
        Standard deviation of voltages
    """
    calc_len = xp.amin(xp.array([stats_calc_num_samples, len(voltages)]))
    data_sigma = xp.std(voltages[:calc_len])
    data_mean = xp.mean(voltages[:calc_len])

    return data_mean, data_sigma
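# Added sketch; it assumes `xp` is this module's NumPy/CuPy backend alias and
# binds it to NumPy here so the example runs on CPU.
import numpy as np
xp = np

voltages = np.random.normal(0.0, 2.0, size=100000)
mean, sigma = estimate_stats(voltages, stats_calc_num_samples=10000)
print(mean, sigma)   # ~0.0 and ~2.0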
        h = self.bn_b(self.convb(h))
        if self.proj:
            x = self.bn_r(self.convr(x))
        return F.relu(h + x)


train, test = cifar.get_cifar10()
train_iter = iterators.SerialIterator(train, 128)
test_iter = iterators.SerialIterator(test, 2000, repeat=False, shuffle=False)

gpu_id = 0
Itrain, Ttrain = concat_examples(train, gpu_id)
Train_mean = cp.mean(Itrain)
Train_std = cp.std(Itrain)

model = ResNet34().to_gpu(gpu_id)
optimizer = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

max_epoch = 50000
plt.figure(figsize=(7, 5))
test_acc_array = []
train_acc_array = []

while train_iter.epoch < max_epoch:
def standardization(data):
    mu = np.mean(data, axis=0)
    sigma = np.std(data, axis=0)
    return (data - mu) / sigma
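# Added sketch (illustration only): column-wise standardization of a feature
# matrix, so every column ends up with mean ~0 and std ~1.
import numpy as np

data = np.random.normal(10.0, 4.0, size=(500, 3))
z = standardization(data)
print(z.mean(axis=0).round(6), z.std(axis=0).round(6))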
    return Array._new(np.prod(x._array, axis=axis, keepdims=keepdims))


def std(
    x: Array,
    /,
    *,
    axis: Optional[Union[int, Tuple[int, ...]]] = None,
    correction: Union[int, float] = 0.0,
    keepdims: bool = False,
) -> Array:
    # Note: the keyword argument correction is different here
    if x.dtype not in _floating_dtypes:
        raise TypeError("Only floating-point dtypes are allowed in std")
    return Array._new(
        np.std(x._array, axis=axis, ddof=correction, keepdims=keepdims))


def sum(
    x: Array,
    /,
    *,
    axis: Optional[Union[int, Tuple[int, ...]]] = None,
    dtype: Optional[Dtype] = None,
    keepdims: bool = False,
) -> Array:
    if x.dtype not in _numeric_dtypes:
        raise TypeError("Only numeric dtypes are allowed in sum")
    # Note: sum() and prod() always upcast integers to (u)int64 and float32 to
    # float64 for dtype=None. `np.sum` does that too for integers, but not for
    # float32, so we need to special-case it here
def forward_conv(A_previous, Filter, Bias, pad, stride,
                 function='identity', verbose=False):
    '''
    A forward convolution step.
    Output shape: ((x - f + 2*pad) / stride) + 1

    Parameters
    ----------
    A_previous : cp.array(examples, height, width, depth)
        Input images from the previous layer.
    Filter : cp.array(f, f, depth, number of filters)
        Filter to convolve with the input image.
    Bias : cp.array(1, 1, 1, number of filters)
        Bias for each filter.
    pad : int
        Padding edge width.
    stride : int
        Stride number.

    Returns
    -------
    Z : cp.array(examples, ((h-f+2*pad)/stride)+1, ((w-f+2*pad)/stride)+1, number of filters)
        Output layer image.
    '''
    (m, n_H_prev, n_W_prev, n_C_prev) = A_previous.shape
    (f, f, n_C_prev, n_C) = Filter.shape

    mu = cp.mean(Filter)
    s = cp.std(Filter)
    Filter = (Filter - mu) / (s + 1e-5)

    n_H = int(((n_H_prev - f + 2 * pad) / stride) + 1)
    n_W = int(((n_W_prev - f + 2 * pad) / stride) + 1)

    Z = cp.zeros([m, n_H, n_W, n_C])

    A_prev_pad = cp.pad(A_previous, ((0, 0), (pad, pad), (pad, pad), (0, 0),),
                        mode='constant', constant_values=(0, 0))

    i0 = cp.repeat(cp.arange(f), f)
    i1 = stride * cp.repeat(cp.arange(n_W), n_H)
    j0 = cp.tile(cp.arange(f), f)
    j1 = stride * cp.tile(cp.arange(n_H), n_W)
    i = cp.reshape(i0, (-1, 1)) + cp.reshape(i1, (1, -1))
    j = cp.reshape(j0, (-1, 1)) + cp.reshape(j1, (1, -1))
    k = cp.reshape(cp.repeat(cp.arange(n_C_prev), f**2), (-1, 1))

    Ztest = A_prev_pad[:, i, j, :]
    weights = cp.reshape(Filter, (f**2, n_C_prev, n_C))
    conV = cp.tensordot(weights, Ztest, ((0, 1), (1, 3)))
    Z = cp.reshape(cp.transpose(conV, (1, 2, 0)), (m, n_H, n_W, n_C)) + Bias
    Z = activation('forward', function, Z)

    if verbose:
        print("Filter :")
        print(Filter)
        print("Weights :")
        print(weights)
        print("Z :")
        print(Ztest)
        print("Conv :")
        print(conV)
        print("Result :")
        print(Z)
    '''
    for i in range(m):
        a_prev_pad = A_prev_pad[i, :, :, :]
        for h in range(n_H):
            vert_start = h*stride
            vert_end = h*stride+f
            for w in range(n_W):
                horiz_start = w*stride
                horiz_end = w*stride+f
                a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                for c in range(n_C):
                    Z[i, h, w, c] = cp.squeeze(cp.sum(a_slice_prev*Filter[:, :, :, c])+Bias[:, :, :, c])
    '''
    return Z
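# Added usage sketch for forward_conv (assumes the module's activation()
# helper accepts 'identity'): one convolution over a small random batch.
import cupy as cp

A_prev = cp.random.rand(2, 8, 8, 3)   # (examples, height, width, depth)
Filt = cp.random.rand(3, 3, 3, 4)     # (f, f, depth, n_filters)
Bias = cp.zeros((1, 1, 1, 4))
Z = forward_conv(A_prev, Filt, Bias, pad=1, stride=1)
print(Z.shape)                        # (2, 8, 8, 4)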
def forward_function(A_previous, W, mu, sigma, gamma, beta, function, dropout=1):
    '''
    A forward function step

    Parameters
    ----------
    A_previous : cp.array(in_dim, examples)
        Result of the previous layer.
    W : cp.array(out_dim, in_dim)
        Weight matrix.
    mu : cp.array(out_dim, number of epochs)
        Gather Z mean over epochs.
    sigma : cp.array(out_dim, number of epochs)
        Gather Z std over epochs.
    gamma : cp.array(out_dim, 1)
        Weight matrix.
    beta : cp.array(out_dim, 1)
        Bias matrix.
    function : string
        The desired activation function.
    dropout : float (in (0,1)), optional
        Percentage of disabled neurons. The default is 1.

    Returns
    -------
    A : cp.array(out_dim, examples)
        Output layer result.
    z : cp.array(out_dim, examples)
        Activation function input.
    zhat : cp.array(out_dim, examples)
        Normalized Z.
    Z : cp.array(out_dim, examples)
        Result after applying W weights.
    mu : cp.array(out_dim, number of epochs)
        Updated mu.
    sigma : cp.array(out_dim, number of epochs)
        Updated sigma.
    D : cp.array(out_dim, 1)
        Mask matrix for dropout (filled with ones if dropout = 1).
    '''
    m = A_previous.shape[1]
    eps = 1e-8
    Z = cp.dot(W, A_previous)
    if mu.any():
        mu = cp.concatenate((mu, cp.mean(Z, axis=1, keepdims=True)), axis=1)
        sigma = cp.concatenate((sigma, cp.std(Z, axis=1, keepdims=True)), axis=1)
    else:
        mu = cp.mean(Z, axis=1, keepdims=True)
        sigma = cp.std(Z, axis=1, keepdims=True)
    zhat = ((Z - cp.mean(mu, axis=1, keepdims=True))
            / (((m / ((m - 1) + eps)) * cp.mean(sigma, axis=1, keepdims=True)) + eps))
    z = (gamma * zhat) + beta
    A = activation('forward', function, z)
    D = cp.random.rand(A.shape[0], A.shape[1])
    D = (D <= dropout).astype(int)
    A = A * D
    A = A / dropout
    return A, z, zhat, Z, mu, sigma, D