import numpy as np
import scipy.stats as sp_stats


def normalize_artifact_candidates(feats_stim, robust=False):
    """Normalize artifact candidates using a z-score.

    Parameters
    ----------
    feats_stim: np.ndarray, shape: [n_sample x n_chan x n_candidates]
        Clipped artifact candidates from the original signal.
    robust: bool, default: False
        Set to True to use a robust z-score based on the median and the
        median absolute deviation instead of the mean and standard deviation.

    Returns
    -------
    feats_stim_z: np.ndarray, shape: [n_sample x n_chan x n_candidates]
        Clipped artifact candidates after z-score normalization.
    """
    if robust:
        # Robust z-score: (x - median) / MAD
        return (feats_stim - np.median(feats_stim, axis=0)) / \
            sp_stats.median_abs_deviation(feats_stim, axis=0)
    else:
        # Standard z-score: (x - mean) / std
        return sp_stats.zscore(feats_stim, axis=0)
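# Usage sketch (not from the original source; the shapes and the injected
# outlier are assumptions for illustration). The robust variant is far less
# sensitive to a single extreme sample than the standard z-score.
rng = np.random.default_rng(0)
feats = rng.normal(size=(200, 8, 4))  # n_sample x n_chan x n_candidates
feats[0] += 50.0                      # inject one outlier sample

z_standard = normalize_artifact_candidates(feats, robust=False)
z_robust = normalize_artifact_candidates(feats, robust=True)
print(np.median(np.abs(z_standard)), np.median(np.abs(z_robust)))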
import numpy as np
from scipy.stats import median_abs_deviation


def flux_bootstrap(src_flux, src_flux_err, bkg_flux, bkg_flux_err, nsim=1000):
    """Bootstrap the background-subtracted mean flux of a stack of sources.

    Returns the median flux per band over nsim resamples and its
    normal-scaled median absolute deviation.
    """
    nstack, nbands = src_flux.shape
    flux = np.zeros((nsim, nbands))
    flux_err = np.zeros((nsim, nbands))
    for i in range(nsim):
        # Resample the stack with replacement
        idx_sample = np.random.randint(nstack, size=nstack)
        # Count the finite measurements per band in this resample
        ngood = np.zeros(nbands, dtype=int)
        for j in range(nbands):
            good_idx = np.where(np.isfinite(src_flux[idx_sample, j]))
            ngood[j] = len(good_idx[0])
        flux[i, :] = (np.nansum(src_flux[idx_sample, :], axis=0) -
                      np.nansum(bkg_flux[idx_sample, :], axis=0)) / ngood
        flux_err[i, :] = np.sqrt(
            np.nansum(src_flux_err[idx_sample, :]**2 +
                      bkg_flux_err[idx_sample, :]**2, axis=0)) / ngood
    flux_median = np.median(flux, axis=0)
    flux_err_median = np.median(flux_err, axis=0)  # computed but not returned
    flux_mad = median_abs_deviation(flux, axis=0, scale="normal")
    return flux_median, flux_mad
import numpy as np
from scipy.stats import median_abs_deviation
from tqdm import tqdm


def extract_statistics(transformed: np.ndarray) -> np.ndarray:
    """Extract median and deviation statistics from the transformed ECG signals."""
    ecg_features = []
    print("Extracting statistics from transformed signals...")
    for x in tqdm(transformed):
        # Median and MAD over every column except the last
        median_temp = np.median(x[:, :-1], axis=0)
        mad_temp = median_abs_deviation(x[:, :-1], axis=0)
        # The last column is the heart rate
        median_hr = np.median(x[:, -1], keepdims=True)
        mad_hr = median_abs_deviation(x[:, -1]).reshape([-1])
        features = np.concatenate([median_temp, mad_temp, median_hr, mad_hr])
        ecg_features.append(features)
    return np.array(ecg_features)
import numpy as np
import torch
from scipy.stats import median_abs_deviation
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Method of a PyTorch Lightning-style module; `device`, `relu_mul` and the
# self.* attributes come from the surrounding project.


def test_step(self, batch, batch_idx):
    x, x1d, _y, _y_raw, dates = batch
    _y_hat = self(x, x1d)

    y_raw = _y_raw.detach().cpu().clone().numpy()
    y_hat = _y_hat.detach().cpu().clone().numpy()
    # Invert the scaling back to raw units and clamp negatives to zero
    y_hat2 = relu_mul(
        np.array(self.test_dataset.inverse_transform(y_hat, dates)))
    _loss = self.loss(_y_raw, torch.as_tensor(y_hat2).to(device))

    _mae = mean_absolute_error(y_raw, y_hat2)
    _mse = mean_squared_error(y_raw, y_hat2)
    _r2 = r2_score(y_raw, y_hat2)
    _mad = median_abs_deviation(y_raw - y_hat2)

    return {
        'loss': _loss,
        'obs': y_raw,
        'sim': y_hat2,
        'dates': dates,
        'metric': {
            'MSE': _mse,
            'MAE': _mae,
            'MAD': _mad,
            'R2': _r2
        }
    }
import numpy as np
from scipy import stats


def smad(freq_time, sigma=3, clip=True):
    """
    Spectral Median Absolute Deviation filter to clip RFI.

    Args:
        freq_time: freq_time/dynamic spectra to be filtered
        sigma: sigma to clip at
        clip: if True, clip values to the given sigma; if False, zero out
              samples beyond the given sigma

    Returns:
        Dynamic spectra with the outlying values clipped (or zeroed)
    """
    medians = np.median(freq_time, axis=0)
    sigs = sigma * stats.median_abs_deviation(
        freq_time, axis=0, scale='normal')
    if clip:
        return np.clip(freq_time, a_min=medians - sigs, a_max=medians + sigs)
    else:
        # Zero out samples more than sigma robust-stds from the channel median
        for j, sig in enumerate(sigs):
            freq_time[np.absolute(freq_time[:, j] - medians[j]) >= sig, j] = 0.0
        return freq_time
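# Usage sketch (not from the original source; the shape and the injected
# narrowband RFI burst are assumptions for illustration).
rng = np.random.default_rng(1)
dyn = rng.normal(loc=10.0, scale=1.0, size=(1024, 256))  # time x freq
dyn[100:110, 42] += 50.0                                  # RFI burst

cleaned = smad(dyn.copy(), sigma=3, clip=True)
print(dyn[:, 42].max(), cleaned[:, 42].max())  # burst clipped to ~median + 3 sigma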
import numpy as np
from scipy import stats

# calc_N and spectral_kurtosis are defined elsewhere in the package


def sk_filter(data, channel_bandwidth, tsamp, N=None, d=None, sigma=5):
    """
    Apply a Spectral Kurtosis filter to the data.

    Args:
        data (numpy.ndarray): 2D frequency time data
        channel_bandwidth (float): channel bandwidth (MHz)
        tsamp (float): sampling time (seconds)
        N (int): number of accumulations on the FPGA
        d (float): shape factor
        sigma (float): sigma value to apply the cutoff at

    Returns:
        numpy.ndarray: boolean mask of channels to flag (True = bad channel)
    """
    if not N:
        N = calc_N(channel_bandwidth, tsamp)
    sk = spectral_kurtosis(data, d=d, N=N)
    sk_c = sk[~np.isnan(sk)]
    # Robust spread of the SK statistic: 1.4826 * MAD approximates the
    # standard deviation for Gaussian data
    std = 1.4826 * stats.median_abs_deviation(sk_c)
    h = np.median(sk_c) + sigma * std
    l = np.median(sk_c) - sigma * std
    mask = (sk < h) & (sk > l)
    return ~mask
from typing import Union

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view
from scipy import stats

# slice_candles, get_candle_source and same_length are framework helpers
# (Jesse-style indicator utilities)


def median_ad(candles: np.ndarray, period: int = 5, source_type: str = "hl2",
              sequential: bool = False) -> Union[float, np.ndarray]:
    """
    Median Absolute Deviation

    :param candles: np.ndarray
    :param period: int - default: 5
    :param source_type: str - default: "hl2"
    :param sequential: bool - default: False

    :return: float | np.ndarray
    """
    if len(candles.shape) == 1:
        source = candles
    else:
        candles = slice_candles(candles, sequential)
        source = get_candle_source(candles, source_type=source_type)

    # Rolling MAD over a sliding window of length `period`
    swv = sliding_window_view(source, window_shape=period)
    mad = stats.median_abs_deviation(swv, axis=-1)
    res = same_length(source, mad)

    return res if sequential else res[-1]
import numpy as np
from scipy.stats import median_abs_deviation
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Method of a PyTorch Lightning-style module; the self.* attributes come
# from the surrounding project.


def test_step(self, batch, batch_idx):
    x, _y, _y_raw, dates = batch
    _y_hat = self(x)
    # transformed y might be smoothed
    _loss = self.loss(_y_hat, _y)

    y_raw = _y_raw.detach().cpu().clone().numpy()
    y_hat = _y_hat.detach().cpu().clone().numpy()
    y_hat_inv = np.array(self.test_dataset.inverse_transform(y_hat, dates))

    _mae = mean_absolute_error(y_raw, y_hat_inv)
    _mse = mean_squared_error(y_raw, y_hat_inv)
    _r2 = r2_score(y_raw, y_hat_inv)
    _mad = median_abs_deviation(y_raw - y_hat_inv)

    return {
        'loss': _loss,
        'obs': y_raw,
        'sim': y_hat_inv,
        'dates': dates,
        'metric': {
            'MSE': _mse,
            'MAE': _mae,
            'MAD': _mad,
            'R2': _r2
        }
    }
import numpy as np
import regions
from astropy import units as u
from astropy.nddata import Cutout2D
from scipy.stats import median_abs_deviation

# `mask` is a project helper that rasterizes a region onto a cutout


def measure_rms(coord, data, img_wcs, annulus_radius=0.1 * u.arcsecond):
    """Estimate the background RMS in an annulus around `coord` via the MAD."""
    pixel_scale = np.abs(
        img_wcs.pixel_scale_matrix.diagonal().prod())**0.5 * u.deg
    annulus_radius_pix = (annulus_radius.to(u.degree) / pixel_scale).decompose()
    annulus_width = 15  # pix

    center_coord_pix = coord.to_pixel(img_wcs)
    cutout = Cutout2D(data, center_coord_pix, annulus_radius * 2.5, img_wcs,
                      mode='partial')

    cutout_center = regions.PixCoord(cutout.center_cutout[0],
                                     cutout.center_cutout[1])
    innerann_reg = regions.CirclePixelRegion(cutout_center,
                                             annulus_radius_pix.value)
    outerann_reg = regions.CirclePixelRegion(
        cutout_center, annulus_radius_pix.value + annulus_width)
    annulus_mask = mask(outerann_reg, cutout) - mask(innerann_reg, cutout)

    # Background RMS from the MAD of the annulus pixels
    pixels_in_annulus = cutout.data[annulus_mask.astype('bool')]
    bg_rms = median_abs_deviation(pixels_in_annulus)
    return bg_rms
import numpy as np
from scipy import stats


def extract_features(xyz):
    '''Extract timeseries features. xyz is a window of shape (N,3).'''
    feats = {}
    # Per-axis statistics
    feats['xMean'], feats['yMean'], feats['zMean'] = np.mean(xyz, axis=0)
    feats['xStd'], feats['yStd'], feats['zStd'] = np.std(xyz, axis=0)
    feats['xRange'], feats['yRange'], feats['zRange'] = np.ptp(xyz, axis=0)
    feats['xIQR'], feats['yIQR'], feats['zIQR'] = stats.iqr(xyz, axis=0)

    x, y, z = xyz.T
    with np.errstate(divide='ignore', invalid='ignore'):  # ignore div-by-0 warnings
        feats['xyCorr'] = np.nan_to_num(np.corrcoef(x, y)[0, 1])
        feats['yzCorr'] = np.nan_to_num(np.corrcoef(y, z)[0, 1])
        feats['zxCorr'] = np.nan_to_num(np.corrcoef(z, x)[0, 1])

    # Statistics of the vector magnitude
    m = np.linalg.norm(xyz, axis=1)
    feats['mean'] = np.mean(m)
    feats['std'] = np.std(m)
    feats['range'] = np.ptp(m)
    feats['iqr'] = stats.iqr(m)
    feats['mad'] = stats.median_abs_deviation(m)
    feats['kurt'] = stats.kurtosis(m)
    feats['skew'] = stats.skew(m)

    return feats
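# Usage sketch (not from the original source; the window shape is an
# assumption, e.g. 10 s of 30 Hz triaxial accelerometer data).
rng = np.random.default_rng(3)
window = rng.normal(size=(300, 3))
feats = extract_features(window)
print(feats['mad'], feats['xyCorr'])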
import logging

import numpy as np
from scipy import stats


def bandpass_fitter(bandpass: np.ndarray, poly_order: int = 20,
                    mask_sigma: float = 6) -> np.ndarray:
    """
    Fits a bandpass by polyfitting it, looking for channels that are far from
    this fit, excluding those channels, and refitting the bandpass.

    Args:
        bandpass: the bandpass to fit
        poly_order: order of polynomial to fit
        mask_sigma: standard deviation at which to mask outlying channels

    Returns:
        Fit to bandpass
    """
    channels = np.arange(0, len(bandpass))
    fit_values = np.polyfit(channels, bandpass, poly_order)  # fit a polynomial
    poly = np.poly1d(fit_values)  # get the values of the fitted bandpass
    diff = bandpass - poly(
        channels)  # find the difference between fitted and real bandpass

    # Robust standard deviation of the residuals
    std_diff = stats.median_abs_deviation(diff, scale="normal")
    logging.debug("Standard deviation of fit: %.4f", std_diff)

    # Mask channels whose residual is more than mask_sigma robust-stds away
    mask = np.abs(diff - np.median(diff)) < mask_sigma * std_diff

    fit_values_clean = np.polyfit(channels[mask], bandpass[mask],
                                  poly_order)  # refit masking the outliers
    poly_clean = np.poly1d(fit_values_clean)
    best_fit_bandpass = poly_clean(channels)
    # Removed below, scipy's stricter test causes chisquare to fail
    # logging.info(
    #     f"chi^2: {stats.chisquare(bandpass, best_fit_bandpass, poly_order)[0]:.4}"
    # )
    return best_fit_bandpass
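# Usage sketch (not from the original source; the synthetic bandpass and the
# outlier channels are assumptions for illustration).
rng = np.random.default_rng(4)
chans = np.arange(512)
bp = 100 + 0.05 * (chans - 256)**2 / 256 + rng.normal(scale=0.5, size=512)
bp[[10, 200, 450]] += 50  # bad channels
fit = bandpass_fitter(bp)
print(np.abs(bp - fit)[[10, 200, 450]])  # residuals stay large at bad channels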
import astropy.units as u
import numpy as np
import radio_beam
import regions
from astropy.coordinates import SkyCoord
from astropy.io import fits
from astropy.nddata import Cutout2D
from astropy.table import Table
from astropy.wcs import WCS
from scipy.stats import median_abs_deviation

# `mask` is a project helper that rasterizes a region onto a cutout


def mlla_nondet():
    MLLA_nondet = Table.read(
        '/home/jotter/nrao/summer_research_2018/tables/IR_nondet_may21_full.fits')
    b3_fl = fits.open(
        '/home/jotter/nrao/images/Orion_SourceI_B3_continuum_r0.5.clean0.05mJy.allbaselines.huge.deepmask.image.tt0.pbcor.fits')
    header = b3_fl[0].header
    img_wcs = WCS(header)
    data = b3_fl[0].data
    beam = radio_beam.Beam.from_fits_header(header)
    pixel_scale = np.abs(
        img_wcs.pixel_scale_matrix.diagonal().prod())**0.5 * u.deg
    # ppbeam = (beam.sr/(pixel_scale**2)).decompose().value

    MLLA_coord = SkyCoord(ra=MLLA_nondet['RAJ2000'],
                          dec=MLLA_nondet['DEJ2000'], unit=u.degree)

    annulus_radius = 0.1 * u.arcsecond
    annulus_radius_pix = (annulus_radius.to(u.degree) / pixel_scale).decompose()
    annulus_width = 15  # pix

    ulim_fluxes = []
    for ind in range(len(MLLA_coord)):
        center_coord = MLLA_coord[ind]
        center_coord_pix = center_coord.to_pixel(img_wcs)
        cutout = Cutout2D(data, center_coord_pix, annulus_radius * 2.5,
                          img_wcs, mode='partial')

        cutout_center = regions.PixCoord(cutout.center_cutout[0],
                                         cutout.center_cutout[1])
        innerann_reg = regions.CirclePixelRegion(cutout_center,
                                                 annulus_radius_pix.value)
        outerann_reg = regions.CirclePixelRegion(
            cutout_center, annulus_radius_pix.value + annulus_width)
        annulus_mask = mask(outerann_reg, cutout) - mask(innerann_reg, cutout)

        # Background RMS from the MAD of the annulus pixels
        pixels_in_annulus = cutout.data[annulus_mask.astype('bool')]
        bg_rms = median_abs_deviation(pixels_in_annulus)
        # 3-sigma upper limit on the flux
        ulim_fluxes.append(3 * bg_rms)

    ulim_fluxes = np.array(ulim_fluxes) * 1000 * u.mJy  # in mJy

    MLLA_nondet['B3_flux_ulim'] = ulim_fluxes
    MLLA_nondet.write(
        '/home/jotter/nrao/summer_research_2018/tables/IR_nondet_may21_full_ulim.fits',
        overwrite=True)
from scipy import stats


def findsd(relFlux, medianFlux):
    # Robust scale estimate of the relative flux. Note: despite the name `z`,
    # this is a single MAD value, not per-point z-scores, and `medianFlux`
    # is currently unused.
    z = stats.median_abs_deviation(relFlux)
    return z
import numpy as np


def median_abs_deviation_compatible_old_scipy(data_loc):
    """Normal-scaled MAD along axis 1, with a fallback for scipy < 1.5."""
    try:
        from scipy.stats import median_abs_deviation
        return median_abs_deviation(data_loc, axis=1, scale='normal')
    except ImportError:
        # Manual normal-scaled MAD: median(|x - median(x)|) / Phi^-1(3/4),
        # where Phi^-1(3/4) ~= 0.67449
        return np.ma.median(
            np.abs(data_loc -
                   np.tile(np.ma.median(data_loc, axis=1)[:, np.newaxis],
                           (1, np.shape(data_loc)[1]))),
            axis=1) / 0.67449
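# Usage sketch (not from the original source): on modern scipy the first
# branch runs; both branches agree up to the 0.67449 rounding.
rng = np.random.default_rng(2)
data = rng.normal(size=(4, 1000))
print(median_abs_deviation_compatible_old_scipy(data))  # ~1.0 per row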
import numpy as np
from scipy.stats import median_abs_deviation


def get_normal_stats(all_samples):
    """Robust mean/std estimates, assuming roughly normal samples."""
    # ~np.isfinite already covers NaN, so this drops both NaN and +/-inf
    is_nan_mask = np.logical_or(np.isnan(all_samples),
                                ~np.isfinite(all_samples))
    samples = all_samples[~is_nan_mask]
    mean = np.median(samples)
    std = median_abs_deviation(samples, axis=None, scale='normal')
    stats = dict(mean=mean, std=std)
    return stats
import numpy as np
from scipy.stats import median_abs_deviation


def get_pred_metrics(y_test, y_pred, no_features):
    """Calculates performance metrics for point predictions."""
    metrics = np.empty(no_features)
    for feature in np.arange(no_features):
        # Normalized MAD of the residuals; scale=1/1.4826 multiplies the
        # MAD by 1.4826, equivalent to scale='normal'
        nmad = median_abs_deviation(y_pred[:, feature] - y_test[:, feature],
                                    scale=1 / 1.4826)
        metrics[feature] = nmad
    return metrics
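# Usage sketch (not from the original source; shapes and noise level are
# assumptions for illustration).
rng = np.random.default_rng(5)
y_test = rng.normal(size=(500, 3))
y_pred = y_test + rng.normal(scale=0.1, size=(500, 3))
print(get_pred_metrics(y_test, y_pred, 3))  # ~0.1 per feature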
from scipy import stats


def setMad(self, col_name):
    """Method to set the median absolute deviation

    Value(s) are set within the data structure for each individual filter
    within the data.

    :param col_name: Column name on which to apply the summary, e.g. 'mag'
    :type col_name: str
    """
    series = self.table.groupby("filtercode")[col_name]
    for name, group in series:
        self.mad[name] = stats.median_abs_deviation(group)
import numpy as np
from scipy.stats import median_abs_deviation


def _compute(self, mrq):
    for ind_j, j in enumerate(self.j):
        T_X_j = np.abs(mrq.values[j])
        log_T_X_j = np.log(T_X_j)

        # Robust location and squared scale of log |T_X(j)|, rescaled to
        # log2 units via log2(e)
        self.values[0, ind_j] = \
            np.median(log_T_X_j) * np.log2(np.exp(1))
        self.values[1, ind_j] = \
            (median_abs_deviation(log_T_X_j) ** 2) * np.log2(np.exp(1))
import numpy as np
from scipy import stats


def basic_stats(rcs):
    rcs = dim_reduction(rcs)  # dim_reduction is defined elsewhere in the module
    stat_dict = dict(mean=np.mean(rcs),
                     median=np.median(rcs),
                     stdev=np.std(rcs),
                     var=np.var(rcs),
                     median_abs_dev=stats.median_abs_deviation(rcs),
                     quantile25=np.quantile(rcs, 0.25),
                     quantile50=np.quantile(rcs, 0.50),
                     quantile75=np.quantile(rcs, 0.75),
                     iqr=stats.iqr(rcs))
    return stat_dict
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats


def sampleFrequencyAnalysis(self, show_plots=False):
    timestamps = self.gyroInt.get_raw_data("t")
    gyro_data = self.gyroInt.get_raw_data("xyz")
    interarrival = np.diff(timestamps, n=1)
    # Aggregate over 1% of the sample rate, e.g. 9 gyro samples at 900 Hz
    w = int(self.gyroInt.gyro_sample_rate / 100.0)
    interarrival = np.convolve(interarrival, np.ones(w), 'valid') / w  # moving average
    freqs = 1.0 / interarrival
    median = np.median(freqs)
    mad = stats.median_abs_deviation(freqs)
    mad_normal = stats.median_abs_deviation(freqs, scale='normal')
    std = np.std(freqs)

    print('Computed sample rate is {}'.format(self.gyroInt.gyro_sample_rate))
    print('Median freq is {}'.format(median))
    print('Mean freq is {}'.format(np.mean(freqs)))
    print('Stdev of freqs is {}'.format(std))
    print('MAD of freqs is {}'.format(mad))
    print('MAD (normal) of freqs is {}'.format(mad_normal))
    print('Max inter sample delay is {}'.format(np.max(interarrival)))

    if show_plots:
        thresh = mad_normal if mad_normal > std else std
        # 6 sigma covers essentially all observations under a normal distribution
        thresh = 6 * thresh
        outlierMask = median - freqs > thresh

        plt.plot(timestamps, gyro_data)
        plt.plot(timestamps[:-1 - (w - 1)], outlierMask * 2)
        plt.show()

        plt.hist(freqs, bins=300)
        plt.yscale("log")
        plt.axvline(x=median + thresh, color="red")
        plt.axvline(x=median - thresh, color="red")
        plt.axvline(x=median, color='green')
        plt.axvline(x=np.mean(freqs), color='orange')
        plt.show()
import numpy as np
from scipy.stats import median_abs_deviation


def remove_outliers_from_lognormal(data, level=3):
    """Remove extreme outliers more than level robust-STDs from the robust mean

    Parameters
    ----------
    data : np.array
        data expected to follow a lognormal distribution

    Returns
    -------
    np.array
        data with the extreme outliers removed
    """
    # Quantiles are preserved under monotonic transformations
    log_data = np.log(data)
    robust_mean = np.median(log_data)
    robust_std = median_abs_deviation(log_data, scale='normal')
    return data[abs(log_data - robust_mean) < level * robust_std]
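# Usage sketch (not from the original source): lognormal samples with two
# injected extreme values, which the robust cut removes.
rng = np.random.default_rng(6)
data = rng.lognormal(mean=0.0, sigma=0.5, size=10000)
data = np.append(data, [1e6, 1e-6])
clean = remove_outliers_from_lognormal(data)
print(len(data), len(clean), clean.max())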
import numpy as np
from scipy.ndimage import median_filter
from scipy.stats import median_abs_deviation


def trace_noise_estimate(x: np.ndarray, filt_length: int) -> float:
    """Estimates the noise of a signal by detrending with a median filter,
    removing positive spikes, eliminating outliers, and using the median
    absolute deviation estimator of the standard deviation.

    Parameters
    ----------
    x: np.ndarray
        1-D array of values
    filt_length: int
        passed as size to scipy.ndimage.median_filter

    Returns
    -------
    float
        estimate of the standard deviation.
    """
    # Detrend with a median filter
    x = x - median_filter(x, filt_length, mode='nearest')
    # Remove positive spikes
    x = x[x < 1.5 * np.abs(x.min())]
    # Eliminate outliers beyond 2.5 robust standard deviations
    rstd = median_abs_deviation(x, scale='normal')
    x = x[np.abs(x) < 2.5 * rstd]
    return median_abs_deviation(x, scale='normal')
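# Usage sketch (not from the original source): white noise plus a slow trend
# and sparse positive spikes; the estimate should recover the noise scale.
rng = np.random.default_rng(7)
t = np.arange(5000)
trace = 0.001 * t + rng.normal(scale=2.0, size=t.size)
trace[::500] += 30.0  # positive spikes
print(trace_noise_estimate(trace, filt_length=31))  # ~2.0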
import numpy as np
import pandas as pd
from scipy import stats

# _pinv_extended, _cov_hc3 and _p_adjust_bh (HC3 robust covariance and
# Benjamini-Hochberg adjustment) are helpers defined elsewhere in the module.


def DE_test(Y, X, gene_names, alpha: float = 0.05):
    r'''Differential gene expression test.

    Parameters
    ----------
    Y : numpy.array
        \(n,\) the expression matrix.
    X : numpy.array
        \(n,1+1+s\) the constant term, the pseudotime and the covariates.
    gene_names : numpy.array
        \(n,\) the names of all genes.
    alpha : float, optional
        The cutoff of p-values.

    Returns
    -------
    res_df : pandas.DataFrame
        The test results of expressed genes with two columns,
        the estimated coefficients and the adjusted p-values.
    '''
    pinv_wexog, singular_values = _pinv_extended(X)
    normalized_cov = np.dot(pinv_wexog, np.transpose(pinv_wexog))
    h = np.diag(np.dot(X, np.dot(normalized_cov, X.T)))

    def _DE_test(wendog, pinv_wexog, h):
        if np.any(np.isnan(wendog)):
            return np.empty(2) * np.nan
        else:
            beta = np.dot(pinv_wexog, wendog)
            resid = wendog - X @ beta
            cov = _cov_hc3(h, pinv_wexog, resid)
            # t-statistic for the pseudotime coefficient
            t = beta[1] / np.sqrt(np.diag(cov)[1])
            return np.r_[beta[1], t]

    res = np.apply_along_axis(
        lambda y: _DE_test(wendog=y, pinv_wexog=pinv_wexog, h=h), 0, Y).T

    # Robust scale of the t-statistics; scipy renamed
    # median_absolute_deviation to median_abs_deviation in 1.5
    if 'median_abs_deviation' in dir(stats):
        sigma = stats.median_abs_deviation(res[:, 1], nan_policy='omit')
    else:
        sigma = stats.median_absolute_deviation(res[:, 1], nan_policy='omit')
    pdt_new_pval = stats.norm.sf(np.abs(res[:, 1] / sigma)) * 2
    new_adj_pval = _p_adjust_bh(pdt_new_pval)

    res_df = pd.DataFrame(np.c_[res[:, 0], new_adj_pval],
                          index=gene_names,
                          columns=['beta_PDT', 'p_adjusted'])
    res_df = res_df[(new_adj_pval < alpha) & np.any(~np.isnan(Y), axis=0)]
    return res_df
from scipy import stats
from scipy.signal import find_peaks, medfilt


def get_peaks(d):
    # Flatten the overall shape of the input data by subtracting a
    # median-filtered baseline
    d_shape = medfilt(d, kernel_size=201)
    d_flat = d - d_shape

    # Use a median filter to smooth the curve, making it easier to detect
    # the true peaks in the signal
    d_filt = medfilt(d_flat, kernel_size=7)

    # Calculate the median absolute deviation of the raw data and set the
    # threshold accordingly
    mad = stats.median_abs_deviation(d)
    threshold = 5 * mad

    # Find all of the peaks in the data
    peaks, _ = find_peaks(d_filt, height=threshold, distance=10, width=3)

    return peaks
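# Usage sketch (not from the original source; the drift, noise level and
# injected peaks are assumptions for illustration).
import numpy as np

rng = np.random.default_rng(8)
d = rng.normal(scale=0.5, size=4000) + np.linspace(0, 1, 4000)  # noise + drift
for c in (800, 2000, 3200):
    d[c - 5:c + 5] += 10.0  # inject broad peaks
print(get_peaks(d))  # expect peaks near 800, 2000, 3200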
import csv
from os.path import join

import numpy as np
from scipy import stats


def _write_sample_stats(self):
    """Write summary statistics of samples to sample_stats.csv file."""
    data = self.load_data()
    stats_filename = join(self.output, "sample_stats.csv")
    with open(stats_filename, "w") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Name', 'mean', 'median', 'MAD'])
        for num, name in enumerate(self.class_names):
            sample = data.iloc[:, num]
            writer.writerow([
                name,
                np.mean(sample),
                np.median(sample),
                stats.median_abs_deviation(sample)
            ])
import os

import numpy as np
import pandas as pd
import scipy.stats as sps

# get_energy and train_test_norm are project helpers


def prep_emean_data(proj_dir, split=0.15, var_level=10):
    # Load single pulse data
    single_inp = pd.read_table(
        os.path.join(proj_dir, "Data/single_inputs.tsv.gz"))
    single_out = pd.read_table(
        os.path.join(proj_dir, "Data/single_outputs.tsv.gz"))

    # Select only fit columns
    emean_out = single_out.loc[:, ["GaussMean_pxl", "FitMask"]].copy()

    # Get prepared mask
    emean_mask = emean_out["FitMask"].values
    fit_nan = emean_out["GaussMean_pxl"].isna().values
    emean_mask = emean_mask.astype(bool) & ~fit_nan  # also mask NaN values
    # Create arg_mask to apply to inputs and outputs
    emean_argmask = np.argwhere(emean_mask).flatten()

    # Apply masking of events
    emean_inp = single_inp.iloc[emean_argmask].copy()
    emean_out = emean_out.loc[emean_argmask, "GaussMean_pxl"]  # only select fit
    emean_out = emean_out.apply(get_energy)
    emean_out.name = "GaussMean_eV"

    # Filter input features by variance
    feat_columns = [
        c for c in emean_inp if len(np.unique(emean_inp[c])) > var_level
    ]
    emean_inp = emean_inp[feat_columns]

    # Robust z-score of the outputs: |x - median| / MAD
    mad_emean = abs((emean_out.values - np.median(emean_out.values)) /
                    sps.median_abs_deviation(emean_out.values))

    # Create MAD and beam energy mask from the relevant arrays
    mad_mask = mad_emean < 4
    emask = (emean_inp["f_63_ENRC"].values > 0.005) & \
            (emean_inp["f_64_ENRC"].values > 0.005)
    arg_mask = np.argwhere(mad_mask & emask).flatten()

    # Apply arg_mask
    emean_inp = emean_inp.iloc[arg_mask]
    emean_out = emean_out.iloc[arg_mask]

    # Reuse training/test split and normalisation across inputs
    return train_test_norm(emean_inp, emean_out, split)
import numpy as np
from scipy.stats import iqr, median_abs_deviation


def calculate_features(self, signals):
    collate = []
    # Per-signal summary statistics
    collate.append(np.mean(signals, axis=1))
    collate.append(np.std(signals, axis=1))
    collate.append(np.min(signals, axis=1))
    collate.append(np.max(signals, axis=1))
    collate.append(iqr(signals, axis=1))
    collate.append(median_abs_deviation(signals, axis=1))

    # Correlation between consecutive axes within each triad of signals
    correlations = []
    for i in range(len(signals)):
        base = (i // 3) * 3
        correlations.append(
            np.correlate(signals[base + i % 3],
                         signals[base + (i + 1) % 3])[0])
    collate.append(correlations)

    return np.concatenate(collate)
import numpy as np
from scipy.stats import median_abs_deviation


def skybg_iterative(data, max_iter=100, old_med=0, old_mad=0):
    '''Iteratively estimate the sky background.

    1. Define the initial median and MAD (initially on the whole image).
    2. Select the pixels below the median + 3*MAD limit.
    3. Recompute the median and MAD on the selection.
    4. Repeat until the median and MAD converge or max_iter is reached.

    Based on an idea and code from Leonardo Ferreira, 2015.
    '''
    skymed = np.median(data)
    skymad = median_abs_deviation(data, axis=None)
    if max_iter == 0 or (skymed == old_med and skymad == old_mad):
        return (skymed, skymad, data.mean(), data.std())
    else:
        mask = (data < skymed + 3. * skymad)
        return skybg_iterative(data[mask], max_iter - 1, skymed, skymad)
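# Usage sketch (not from the original source): flat sky with Gaussian noise
# and one bright "source"; the iteration rejects the source pixels.
rng = np.random.default_rng(9)
img = rng.normal(loc=100.0, scale=5.0, size=(256, 256))
img[100:110, 100:110] += 500.0  # a bright source
med, mad, mean, std = skybg_iterative(img)
print(med, mad)  # ~100 and ~3.4 (unscaled MAD of N(0, 5) noise)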
import numpy as np
from scipy.stats import median_abs_deviation


def get_lognormal_stats(all_samples):
    """Compute lognormal stats robustly, using median statistics, assuming
    the samples are drawn from a lognormal distribution
    """
    is_nan_mask = np.logical_or(np.isnan(all_samples),
                                ~np.isfinite(all_samples))
    samples = all_samples[~is_nan_mask]
    log_samples = np.log(samples)
    # Robust location and scale of log(x)
    mu = np.median(log_samples)
    sig2 = median_abs_deviation(log_samples, axis=None, scale='normal')**2.0
    # Mode of a lognormal: exp(mu - sigma^2)
    mode = np.exp(mu - sig2)
    # Std of a lognormal: sqrt((exp(sigma^2) - 1) * exp(2*mu + sigma^2));
    # note the plus sign in the second exponent
    std = ((np.exp(sig2) - 1.0) * np.exp(2 * mu + sig2))**0.5
    stats = dict(mu=mu, sigma=sig2**0.5, mode=mode, std=std)
    return stats
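# Sanity-check sketch (not from the original source): for samples drawn with
# known mu and sigma, the robust estimates should land close to the truth.
rng = np.random.default_rng(10)
samples = rng.lognormal(mean=1.0, sigma=0.3, size=100000)
print(get_lognormal_stats(samples))  # mu ~1.0, sigma ~0.3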
import numpy as np
from scipy import stats


def find_cutoffs(data, dev):
    """
    Find the minimum and maximum of the range of data within a given number
    of Median Absolute Deviations of the median.

    :param iterable data: data to find deviation ranges for
    :param int dev: number of deviations
    :return (int, int): minimum and maximum values within deviation
    """
    median = np.median(data)
    mad = stats.median_abs_deviation(data)
    min_range = ((-1 * dev) * mad) + median
    max_range = (dev * mad) + median
    # The lower cutoff cannot be negative
    if min_range < 0:
        return 0, max_range
    return min_range, max_range
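# Usage sketch (not from the original source): the cutoffs bracket the median
# by dev unscaled MADs, clamped at zero on the low side.
rng = np.random.default_rng(11)
data = rng.normal(loc=10.0, scale=2.0, size=1000)
print(find_cutoffs(data, 3))  # roughly (10 - 3*1.35, 10 + 3*1.35)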