def single_correlation_flags(tf_plane, threshold=5.0, max_iter=5,
                             previous_sums=[], verbose=False):
    flags = tf_plane.mask
    sum_flags = flags.sum()
    if verbose:
        print('sum(flags): %s' % (sum_flags,))
        print('%5.3f%s flagged\n' % ((sum_flags*100.0/product(tf_plane.shape)), '%'))
    if sum_flags == product(flags.shape):
        return flags
    if max_iter <= 0:
        return ndimage.binary_dilation(flags, iterations=2)
    med = ma.median(tf_plane.real) + 1j*ma.median(tf_plane.imag)
    sigma = sqrt(ma.std(tf_plane.real)**2 + ma.std(tf_plane.imag)**2)
    bad_vis = abs(tf_plane.data - med) > threshold*sigma
    new_flags = logical_or(flags, bad_vis)
    new_data = ma.array(tf_plane.data, mask=new_flags)
    sum_flags = new_flags.sum()
    if verbose:
        print('sum_flags: %s' % (sum_flags,))
        print('%5.3f%s flagged\nstd: %6.4f' %
              ((sum_flags*100.0/product(tf_plane.shape)), '%', ma.std(new_data)))
        print(sum_flags)
        print(previous_sums)
        print('------------------------------------------------------------')
    if sum_flags == reduce(max, previous_sums, 0):
        return single_correlation_flags(new_data,
                                        threshold=threshold,
                                        max_iter=0,
                                        previous_sums=previous_sums+[sum_flags])
    else:
        return single_correlation_flags(new_data,
                                        threshold=threshold,
                                        max_iter=max_iter-1,
                                        previous_sums=previous_sums+[sum_flags])
def _calc_correlation(self, values_1, values_2, conf_level=0.95):
    """ Calculates Pearson's correlation coefficient.

    Arguments:
        values_1 -- first data
        values_2 -- second data
        conf_level -- confidence level

    Returns:
        (corr_coef, significance) -- correlation coefficient and significance arrays
    """
    n_samples = values_1.shape[0]  # Sample length

    # Calculate Pearson's correlation coefficient.
    values_cov = ma.sum((values_1 - ma.mean(values_1, axis=0)) *
                        (values_2 - ma.mean(values_2, axis=0)), axis=0)
    corr_coef = values_cov / (ma.std(values_1, axis=0) *
                              ma.std(values_2, axis=0)) / n_samples

    # Calculate significance using t-distribution with n-2 degrees of freedom.
    deg_fr = n_samples - 2  # Degrees of freedom.
    t_distr = ma.abs(corr_coef *
                     ma.sqrt(deg_fr / (1. - corr_coef**2)))  # Student's t-statistic.
    prob = 0.5 + conf_level / 2  # Probability for two tails.
    cr_value = student_t.ppf(prob, deg_fr)  # Student's critical value.
    significance = ma.greater(t_distr, cr_value)

    return corr_coef, significance
def destroy_with_variance_2pol(Data, sigma_thres=6, bad_freq_list=[], submean=True):
    '''Mask frequencies with high variance.

    This is the same as the previous function, but for Parkes 2-pol data.
    '''
    # Get the normalized variance array for each polarization.
    #Data.data[Data.data>3] = ma.masked
    #Data.data[Data.data<3] = ma.masked
    Data.data[np.isnan(Data.data)] = ma.masked
    Data.data[Data.data <= 0.] = ma.masked
    if submean:
        a = ma.var(Data.data[:, 0, 0, :], 0)/(ma.mean(Data.data[:, 0, 0, :], 0)**2)  # XX
        b = ma.var(Data.data[:, 1, 0, :], 0)/(ma.mean(Data.data[:, 1, 0, :], 0)**2)  # YY
    else:
        a = ma.var(Data.data[:, 0, 0, :], 0)
        b = ma.var(Data.data[:, 1, 0, :], 0)
    # Get the mean and standard deviation [sigma].
    means = sp.array([ma.mean(a), ma.mean(b)])
    sig = sp.array([ma.std(a), ma.std(b)])
    # Get the max accepted value [sigma_thres*sigma, sigma_thres=6 works really well].
    max_sig = sigma_thres*sig
    max_accepted = means + max_sig
    min_accepted = means - max_sig
    amount_masked = 0
    for freq in range(0, len(a)):
        if ((a[freq] > max_accepted[0]) or (b[freq] > max_accepted[1]) or
                (a[freq] < min_accepted[0]) or (b[freq] < min_accepted[1])):
            # mask
            amount_masked += 1
            bad_freq_list.append(freq)
            Data.data[:, :, :, freq].mask = True
    return amount_masked
def destroy_time_with_mean_arrays_2pol(Data, flag_size=40):
    '''Mask times with high means.

    This is the same as the previous function, but for Parkes 2-pol data.
    '''
    # Get the means over all frequencies. (for all pols. and cals.)
    a = ma.mean(Data.data[:, 0, 0, :], -1)
    b = ma.mean(Data.data[:, 1, 0, :], -1)
    # Get means and std for all arrays.
    means = sp.array([ma.mean(a), ma.mean(b)])
    sig = sp.array([ma.std(a), ma.std(b)])
    # Get max accepted values.
    max_accepted = means + 3*sig
    # Get min accepted values.
    min_accepted = means - 3*sig
    # Find bad times.
    bad_times = []
    for time in range(0, len(a)):
        if ((a[time] > max_accepted[0]) or (b[time] > max_accepted[1]) or
                (a[time] < min_accepted[0]) or (b[time] < min_accepted[1])):
            bad_times.append(time)
    # Mask bad times and those +- flag_size around.
    for time in bad_times:
        if time - flag_size < 0:
            Data.data[0:(time+flag_size), :, :, :].mask = True
        else:
            Data.data[(time-flag_size):(time+flag_size), :, :, :].mask = True
    return
def get_depths(noise_maps, pix_size, mask=None, pixel_weights=None):
    """Compute depth_i and depth_p (sensitivities) from noise maps.

    :param noise_maps: the noise maps
    :param pix_size: the pixel size in arcmin
    :param mask: the mask to apply
    :param pixel_weights: weighting of pixels (coverage)

    :return: depth_i and depth_p of the map
    """
    # apply pixel weights
    weighted_maps = np.empty_like(noise_maps)
    weighted_maps[...] = noise_maps[...] * pixel_weights

    # apply mask
    noise_ma = ma.array(weighted_maps, mask=mask)

    # noise estimation (in I component) using the noise maps
    depth_i = ma.getdata(ma.std(noise_ma[:, 0, :], axis=1))
    depth_i *= pix_size

    # noise estimation (in Q & U components)
    depth_p = ma.getdata(ma.std(noise_ma[:, 1:, :], axis=(1, 2)))
    depth_p *= pix_size

    return depth_i, depth_p
def statistics(numpy_array):
    return {'mean': ma.mean(numpy_array),
            'median': ma.median(numpy_array.real) + 1j*ma.median(numpy_array.imag),
            'max': ma.max(abs(numpy_array)),
            'min': ma.min(abs(numpy_array)),
            'std': ma.std(numpy_array),
            'stdmean': ma.std(numpy_array)/sqrt(sum(logical_not(numpy_array.mask)) - 1)}
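A minimal usage sketch for the helper above. It assumes the surrounding module already provides `ma`, `sqrt`, and `logical_not` (as the snippet does), and the sample data here is hypothetical:

import numpy as np
import numpy.ma as ma

# Hypothetical masked complex visibilities with one invalid sample.
vis = ma.masked_invalid(np.array([1.0 + 1.0j, 2.0 + 0.5j, np.nan + 0.0j, 3.0 - 1.0j]))
stats = statistics(vis)
print(stats['median'], stats['std'], stats['stdmean'])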
def plot_all_correlations(data_col, plot_flags=True, amax_factor=1.0):
    flags = bad_data(data_col, threshold=4.0, max_iter=20)
    flagged_data = ma.array(data_col.data, mask=flags)
    xx, xy, yx, yy, num_pol = split_data_col(ma.array(flagged_data))

    scale = ma.max(abs(flagged_data))
    stddev = max(ma.std(flagged_data.real), ma.std(flagged_data.imag))
    if flags.sum() == product(flags.shape):
        amax = 1.0
    else:
        amax = (scale - stddev)*amax_factor
    print('scale: %f\nsigma: %f' % (scale, stddev))
    good = logical_not(xx.mask)
    if not plot_flags:
        good = None

    clf()
    if num_pol == 2:
        subplot(121)
        plot_complex_image('XX', xx, good, amin=0.0, amax=amax)
        subplot(122)
        plot_complex_image('YY', yy, good, amin=0.0, amax=amax)
    elif num_pol == 4:
        subplot(141)
        plot_complex_image('XX', xx, good, amin=0.0, amax=amax)
        subplot(142)
        plot_complex_image('XY', xy, good, amin=0.0, amax=amax)
        subplot(143)
        plot_complex_image('YX', yx, good, amin=0.0, amax=amax)
        subplot(144)
        plot_complex_image('YY', yy, good, amin=0.0, amax=amax)
def Portrait_diagram_subregion(obs_subregion_mean, obs_name, model_subregion_mean,
                               model_names, seasonal_cycle, file_name, normalize=True):
    nmodel, nt, nregion = model_subregion_mean.shape
    if seasonal_cycle:
        obs_data = ma.mean(obs_subregion_mean.reshape(
            [1, nt // 12, 12, nregion]), axis=1)
        model_data = ma.mean(model_subregion_mean.reshape(
            [nmodel, nt // 12, 12, nregion]), axis=1)
        nt = 12
    else:
        obs_data = obs_subregion_mean
        model_data = model_subregion_mean

    subregion_metrics = ma.zeros([4, nregion, nmodel])
    for imodel in np.arange(nmodel):
        for iregion in np.arange(nregion):
            # First metric: bias
            subregion_metrics[0, iregion, imodel] = metrics.calc_bias(
                model_data[imodel, :, iregion], obs_data[0, :, iregion],
                average_over_time=True)
            # Second metric: standard deviation
            subregion_metrics[1, iregion, imodel] = metrics.calc_stddev_ratio(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])
            # Third metric: RMSE
            subregion_metrics[2, iregion, imodel] = metrics.calc_rmse(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])
            # Fourth metric: correlation
            subregion_metrics[3, iregion, imodel] = metrics.calc_correlation(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])

    if normalize:
        for iregion in np.arange(nregion):
            subregion_metrics[0, iregion, :] = subregion_metrics[
                0, iregion, :] / ma.std(obs_data[0, :, iregion]) * 100.
            subregion_metrics[1, iregion, :] = subregion_metrics[1, iregion, :] * 100.
            subregion_metrics[2, iregion, :] = subregion_metrics[
                2, iregion, :] / ma.std(obs_data[0, :, iregion]) * 100.

    region_names = ['R%02d' % i for i in np.arange(nregion) + 1]

    for imetric, metric in enumerate(['bias', 'std', 'RMSE', 'corr']):
        plotter.draw_portrait_diagram(subregion_metrics[imetric, :, :],
                                      region_names, model_names,
                                      file_name + '_' + metric,
                                      xlabel='model', ylabel='region')
def snr_func(data):
    data /= np.max(np.abs(data), axis=0)
    year_stack = ma.array(np.split(data, 10, axis=0))
    stdev_all_data = ma.std(data, axis=0)
    signal_array = ma.mean(year_stack, axis=0)
    stdev_seasonal = ma.std(signal_array, axis=0)
    stdev_non_seasonal = stdev_all_data - stdev_seasonal
    return stdev_seasonal/stdev_non_seasonal
def scale(self):
    if self.gts.ndim == 2:
        self.gts = self.gts / ma.std(self.gts, axis=0)
    elif self.gts.ndim == 3:
        for i in range(0, self.gts.shape[1]):
            self.gts[:, i, :] = self.gts[:, i, :] / ma.std(self.gts[:, i, :], axis=0)
def scale(self):
    """
    This normalises the SNPs/PGS columns to have variance 1.
    """
    if self.gts.ndim == 2:
        self.gts = self.gts / ma.std(self.gts, axis=0)
    elif self.gts.ndim == 3:
        for i in range(0, self.gts.shape[1]):
            self.gts[:, i, :] = self.gts[:, i, :] / ma.std(self.gts[:, i, :], axis=0)
def plot_stdevs(data, name):
    data /= np.max(np.abs(data), axis=0)
    year_stack = ma.array(np.split(data, 10, axis=0))
    vmin, vmax = 0, 0.5
    #~ plt.figure(figsize=(10, 10), dpi=50)
    curr_map = Basemap(projection='cyl', llcrnrlon=ll_lon, llcrnrlat=ll_lat,
                       urcrnrlon=ur_lon, urcrnrlat=ur_lat,
                       resolution='i', area_thresh=100.)
    x, y = curr_map(lon, lat)

    plt.subplot(411)
    stdev_all_data = ma.std(data, axis=0)
    im = curr_map.pcolormesh(x, y, stdev_all_data, vmin=vmin, vmax=vmax, cmap=cmap)
    plt.axis('tight')
    plt.colorbar()
    curr_map.drawcoastlines()
    curr_map.fillcontinents(color='grey', lake_color='aqua')
    #~ plt.title('stdev_all_data' + longname)

    plt.subplot(412)
    annual_means = ma.mean(year_stack, axis=1)
    stdev_annual_means = ma.std(annual_means, axis=0)
    im = curr_map.pcolormesh(x, y, stdev_annual_means, vmin=vmin, vmax=vmax, cmap=cmap)
    plt.axis('tight')
    plt.colorbar()
    curr_map.drawcoastlines()
    curr_map.fillcontinents(color='grey', lake_color='aqua')
    #~ plt.title('Standard Deviation of the Annual Averages' + longname)

    plt.subplot(413)
    signal_array = ma.mean(year_stack, axis=0)
    stdev_seasonal = ma.std(signal_array, axis=0)
    im = curr_map.pcolormesh(x, y, stdev_seasonal, vmin=vmin, vmax=vmax, cmap=cmap)
    plt.axis('tight')
    plt.colorbar()
    curr_map.drawcoastlines()
    curr_map.fillcontinents(color='grey', lake_color='aqua')
    #~ plt.title('stdev_seasonal' + longname)

    plt.subplot(414)
    stdev_all_data = ma.std(data, axis=0)
    signal_array = ma.mean(year_stack, axis=0)
    stdev_seasonal = ma.std(signal_array, axis=0)
    stdev_non_seasonal = stdev_all_data - stdev_seasonal
    #~ stdev_non_seasonal = ma.stdev(noise_array, axis=0)
    im = curr_map.pcolormesh(x, y, stdev_non_seasonal, vmin=vmin, vmax=vmax, cmap=cmap)
    plt.axis('tight')
    plt.colorbar()
    curr_map.drawcoastlines()
    curr_map.fillcontinents(color='grey', lake_color='aqua')
    #~ plt.title('stdev_non_seasonal' + longname)

    plt.savefig('/home/nicholas/masters/figures/newplots/standard_deviations_' + name + '.png')
    plt.close('all')
def flagging(data, freq, sigma_thres, linscale):
    """
    Flags data for RFI. Designed for a single time step scan.
    Uses a sigma threshold to flag out anything with RFI over a certain threshold.
    Expects data to be linear for the spline (s=1e-10); using dB data
    seems to give reasonable results for s=1e4.
    Also flags out NaNs, infs.
    Output is flagging mask for input data array.
    """
    # data = 10.**(data/10.)
    mask = zeros(len(data))
    nanmask = array(where(isnan(data))[0])
    mask[nanmask] = 1.0
    infmask = array(where(isinf(data))[0])
    mask[infmask] = 1.0
    scale = linscale
    for f in range(0, len(data)//scale - 1):
        # smooth = itp.UnivariateSpline(freq[f*scale:(f+1)*scale], data[f*scale:(f+1)*scale])
        (Fa, Fb) = polyfit(freq[f*scale:(f+1)*scale], data[f*scale:(f+1)*scale], 1)
        # smooth = itp.interp1d(freq[f*scale:(f+1)*scale], data[f*scale:(f+1)*scale], 'linear')
        flat_data = data[f*scale:(f+1)*scale]/polyval([Fa, Fb], freq[f*scale:(f+1)*scale])
        flat_sigma = ma.std(flat_data)
        flat_mean = ma.mean(flat_data)
        max_accept = 1.0 + flat_sigma*sigma_thres
        min_accept = 1.0 - flat_sigma*sigma_thres
        maxmask = array(where(flat_data > max_accept)[0])
        minmask = array(where(flat_data < min_accept)[0])
        maxmask = maxmask + f*scale
        minmask = minmask + f*scale
        mask[maxmask] = 1.0
        mask[minmask] = 1.0
    # Fit and flag the remaining channels after the last full window.
    # smooth = itp.UnivariateSpline(freq[(f+1)*scale:-1], data[(f+1)*scale:-1])
    # smooth = itp.interp1d(freq[(f+1)*scale:-1], data[(f+1)*scale:-1], 'linear')
    (Fa, Fb) = polyfit(freq[(f+1)*scale:-1], data[(f+1)*scale:-1], 1)
    flat_data = data[(f+1)*scale:-1]/polyval([Fa, Fb], freq[(f+1)*scale:-1])
    # flat_data = data[(f+1)*scale:-1]/smooth(freq[(f+1)*scale:-1])
    flat_sigma = ma.std(flat_data)
    flat_mean = ma.mean(flat_data)
    max_accept = 1.0 + flat_sigma*sigma_thres
    min_accept = 1.0 - flat_sigma*sigma_thres
    maxmask = array(where(flat_data > max_accept)[0])
    minmask = array(where(flat_data < min_accept)[0])
    maxmask = maxmask + (f+1)*scale
    minmask = minmask + (f+1)*scale
    mask[maxmask] = 1.0
    mask[minmask] = 1.0
    return mask
def flagging(data, freq, sigma_thres, linscale):
    """
    Flags data for RFI. Designed for a single time step scan.
    Uses a sigma threshold to flag out anything with RFI over a certain threshold.
    Also flags out NaNs, infs.
    Inputs are:
    data - linear input
    freq - can be any units
    sigma_thres - cutoff for bad data
    linscale - size of flattened window
    Output is flagging mask for input data array.
    """
    mask = np.zeros(len(data))
    nanmask = np.where(np.isnan(data))[0]
    mask[nanmask] = 1.0
    infmask = np.where(np.isinf(data))[0]
    mask[infmask] = 1.0
    scale = linscale
    for f in range(0, len(data)//scale - 1):
        (Fa, Fb) = np.polyfit(freq[f*scale:(f+1)*scale], data[f*scale:(f+1)*scale], 1)
        flat_data = data[f*scale:(f+1)*scale]/np.polyval([Fa, Fb], freq[f*scale:(f+1)*scale])
        flat_sigma = ma.std(flat_data)
        flat_mean = ma.mean(flat_data)
        max_accept = 1.0 + flat_sigma*sigma_thres
        min_accept = 1.0 - flat_sigma*sigma_thres
        maxmask = ma.array(np.where(flat_data > max_accept)[0])
        minmask = ma.array(np.where(flat_data < min_accept)[0])
        maxmask = maxmask + f*scale
        minmask = minmask + f*scale
        mask[maxmask] = 1.0
        mask[minmask] = 1.0
    # Fit and flag the remaining channels after the last full window.
    (Fa, Fb) = np.polyfit(freq[(f+1)*scale:-1], data[(f+1)*scale:-1], 1)
    flat_data = data[(f+1)*scale:-1]/np.polyval([Fa, Fb], freq[(f+1)*scale:-1])
    flat_sigma = ma.std(flat_data)
    flat_mean = ma.mean(flat_data)
    max_accept = 1.0 + flat_sigma*sigma_thres
    min_accept = 1.0 - flat_sigma*sigma_thres
    maxmask = ma.array(np.where(flat_data > max_accept)[0])
    minmask = ma.array(np.where(flat_data < min_accept)[0])
    maxmask = maxmask + (f+1)*scale
    minmask = minmask + (f+1)*scale
    mask[maxmask] = 1.0
    mask[minmask] = 1.0
    return mask
def destroy_time_with_mean_arrays(Data, flag_size=40):
    '''Mask times with high means.

    If there is a problem in time, the mean over all frequencies will
    stand out greatly [>10 sigma has been seen]. Flag these bad times
    and +- `flag_size` times around it. Will only be called if `Data`
    has 'badness'.

    Parameters
    ----------
    Data : DataBlock
        Contains information in a usable format direct from GBT. Bad
        times will be flagged in all polarizations and cal states.
    flag_size : int
        How many time bins (as an absolute number) to flag on either
        side of each bad time.
    '''
    # Get the means over all frequencies. (for all pols. and cals.)
    a = ma.mean(Data.data[:, 0, 0, :], -1)
    b = ma.mean(Data.data[:, 1, 0, :], -1)
    c = ma.mean(Data.data[:, 2, 0, :], -1)
    d = ma.mean(Data.data[:, 3, 0, :], -1)
    e = ma.mean(Data.data[:, 0, 1, :], -1)
    f = ma.mean(Data.data[:, 1, 1, :], -1)
    g = ma.mean(Data.data[:, 2, 1, :], -1)
    h = ma.mean(Data.data[:, 3, 1, :], -1)
    # Get means and std for all arrays.
    means = sp.array([ma.mean(a), ma.mean(b), ma.mean(c), ma.mean(d),
                      ma.mean(e), ma.mean(f), ma.mean(g), ma.mean(h)])
    sig = sp.array([ma.std(a), ma.std(b), ma.std(c), ma.std(d),
                    ma.std(e), ma.std(f), ma.std(g), ma.std(h)])
    # Get max accepted values.
    max_accepted = means + 3 * sig
    # Find bad times.
    bad_times = []
    for time in range(0, len(a)):
        if ((a[time] > max_accepted[0]) or (b[time] > max_accepted[1]) or
                (c[time] > max_accepted[2]) or (d[time] > max_accepted[3]) or
                (e[time] > max_accepted[4]) or (f[time] > max_accepted[5]) or
                (g[time] > max_accepted[6]) or (h[time] > max_accepted[7])):
            bad_times.append(time)
    # Mask bad times and those +- flag_size around.
    for time in bad_times:
        Data.data[(time - flag_size):(time + flag_size), :, :, :].mask = True
    return
def get_noise_levels(ncfile):
    # ----------------
    # Open NetCDF file
    # ----------------
    print('Opening NetCDF file ' + ncfile)
    dataset = nc4.Dataset(ncfile, 'r+', format='NETCDF3_CLASSIC')

    nray = len(dataset.dimensions['time'])
    ngate = len(dataset.dimensions['range'])

    elv = np.transpose(np.tile(dataset.variables['elevation'][:], (ngate, 1)))
    rng = np.tile(dataset.variables['range'][:], (nray, 1))
    height = rng * np.sin(elv * np.pi / 180.)

    zh = dataset.variables['ZED_H'][:]
    zed = ma.masked_where(height < 14000, zh)
    rngkm = ma.masked_where(rng <= 0.0, rng / 1000.)
    range2 = 20. * ma.log10(rngkm)

    zh[:] = zed - range2
    zv = zh.copy()
    zv[:] = zh[:] - dataset.variables['ZDR'][:]
    zx = zh.copy()
    zx[:] = zh[:] + dataset.variables['LDR'][:]

    nezharr = ma.mean(zh, axis=1)
    nezherr = ma.std(zh, axis=1)
    nezvarr = ma.mean(zv, axis=1)
    nezverr = ma.std(zv, axis=1)
    nezxarr = ma.mean(zx, axis=1)
    nezxerr = ma.std(zx, axis=1)

    nezharr = ma.masked_where(nezherr > MAX_ERR, nezharr)
    nezvarr = ma.masked_where(nezverr > MAX_ERR, nezvarr)
    nezxarr = ma.masked_where(nezxerr > MAX_ERR, nezxarr)

    nezh = ma.median(nezharr)
    nezv = ma.median(nezvarr)
    nezx = ma.median(nezxarr)

    dataset.close()

    return np.round(nezh, 2), np.round(nezv, 2), np.round(nezx, 2)
def average_combine(self):
    """Average combine together a set of arrays.

    A CCDData object is returned with the data property set to the
    average of the arrays. If the data was masked or any data have been
    rejected, those pixels will not be included in the average. A mask
    will be returned, and if a pixel has been rejected in all images, it
    will be masked. The uncertainty of the combined image is set by the
    standard deviation of the input images.

    Returns
    -------
    combined_image: CCDData object
        CCDData object based on the combined input of CCDData objects.
    """
    # set up the data
    data, wei = ma.average(self.data_arr, axis=0,
                           weights=self.weights, returned=True)

    # set up the mask
    mask = self.data_arr.mask.sum(axis=0)
    mask = (mask == len(self.data_arr))

    # set up the uncertainty as the standard deviation across the stack
    uncertainty = ma.std(self.data_arr, axis=0)

    # create the combined image
    combined_image = CCDData(data.data, mask=mask, unit=self.unit,
                             uncertainty=StdDevUncertainty(uncertainty))

    # update the meta data
    combined_image.meta['NCOMBINE'] = len(self.data_arr)

    # return the combined image
    return combined_image
def test_baseline_use_all_features_with_signified_random(data, conf):
    conf['feature_selection']['must_be_in_thesaurus'] = False
    conf['vectorizer']['decode_token_handler'] = \
        'eval.pipeline.feature_handlers.SignifiedOnlyFeatureHandler'
    conf['vectorizer']['k'] = 1

    x1, x2, voc = _vectorize_data(data, conf, dummy=True)

    assert full_vocab == strip(voc)
    assert isinstance(x1, sp.spmatrix)
    t.assert_array_equal(
        x1.toarray(),
        training_matrix
    )
    t.assert_array_almost_equal(
        x2.toarray(),
        np.array(
            [
                [0, 11.0, 0, 0, 0, 0],
            ]
        )
    )
    # the thesaurus will always say the neighbour for something is
    # b/N with a similarity of 1, and we look up 11 tokens overall in
    # the test document
    x1, x2, voc = _vectorize_data(data, conf, dummy=True)
    assert x2.sum() == 11.0
    assert std(x2.todense()) > 0
def is_hit(history_data, test_data, year):
    """
    :param history_data: probability of default given rating for past years
    :type history_data: np.array
    :param test_data: default for given rating in given year
    :type test_data: float
    :return:
    """
    m = mean(history_data)
    st_dev = std(history_data)
    floar = get_upper(history_data)
    min_defaults = max(0.0, m - 1.96 * st_dev)
    max_defaults = min(100.0, m + 1.96 * st_dev)
    if year in [2007, 2008, 2011, 2014]:
        in_interval = "\\in"
        color = "green"
        if not min_defaults <= test_data <= max_defaults:
            in_interval = "\\not " + in_interval
            color = "red"
        min_defaults = "%.1f" % min_defaults
        max_defaults = "%.1f" % max_defaults
        end = "& " if year != 2014 else "\\\\"
        print("$\\textcolor{{{5}}}{{ {3} {4} [{1}, {2}] }}$".format(
            year, min_defaults, max_defaults, test_data, in_interval, color), end=end)
    if abs(test_data - m) <= 1.96 * st_dev:
        return 0
    return 1
def pca(data, nPCs=-1):
    domain = None
    suma = data.sum(axis=0) / float(len(data))
    data -= suma  # subtract average value to get zero mean
    data /= MA.std(data, axis=0)
    covMatrix = MA.dot(data.T, data) / len(data)

    eigVals, eigVectors = linalg.eigh(covMatrix)
    eigVals = list(eigVals)

    if nPCs == -1:
        nPCs = len(eigVals)
    nPCs = min(nPCs, len(eigVals))

    pairs = [(val, i) for i, val in enumerate(eigVals)]
    pairs.sort()
    pairs.reverse()
    # take indices of the wanted number of principal components
    indices = [pair[1] for pair in pairs[:nPCs]]
    vectors = MA.take(eigVectors, indices, axis=1)
    values = [eigVals[i] for i in indices]

    projectedData = MA.dot(data, vectors)
    return projectedData, vectors, values
def update_background(fn):
    with fits.open(fn, mode='update') as hdu:
        im = hdu[0].data.copy()

        mask = ~np.isfinite(im) + (im < DATA_FLOOR)
        if 'MASK' in hdu:
            mask += hdu['MASK'].data > 0

        im = ma.MaskedArray(im, mask=mask, copy=True)
        scim = sigma_clip(im)

        mean = ma.mean(scim)
        mean = mean if mean is not ma.masked else 0

        median = ma.median(scim)
        median = median if median is not ma.masked else 0

        stdev = ma.std(scim)
        stdev = stdev if stdev is not ma.masked else 0

        hdu['SCI'].header['bgmean'] = (mean, 'background sigma-clipped mean')
        hdu['SCI'].header['bgmedian'] = (median, 'background sigma-clipped median')
        hdu['SCI'].header['bgstdev'] = (stdev, 'background sigma-clipped standard dev.')
        hdu['SCI'].header['nbg'] = (ma.sum(~scim.mask), 'area considered in background stats.')
def calc_subregion_area_mean_and_std(dataset_array, subregions):
    ''' Calculate area mean and standard deviation values for given subregions
    using datasets on common grid points.

    :param dataset_array: An array of OCW Dataset Objects
    :type dataset_array: :class:`list`

    :param subregions: list of subregions
    :type subregions: :class:`numpy.ma.array`

    :returns: area averaged time series for the dataset of shape (ntime, nsubregion)
    '''
    ndata = len(dataset_array)
    dataset0 = dataset_array[0]
    if dataset0.lons.ndim == 1:
        lons, lats = np.meshgrid(dataset0.lons, dataset0.lats)
    else:
        lons = dataset0.lons
        lats = dataset0.lats
    subregion_array = np.zeros(lons.shape)
    mask_array = dataset_array[0].values[0, :].mask

    # dataset0.values.shape[0]: length of the time dimension
    # spatial average
    t_series = ma.zeros([ndata, dataset0.values.shape[0], len(subregions)])
    # spatial standard deviation
    spatial_std = ma.zeros([ndata, dataset0.values.shape[0], len(subregions)])

    for iregion, subregion in enumerate(subregions):
        lat_min, lat_max, lon_min, lon_max = subregion[1]
        y_index, x_index = np.where((lats >= lat_min) & (lats <= lat_max) &
                                    (lons >= lon_min) & (lons <= lon_max))
        subregion_array[y_index, x_index] = iregion + 1
        for idata in np.arange(ndata):
            t_series[idata, :, iregion] = ma.mean(
                dataset_array[idata].values[:, y_index, x_index], axis=1)
            spatial_std[idata, :, iregion] = ma.std(
                dataset_array[idata].values[:, y_index, x_index], axis=1)
    subregion_array = ma.array(subregion_array, mask=mask_array)
    return t_series, spatial_std, subregion_array
def makeFluxSigMask(flux=None, minThresh=2, maxThresh=5):
    """
    Compute the mean total integrated flux value and its standard deviation.
    Find all pixels with a flux outside the min/max thresholds and mask them.

    Parameters
    ----------
    flux: array_like
        The 2D array of the total integrated fluxes.
    min/maxThresh: int
        Sigma limit thresholds for the min/max.

    Returns
    -------
    Boolean mask.
    """
    sigma = ma.std(flux)
    ave = ma.mean(flux)

    if sigma > ave:
        intervalMin = ave
    else:
        intervalMin = ave - (minThresh * sigma)
    intervalMax = ave + (maxThresh * sigma)

    maskedOutside = ma.masked_outside(flux, intervalMin, intervalMax)
    maskedZeros = ma.masked_where(maskedOutside == 0, maskedOutside, copy=False)

    return ma.getmask(maskedZeros)
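A small usage sketch for the mask builder above, on purely synthetic data (assumes only numpy and that `ma` is numpy.ma in the surrounding module):

import numpy as np
import numpy.ma as ma

rng = np.random.default_rng(1)
flux = rng.normal(loc=10.0, scale=1.0, size=(64, 64))
flux[10, 10] = 500.0                      # a spuriously bright pixel
badMask = makeFluxSigMask(flux=flux, minThresh=2, maxThresh=5)
print(badMask[10, 10], badMask.sum())     # the outlier is masked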
def _check(self, data):
    array = biggus.NumpyArrayAdapter(data)
    result = std(array, axis=0, ddof=0).masked_array()
    expected = ma.std(data, axis=0, ddof=0)
    if expected.ndim == 0:
        expected = ma.asarray(expected)
    np.testing.assert_array_equal(result.filled(), expected.filled())
    np.testing.assert_array_equal(result.mask, expected.mask)
def calc_stddev(array, axis=None):
    """ Calculate the sample standard deviation of an array along a given axis.

    :param array: an array to calculate sample standard deviation
    :type array: :class:'numpy.ma.core.MaskedArray'

    :param axis: Axis along which the sample standard deviation is computed.
    :type axis: 'int'

    :returns: sample standard deviation of array
    :rtype: :class:'numpy.ma.core.MaskedArray'
    """
    if isinstance(axis, int):
        return ma.std(array, axis=axis, ddof=1)
    else:
        return ma.std(array, ddof=1)
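For reference, the `ddof=1` used above gives the sample (N-1) standard deviation rather than numpy's default population value; a quick check, assuming `ma` is numpy.ma in the surrounding module:

import numpy as np
import numpy.ma as ma

x = ma.masked_invalid([1.0, 2.0, np.nan, 4.0])
print(ma.std(x))            # population std (ddof=0), the invalid value is ignored
print(calc_stddev(x))       # sample std (ddof=1), slightly larger
print(calc_stddev(x, axis=0))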
def calc_chrom_fast(self, index, coords_vals):
    self.population[index]['fitness'] = \
        np.abs(self.array_mean - ma.mean(coords_vals[0])) + \
        np.abs(self.array_stdev - ma.std(coords_vals[0])) + \
        np.abs(self.array_range - (ma.max(coords_vals[0]) - ma.min(coords_vals[0])))/10 + \
        np.abs((self.chromosome_size - 1) - coords_vals[2])  # locations
    #~ print "Chromosome size: ", self.chromosome_size
    print "Number of locations is: ", coords_vals[2]
def std_(self):
    """
    Calculates the standard deviation of the image over the binarised segmentation.

    :return:
    """
    return ma.std(self.masked_img, 0)
def _sky(data, ellipsefit, diameter=2.0):
    """Estimate the sky brightness in each band."""
    #area = diameter**2  # arcsec^2
    for filt in band:
        img = data['{}_masked'.format(filt)]
        #ellipsefit['{}_sky'.format(filt)] = 22.5 - 2.5 * np.log10( ma.std(img) )
        #ellipsefit['mu_{}_sky'.format(filt)] = ellipsefit['{}_sky'.format(filt)] # + 2.5 * np.log10(area)
        ellipsefit['mu_{}_sky'.format(filt)] = 22.5 - 2.5 * np.log10(ma.std(img))
def search_noise(data, low_deviation, high_deviation, max_diff):
    global logger
    high_info = list()
    low_info = list()
    jitter_info = list()

    spec_median = ma.median(data, axis=2)
    spec_max = spec_median.max(axis=1)
    spec_min = spec_median.min(axis=1)
    ref_value = ma.median(data)
    ref_diff = ma.median(spec_max) - ma.median(spec_min)
    ref_std = ma.std(spec_median)

    limit = ref_value + min(max((ref_std * 3.0), 0.75), high_deviation)

    n_secs = data.shape[1]
    logger.debug("median-signal=%5.3fdB, median-fluctuation=%5.3fdB, std=%5.3f, high-limit=%5.3fdB" %
                 (ref_value, ref_diff, ref_std, limit))
    for rcu in range(data.shape[0]):
        peaks = cSearchPeak(data[rcu, 0, :])
        if not peaks.valid_data:
            return (low_info, high_info, jitter_info)
        peaks.search(delta=10.0)
        if peaks.nMaxPeaks() >= 30:
            logger.debug("RCU=%d: found %d peaks, skip noise test" % (rcu, peaks.nMaxPeaks()))
        else:
            n_bad_high_secs = 0
            n_bad_low_secs = 0
            n_bad_jitter_secs = 0

            rcu_max_diff = spec_max[rcu] - spec_min[rcu]
            for val in spec_median[rcu, :]:
                #logger.debug("RCU=%d: high-noise value=%5.3fdB max-ref-value=%5.3fdB" % (rcu, val, ref_val))
                if ((val > limit) and (rcu_max_diff > 1.0)) or (val > (ref_value + high_deviation)):
                    n_bad_high_secs += 1
                if ((val < (ref_value + low_deviation)) and (rcu_max_diff > 1.0)) or (val < (ref_value + low_deviation)):
                    n_bad_low_secs += 1

            if n_bad_high_secs > 0:
                high_info.append((rcu, spec_max[rcu], n_bad_high_secs, limit, rcu_max_diff))
                logger.debug("RCU=%d: max-noise=%5.3f %d of %d seconds bad" %
                             (rcu, spec_max[rcu], n_bad_high_secs, n_secs))
            if n_bad_low_secs > 0:
                low_info.append((rcu, spec_min[rcu], n_bad_low_secs, (ref_value + low_deviation), rcu_max_diff))
                logger.debug("RCU=%d: min-noise=%5.3f %d of %d seconds bad" %
                             (rcu, spec_min[rcu], n_bad_low_secs, n_secs))

            if (n_bad_high_secs == 0) and (n_bad_low_secs == 0):
                if rcu_max_diff > (ref_diff + max_diff):
                    check_high_value = ref_value + (ref_diff / 2.0)
                    check_low_value = ref_value - (ref_diff / 2.0)
                    for val in spec_median[rcu, :]:
                        if val > check_high_value or val < check_low_value:
                            n_bad_jitter_secs += 1
                    jitter_info.append((rcu, rcu_max_diff, ref_diff, n_bad_jitter_secs))
                    logger.debug("RCU=%d: max spectrum fluctuation %5.3f dB" % (rcu, rcu_max_diff))
    return (low_info, high_info, jitter_info)
def average_combine(self, scale_func=ma.average, scale_to=None):
    """
    Average combine together a set of arrays.

    A `~ccdproc.CCDData` object is returned with the data property
    set to the average of the arrays. If the data was masked or any
    data have been rejected, those pixels will not be included in the
    average. A mask will be returned, and if a pixel has been rejected
    in all images, it will be masked. The uncertainty of the combined
    image is set by the standard deviation of the input images.

    Parameters
    ----------
    scale_func : function, optional
        Function to calculate the average. Defaults to `~numpy.ma.average`.

    scale_to : float, optional
        Scaling factor used in the average combined image. If given,
        it overrides ``CCDData.scaling``. Defaults to None.

    Returns
    -------
    combined_image: `~ccdproc.CCDData`
        CCDData object based on the combined input of CCDData objects.
    """
    if scale_to is not None:
        scalings = scale_to
    elif self.scaling is not None:
        scalings = self.scaling
    else:
        scalings = 1.0

    # set up the data
    data, wei = scale_func(scalings * self.data_arr, axis=0,
                           weights=self.weights, returned=True)

    # set up the mask
    mask = self.data_arr.mask.sum(axis=0)
    mask = (mask == len(self.data_arr))

    # set up the deviation
    uncertainty = ma.std(self.data_arr, axis=0)

    # create the combined image with a dtype that matches the combiner
    combined_image = CCDData(np.asarray(data.data, dtype=self.dtype),
                             mask=mask, unit=self.unit,
                             uncertainty=StdDevUncertainty(uncertainty))

    # update the meta data
    combined_image.meta['NCOMBINE'] = len(self.data_arr)

    # return the combined image
    return combined_image
def infer_ks_test_goodness(l1):
    # l = np.histogram(l1)
    # n = len(l)
    mean = average(l1)
    sigma = std(l1)
    res = kstest(l1, 'norm', [mean, sigma])
    if res[1] < 0.01:
        print('reject')
    else:
        print('accept')
    print(res)
def get_mask(self):
    self.array_mean = ma.mean(self.array)
    self.array_stdev = ma.std(self.array)
    self.array_range = ma.max(self.array) - ma.min(self.array)
    print "The mean is %f, the stdev is %f, the range is %f." % (self.array_mean, self.array_stdev, self.array_range)

    from scipy.io.netcdf import netcdf_file as NetCDFFile
    ### get landmask
    nc = NetCDFFile(os.getcwd() + '/../data/netcdf_files/ORCA2_landmask.nc', 'r')
    self.mask = ma.masked_values(nc.variables['MASK'][:, :self.time_len, :self.lat_len, :180],
                                 -9.99999979e+33)
    nc.close()
    self.xxx, self.yyy, self.zzz = np.lib.index_tricks.mgrid[0:self.time_len, 0:self.lat_len, 0:180]
def calculate_moments(d, minchan=False, maxchan=False, vel=False,
                      bestmask=False, mask=False):
    """This function actually calculates moments"""
    nglat = d.shape[1]
    nglon = d.shape[2]
    nspec = d.shape[0]
    maps = np.zeros((nglat, nglon), dtype={
        'names': ['mean', 'sd', 'errmn', 'errsd', 'skew', 'kurt',
                  'error', 'intint', 'npix'],
        'formats': ['f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4']})
    # These definitions for mask seem backward but are correct.
    noise_portion = ma.masked_where(mask == 1, d)
    good_d = d[minchan:maxchan, ...]
    mask2 = mask[minchan:maxchan, ...]
    #print(minchan)
    #print(maxchan)
    signal_portion = ma.masked_where(mask2 == 0, good_d)
    maps['error'] = ma.std(noise_portion, axis=0)
    maps['intint'] = ma.sum(signal_portion, axis=0)

    for x in range(nglat):
        for y in range(nglon):
            fullspec = d[..., x, y]  # Extract a single spectrum
            ind = np.arange(nspec)
            velmask = mask[minchan:maxchan, x, y]
            if np.sum(velmask) != 0:
                velmask = bestmask
            npix = max(np.sum(velmask), 1)
            ind = ind[velmask > 0]
            sigma = maps['error'][x, y]
            if ind.size > 2 and (sigma > 0):
                mom = idl_stats.wt_moment(vel[ind], fullspec[ind],
                                          errors=np.zeros(ind.size) + sigma)
                maps['mean'][x, y] = mom['mean']
                maps['sd'][x, y] = mom['stdev']
                maps['errmn'][x, y] = mom['errmn']
                maps['errsd'][x, y] = mom['errsd']
                maps['npix'][x, y] = npix
            else:
                maps['mean'][x, y] = np.nan
                maps['sd'][x, y] = np.nan
                maps['errmn'][x, y] = np.nan
                maps['errsd'][x, y] = np.nan
                maps['npix'][x, y] = np.nan
    return (maps)
def calc_chrom_fast(self, index, coords_vals):
    self.population[index]["fitness"] = np.abs(self.array_mean - ma.mean(coords_vals[0])) + \
        np.abs(self.array_stdev - ma.std(coords_vals[0]))
    # + \
    # np.abs(self.array_range - (ma.max(coords_vals[0])-ma.min(coords_vals[0])))/10 + \
    # np.abs((self.chromosome_size-1) - coords_vals[2]) #locations
    # ~ print "Chromosome size: ", self.chromosome_size
    # print "Number of locations is: ", coords_vals[2]
    # ~ print "The sample range is: %g. The array range is: %g " % ((ma.max(coords_vals[0])-ma.min(coords_vals[0])), self.array_range)
    # ~ print np.abs(self.array_mean - ma.mean(coords_vals[0])), np.abs(self.array_stdev - ma.std(coords_vals[0])), np.abs(self.array_range - (ma.max(coords_vals[0])-ma.min(coords_vals[0])))
    # ~ print ma.mean(coords_vals[0]), ma.std(coords_vals[0]), (ma.max(coords_vals[0])-ma.min(coords_vals[0]))
    # ~ print "Fitness is: ", self.population[index]["fitness"]
def generate_rb_hist_n(f_name, n, doy_start, doy_end):
    fh_in = Dataset(os.path.join("Data", "Sentinel", f_name + ".nc"), "r")
    out_path = get_out_path(
        os.path.join("Data", "Sentinel", "usa_rb_hist_" + str(n)))

    init_doy, final_doy = f_name.split("_")[1], f_name.split("_")[2]
    init_doy = date(*map(int, [init_doy[:4], init_doy[4:6], init_doy[6:]]))
    final_doy = date(*map(int, [final_doy[:4], final_doy[4:6], final_doy[6:]]))
    doy_s = date(*map(int, [doy_start[:4], doy_start[4:6], doy_start[6:]]))
    doy_e = date(*map(int, [doy_end[:4], doy_end[4:6], doy_end[6:]]))
    assert ((doy_s - init_doy).days >= n)
    assert ((final_doy - doy_e).days >= 0)

    i_doy = (doy_s - init_doy).days
    for doy in generate_doy(doy_start, doy_end, ""):
        fh_out = Dataset(os.path.join(out_path, doy + ".nc"), "w")
        for name, dim in fh_in.dimensions.items():
            if name != "time":
                fh_out.createDimension(name, len(dim))
        for v_name, varin in fh_in.variables.items():
            if v_name == 'lat' or v_name == 'lon':
                outVar = fh_out.createVariable(v_name, varin.datatype,
                                               varin.dimensions)
                outVar.setncatts(
                    {k: varin.getncattr(k) for k in varin.ncattrs()})
                outVar[:] = varin[:]
            elif v_name != "time":
                outVar = fh_out.createVariable(v_name + "_hist_mean_" + str(n),
                                               varin.datatype, ("lat", "lon",))
                outVar.setncatts(
                    {k: varin.getncattr(k) for k in varin.ncattrs()})
                s_doy = i_doy - n
                print(s_doy, i_doy)
                outVar[:] = ma.mean(varin[s_doy:i_doy, :, :], axis=0)

                outVar = fh_out.createVariable(v_name + "_hist_std_" + str(n),
                                               varin.datatype, ("lat", "lon",))
                outVar[:] = ma.std(varin[s_doy:i_doy, :, :], axis=0)
        i_doy += 1
        fh_out.close()
    fh_in.close()
def res_dist(x, y, e, n_runs=100, random_state=None):
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.4, random_state=random_state)
    test_res = []
    train_res = []
    start_time = time()
    for i in range(n_runs):
        e.fit(x_train, y_train)
        train_res.append(e.score(x_train, y_train))
        test_res.append(e.score(x_test, y_test))
        if (i % (n_runs / 10) == 0):
            print("%d" % i, end=' ')
    print("\nTime: %.3f secs" % (time() - start_time))
    print("Test Min: %.3f Mean: %.3f Max: %.3f SD: %.3f" %
          (min(test_res), mean(test_res), max(test_res), std(test_res)))
    print("Train Min: %.3f Mean: %.3f Max: %.3f SD: %.3f" %
          (min(train_res), mean(train_res), max(train_res), std(train_res)))
    return (train_res, test_res)
def flag_data(dynamic_spectrum, channels_per_subband=16):
    data = dynamic_spectrum
    data_mean = data.mean()
    data_std = data.std()
    flags = numpy.logical_or(abs(data - data_mean) > 8*data_std, data == 0)
    flags[:, 0::channels_per_subband] = True
    flagged_data = ma.array(data, mask=flags, copy=True)

    data_mean = ma.mean(flagged_data)
    data_std = ma.std(flagged_data)
    flags = numpy.logical_or(abs(data - data_mean) > 6*data_std, data == 0)
    print(type(flags))
    flags[:, 0::channels_per_subband] = True
    flagged_data = ma.array(data, mask=flags, copy=True)

    data_mean = ma.mean(flagged_data)
    data_std = ma.std(flagged_data)
    flags = numpy.logical_or(abs(data - data_mean) > 4*data_std, data == 0)
    flags[:, 0::channels_per_subband] = True
    flagged_data = ma.array(data, mask=flags, copy=True)

    data_mean = ma.mean(flagged_data)
    data_std = ma.std(flagged_data)
    flags = numpy.logical_or(abs(data - data_mean) > 4*data_std, data == 0)
    flags[:, 0::channels_per_subband] = True
    flagged_data = ma.array(data, mask=flags, copy=True)

    data_mean = ma.mean(flagged_data)
    data_std = ma.std(flagged_data)
    flags = numpy.logical_or(abs(data - data_mean) > 4*data_std, data == 0)
    flags[:, 0::channels_per_subband] = True
    flagged_data = ma.array(data, mask=flags, copy=True)

    return flagged_data
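The five mask/re-estimate blocks above apply a fixed threshold schedule (8, 6, 4, 4, 4 sigma), each time recomputing the mean and std with ma.mean/ma.std on the partially masked data. Purely as an illustrative sketch, the same schedule could be driven by a loop; the name `flag_data_looped` and the keyword `thresholds` are assumptions, not part of the original code:

import numpy
import numpy.ma as ma

def flag_data_looped(dynamic_spectrum, channels_per_subband=16, thresholds=(8, 6, 4, 4, 4)):
    data = dynamic_spectrum
    flagged_data = ma.array(data, copy=True)
    for n_sigma in thresholds:
        data_mean = ma.mean(flagged_data)
        data_std = ma.std(flagged_data)
        # Flag outliers and zero samples, then always flag the subband edge channels.
        flags = numpy.logical_or(abs(data - data_mean) > n_sigma * data_std, data == 0)
        flags[:, 0::channels_per_subband] = True
        flagged_data = ma.array(data, mask=flags, copy=True)
    return flagged_data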
def nothing(noth):
    # If requested, remove the time gradient from all channels.
    if remove_slope:
        un_mask = sp.logical_not(ma.getmaskarray(NoiseData.data))
        NoiseData.calc_time()
        time = NoiseData.time
        n_time = len(time)
        # Test if the mask is the same for all slices. If it is, that greatly
        # reduces the work as we only have to generate one set of polynomials.
        all_masks_same = True
        for jj in range(n_time):
            if sp.all(un_mask[jj, ...] == un_mask[jj, 0, 0, 0]):
                continue
            else:
                all_masks_same = False
                break
        if all_masks_same:
            polys = misc.ortho_poly(time, 2, un_mask[:, 0, 0, 0], 0)
            polys.shape = (2, len(time), 1, 1, 1)
        else:
            polys = misc.ortho_poly(time[:, None, None, None], 2, un_mask, 0)
        # Subtract the slope mode (1st mode) out of the NoiseData.
        slope_amps = sp.sum(polys[1, ...] * un_mask * NoiseData.data.filled(0), 0)
        NoiseData.data -= polys[1, ...] * slope_amps
    # Iteratively flag on sliding scale to get closer and closer to desired
    # threshold.
    n_time = Data.data.shape[0]
    max_thres = sp.sqrt(n_time) / 2.
    n_iter = 3
    thresholds = (max_thres**(n_iter - 1 - sp.arange(n_iter)) *
                  thres**sp.arange(n_iter))**(1. / (n_iter - 1))
    for threshold in thresholds:
        # Get the deviation from the mean.
        residuals = ma.anom(NoiseData.data, 0).filled(0)
        # Get indices above the threshold.
        mask = abs(residuals) > threshold * ma.std(NoiseData.data, 0)
        # Mask the data.
        Data.data[mask] = ma.masked
        NoiseData.data[mask] = ma.masked
    # Now flag for very noisy channels.
    if max_noise_factor > 0:
        vars = ma.var(NoiseData.data, 0)
        mean_vars = ma.mean(vars, -1).filled(0)
        bad_chans = vars.filled(0) > max_noise_factor * mean_vars[:, :, None]
        Data.data[:, bad_chans] = ma.masked
        NoiseData.data[:, bad_chans] = ma.masked
def ifas_masked_std(array, axis=None):
    """ This returns the true standard deviation of the data. It only
    counts valid data.

    There are outstanding problems with how masked arrays handle
    standard deviations; for some reason there is no np.ma.nanstd
    function. This adds that functionality.

    Parameters
    ----------
    array : ndarray
        The value or array of values from which the standard deviation
        will be taken.
    axis : int
        The axis that the standard deviation will be taken over.

    Returns
    -------
    true_std : float or ndarray
        The standard deviation of the array along whichever axis was given.
    """
    # Fix all invalid data before taking the standard deviation.
    valid_array = np_ma.fix_invalid(array)
    # Test to see if there is any invalid data left.
    if (np.any(np.isnan(valid_array))):
        raise core.error.DataError("The array still contains invalid nan "
                                   "data after the invalid data was fixed. "
                                   "The true std function will not work "
                                   "as expected.")
    if (np.any(np.isinf(valid_array))):
        raise core.error.DataError("The array still contains invalid inf "
                                   "data after the invalid data was fixed. "
                                   "The true std function will not work "
                                   "as expected.")
    # Calculate and return the standard deviation. The masked-array
    # version of the function properly ignores masked entries.
    true_std = np_ma.std(valid_array, axis=axis)
    return true_std
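A short usage sketch (it assumes `np` is numpy and `np_ma` is numpy.ma, as in the snippet above; the error class is whatever `core.error.DataError` resolves to in the surrounding package):

import numpy as np
import numpy.ma as np_ma

x = np.array([1.0, 2.0, np.nan, np.inf, 4.0])
print(ifas_masked_std(x))                         # nan/inf are masked out before the std
print(ifas_masked_std(x.reshape(1, -1), axis=1))  # same thing, along an explicit axis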
def average_combine(self, scale_func=None, scale_to=1.0):
    """Average combine together a set of arrays.

    A CCDData object is returned with the data property set to the
    average of the arrays. If the data was masked or any data have been
    rejected, those pixels will not be included in the average. A mask
    will be returned, and if a pixel has been rejected in all images, it
    will be masked. The uncertainty of the combined image is set by the
    standard deviation of the input images.

    Returns
    -------
    combined_image: `~ccdproc.CCDData`
        CCDData object based on the combined input of CCDData objects.
    """
    if self.scaling is not None:
        scalings = self.scaling
    else:
        scalings = 1.0

    # set up the data
    data, wei = ma.average(scalings * self.data_arr, axis=0,
                           weights=self.weights, returned=True)

    # set up the mask
    mask = self.data_arr.mask.sum(axis=0)
    mask = (mask == len(self.data_arr))

    # set up the deviation
    uncertainty = ma.std(self.data_arr, axis=0)

    # create the combined image
    combined_image = CCDData(data.data, mask=mask, unit=self.unit,
                             uncertainty=StdDevUncertainty(uncertainty))

    # update the meta data
    combined_image.meta['NCOMBINE'] = len(self.data_arr)

    # return the combined image
    return combined_image
def destroy_with_variance(Data, sigma_thres=6, bad_freq_list=[]):
    '''Mask spikes in Data using variance. Polarizations must be in
    XX,XY,YX,YY format. sigma_thres represents how sensitive the flagger
    is (smaller = more masking). The flagged frequencies are appended
    to bad_freq_list.'''
    XX_YY_0 = ma.mean(Data.data[:, 0, 0, :], 0) * ma.mean(Data.data[:, 3, 0, :], 0)
    XX_YY_1 = ma.mean(Data.data[:, 0, 1, :], 0) * ma.mean(Data.data[:, 3, 1, :], 0)
    # Get the normalized variance array for each polarization.
    a = ma.var(Data.data[:, 0, 0, :], 0) / (ma.mean(Data.data[:, 0, 0, :], 0)**2)  # XX
    b = ma.var(Data.data[:, 1, 0, :], 0) / XX_YY_0                                 # XY
    c = ma.var(Data.data[:, 2, 0, :], 0) / XX_YY_0                                 # YX
    d = ma.var(Data.data[:, 3, 0, :], 0) / (ma.mean(Data.data[:, 3, 0, :], 0)**2)  # YY
    # And for cal off.
    e = ma.var(Data.data[:, 0, 1, :], 0) / (ma.mean(Data.data[:, 0, 1, :], 0)**2)  # XX
    f = ma.var(Data.data[:, 1, 1, :], 0) / XX_YY_1                                 # XY
    g = ma.var(Data.data[:, 2, 1, :], 0) / XX_YY_1                                 # YX
    h = ma.var(Data.data[:, 3, 1, :], 0) / (ma.mean(Data.data[:, 3, 1, :], 0)**2)  # YY
    # Get the mean and standard deviation [sigma].
    means = sp.array([ma.mean(a), ma.mean(b), ma.mean(c), ma.mean(d),
                      ma.mean(e), ma.mean(f), ma.mean(g), ma.mean(h)])
    sig = sp.array([ma.std(a), ma.std(b), ma.std(c), ma.std(d),
                    ma.std(e), ma.std(f), ma.std(g), ma.std(h)])
    # Get the max accepted value [sigma_thres*sigma, sigma_thres=6 works really well].
    max_sig = sigma_thres*sig
    max_accepted = means + max_sig
    amount_masked = 0
    for freq in range(0, len(a)):
        if ((a[freq] > max_accepted[0]) or (b[freq] > max_accepted[1]) or
                (c[freq] > max_accepted[2]) or (d[freq] > max_accepted[3]) or
                (e[freq] > max_accepted[4]) or (f[freq] > max_accepted[5]) or
                (g[freq] > max_accepted[6]) or (h[freq] > max_accepted[7])):
            # mask
            amount_masked += 1
            bad_freq_list.append(freq)
            Data.data[:, :, :, freq].mask = True
    return amount_masked
def average_combine(self, scale_func=None, scale_to=1.0):
    """Average combine together a set of arrays.

    A CCDData object is returned with the data property set to the
    average of the arrays. If the data was masked or any data have been
    rejected, those pixels will not be included in the average. A mask
    will be returned, and if a pixel has been rejected in all images, it
    will be masked. The uncertainty of the combined image is set by the
    standard deviation of the input images.

    Returns
    -------
    combined_image: `~ccdproc.CCDData`
        CCDData object based on the combined input of CCDData objects.
    """
    if self.scaling is not None:
        scalings = self.scaling
    else:
        scalings = 1.0

    # set up the data
    data, wei = ma.average(scalings * self.data_arr, axis=0,
                           weights=self.weights, returned=True)

    # set up the mask
    mask = self.data_arr.mask.sum(axis=0)
    mask = (mask == len(self.data_arr))

    # set up the deviation
    uncertainty = ma.std(self.data_arr, axis=0)

    # create the combined image with a dtype that matches the combiner
    combined_image = CCDData(np.asarray(data.data, dtype=self.dtype),
                             mask=mask, unit=self.unit,
                             uncertainty=StdDevUncertainty(uncertainty))

    # update the meta data
    combined_image.meta['NCOMBINE'] = len(self.data_arr)

    # return the combined image
    return combined_image
def measure(mode, x, y, x0, x1):
    """ Return the mean and standard deviation of y in the window x0 to x1. """
    xm = ma.masked_outside(x, x0, x1)
    ym = ma.array(y, mask=ma.getmask(xm))
    if mode == 'mean':
        r1 = ma.mean(ym)
        r2 = ma.std(ym)
    if mode == 'max':
        r1 = ma.max(ym)
        r2 = 0
    if mode == 'min':
        r1 = ma.min(ym)
        r2 = 0
    if mode == 'median':
        r1 = ma.median(ym)
        r2 = 0
    if mode == 'p2p':  # peak to peak
        r1 = ma.ptp(ym)
        r2 = 0
    return (r1, r2)
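Example call on synthetic data (assumes only numpy, with `ma` being numpy.ma as above):

import numpy as np
import numpy.ma as ma

x = np.linspace(0.0, 10.0, 101)
y = np.sin(x)
mean_in_window, std_in_window = measure('mean', x, y, 2.0, 4.0)
peak_to_peak, _ = measure('p2p', x, y, 2.0, 4.0)
print(mean_in_window, std_in_window, peak_to_peak)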
def dynamic_mask(self, image, sigrange):
    """
    Creates a numpy mask on the image, filtering out any pixel values that
    are more than sigrange*std from the median value.

    Input: numpy array of the image, sigrange for multiplier on standard dev range
    Output: masked numpy array covering any pixels above or below the standard dev range
    """
    # Make a masked array using the static mask and input image.
    pre_masked = ma.array(image, mask=self.static_mask)

    # Mask saturated or empty pixels.
    masked1 = ma.masked_greater(pre_masked, 254)
    masked1 = ma.masked_less(masked1, 0)

    median = ma.median(masked1)
    mean = ma.mean(masked1)
    std = ma.std(masked1)

    return masked1, median, mean, std
def __fit__(self, rating, row=True):
    if isinstance(rating, ma.MaskedArray):
        self._rating = rating
    else:
        self._rating = ma.masked_equal(rating, 0)
    self._mean = ma.mean(self._rating, axis=1, keepdims=True)
    self._sigma = ma.std(self._rating, axis=1, keepdims=True)
    self._mean_center_rating = self._rating - self._mean
    self._z = self._mean_center_rating / self._sigma
    assert self.config.sim_config.name in ["person", "discounted_person", "amplify_person",
                                           "idf_person", "pca_person", "cosine"]
    similaritor = SimilaritorFactory(self.config.sim_config)
    if row:
        self._sim = similaritor(rating=self._rating,
                                mean_center_rating=self._mean_center_rating)
    else:
        self._rating = self._rating.T
        self._mean_center_rating = self._mean_center_rating.T
        self._z = self._z.T
        self._sim = similaritor(rating=self._rating,
                                mean_center_rating=self._mean_center_rating)