def destroy_time_with_mean_arrays_2pol(Data, flag_size=40):
    '''Mask times with high means.

    This is the same as last function, but for Parkes 2 pol data.
    '''
    # Get the means over all frequencies. (for all pols. and cals.)
    a = ma.mean(Data.data[:, 0, 0, :], -1)
    b = ma.mean(Data.data[:, 1, 0, :], -1)
    # Get means and std for all arrays.
    means = sp.array([ma.mean(a), ma.mean(b)])
    sig = sp.array([ma.std(a), ma.std(b)])
    # Get max accepted values.
    max_accepted = means + 3*sig
    # Get min accepted values.
    min_accepted = means - 3*sig
    # Find bad times.
    bad_times = []
    for time in range(0, len(a)):
        if ((a[time] > max_accepted[0]) or (b[time] > max_accepted[1]) or
            (a[time] < min_accepted[0]) or (b[time] < min_accepted[1])):
            bad_times.append(time)
    # Mask bad times and those +- flag_size around.
    for time in bad_times:
        if time - flag_size < 0:
            Data.data[0:(time+flag_size), :, :, :].mask = True
        else:
            Data.data[(time-flag_size):(time+flag_size), :, :, :].mask = True
    return
def average(tsl):
    # find fastest dt:
    dt_common = 1e12
    for ts in tsl:
        if ts.dt < dt_common:
            newtime = ts.timearray()
            dt_common = ts.dt
    # interpolate all series to new dt:
    tslip = [ts.interpolate(newtime, dt_common) for ts in tsl]
    if len(tslip[0].data.shape) == 1:
        ave = np.empty((len(tslip), len(tslip[0].data)))
    else:
        ave = np.empty(
            (len(tslip), tslip[0].data.shape[0], tslip[0].data.shape[1]))
    for its, ts in enumerate(tslip):
        if len(ts.data.shape) == 1:
            ave[its] = ts.data
        else:
            ave[its, :, :] = ts.data[:, :]
    if len(ts.data.shape) == 1:
        return Timeseries(ma.mean(ave, axis=0), dt_common)
    else:
        avef = ma.zeros((tslip[0].data.shape[0], tslip[0].data.shape[1]))
        for nrow, row in enumerate(avef):
            avef[nrow, :] = ma.mean(ave[:, nrow, :], axis=0)
        return Timeseries(avef, dt_common)
def test_rbf(x, annual_sample_mean=annual_sample_mean):
    coords_vals = year_sample_dict_data[x]
    x_nodes = []
    y_nodes = []
    z_nodes = []
    values_list = []
    for item in coords_vals:
        x_nodes.append(item[0])
        y_nodes.append(item[1])
        z_nodes.append(item[2])
        values_list.append(coords_vals[item])
    xs = x_nodes
    ys = y_nodes
    zs = z_nodes
    values_list = np.array(values_list)
    time_len = 64
    all_data = year_stack[x, :time_len, :lat_end, :lon_end]
    ### New and improved and faster!!!
    annual_mean = np.mean(all_data)
    sample_mean = np.mean(values_list)
    annual_stdev = np.std(all_data)
    sample_stdev = np.std(values_list)
    annual_sample_mean[x, :time_len, :lat_end, :lon_end]
    for item in year_sample_dict_data[x].keys():
        #print item
        annual_sample_mean[x, item[0], item[1], item[2]] = year_sample_dict_data[x][item]
    plt.close('all')
    plt.subplot(2, 1, 1)
    plt.pcolormesh(ma.mean(all_data, 0), vmin=-5, vmax=15)
    plt.colorbar()
    plt.axis('tight')
    plt.subplot(2, 1, 2)
    plt.pcolormesh(ma.mean(annual_sample_mean[x, :, :, :], 0), vmin=-5, vmax=15)
    plt.colorbar()
    plt.axis('tight')
    plt.savefig(output_dir + coords + '_year_' + str(x) + '.png')
    plt.close('all')
    fitness = np.abs(annual_mean - sample_mean) + np.abs(annual_stdev - sample_stdev)
    return fitness, annual_mean, sample_mean, annual_stdev, sample_stdev  #, annual_sample_mean
def search_oscillation(data, pol, delta):
    info = list()
    _data = data.getAll(pol=pol)
    mean_spectras = ma.mean(_data, axis=1)
    mean_spectra = ma.mean(mean_spectras, axis=0)
    mean_low = ma.mean(_data.min(axis=1))
    info.append((-1, 0, 0, 0))
    for rcu in data.getActiveRcus(pol):
        rcu_bin = rcu
        if pol not in ('XY', 'xy'):
            rcu_bin /= 2
        #logger.debug("rcu=%d rcu_bin=%d" %(rcu, rcu_bin))
        #max_peak_val = 0
        max_n_peaks = 0
        max_sum_peaks = 0
        peaks = cSearchPeak(mean_spectras[rcu_bin, :] - mean_spectra)
        if peaks.valid_data:
            peaks.search(delta=delta, max_width=8)
            max_val = mean_spectras[rcu_bin, :].max()
            max_n_peaks = peaks.nMaxPeaks()
            max_sum_peaks = peaks.getSumPeaks()
            bin_low = _data[rcu_bin, :, :].min(axis=0).mean()
            if max_n_peaks > 5:
                logger.debug("rcu_bin=%d: number-of-peaks=%d max_value=%3.1f peaks_sum=%5.3f low_value=%3.1f" %
                             (rcu_bin, max_n_peaks, max_val, max_sum_peaks, bin_low))
                if bin_low > (mean_low + 2.0):  #peaks.getSumPeaks() > (median_sum_peaks * 2.0):
                    info.append((rcu_bin, max_sum_peaks, max_n_peaks, bin_low))
            if max_val > 150.0:  # only one high peak
                info.append((rcu_bin, max_sum_peaks, max_n_peaks, bin_low))
    return (info)  #(sorted(info,reverse=True))
def destroy_with_variance_2pol(Data, sigma_thres=6, bad_freq_list=[], submean=True):
    '''Mask frequencies with high variance.

    This is the same as last function, but for Parkes 2 pol data.
    '''
    # Get the normalized variance array for each polarization.
    #Data.data[Data.data>3] = ma.masked
    #Data.data[Data.data<3] = ma.masked
    Data.data[np.isnan(Data.data)] = ma.masked
    Data.data[Data.data <= 0.] = ma.masked
    if submean:
        a = ma.var(Data.data[:, 0, 0, :], 0)/(ma.mean(Data.data[:, 0, 0, :], 0)**2)  # XX
        b = ma.var(Data.data[:, 1, 0, :], 0)/(ma.mean(Data.data[:, 1, 0, :], 0)**2)  # YY
    else:
        a = ma.var(Data.data[:, 0, 0, :], 0)
        b = ma.var(Data.data[:, 1, 0, :], 0)
    # Get the mean and standard deviation [sigma].
    means = sp.array([ma.mean(a), ma.mean(b)])
    sig = sp.array([ma.std(a), ma.std(b)])
    # Get the max accepted value [sigma_thres*sigma, sigma_thres=6 works really well].
    max_sig = sigma_thres*sig
    max_accepted = means + max_sig
    min_accepted = means - max_sig
    amount_masked = 0
    for freq in range(0, len(a)):
        if ((a[freq] > max_accepted[0]) or (b[freq] > max_accepted[1]) or
            (a[freq] < min_accepted[0]) or (b[freq] < min_accepted[1])):
            # mask
            amount_masked += 1
            bad_freq_list.append(freq)
            Data.data[:, :, :, freq].mask = True
    return amount_masked
def test_correlate(self):
    Data = self.blocks[0]
    Data.calc_freq()
    map = self.map
    gain = 3.45
    const = 2.14
    # Set all data = gain*(cos(time_ind)).
    Data.data[:, :, :, :] = gain*sp.cos(
        sp.arange(1, 11)[:, sp.newaxis, sp.newaxis, sp.newaxis])
    # Explicitly set time mean to something known.
    Data.data -= ma.mean(Data.data, 0)
    Data.data += gain*const*Data.freq/800.0e6
    # Now the Map.
    map[:, :, :] = 0.0
    # Set 10 pixels to match cos part of data.
    map[:, range(10), range(10)] = (
        sp.cos(sp.arange(1, 11)[None, :]))
    map[:, range(10), range(10)] -= ma.mean(
        map[:, range(10), range(10)], 1)[:, None]
    # Give Map a mean to test things out.  Should really have no effect.
    map[...] += 0.352*map.get_axis('freq')[:, None, None]/800.0e6
    # Rig the pointing to point to those 10 pixels.
    def rigged_pointing():
        Data.ra = map.get_axis('ra')[range(10)]
        Data.dec = map.get_axis('dec')[range(10)]
    Data.calc_pointing = rigged_pointing
    solved_gains = smd.sub_map(Data, map, correlate=True)
    # Now data should be just be gain*const*f, within machine precision.
    Data.data /= gain*Data.freq/800.0e6
    self.assertTrue(sp.allclose(Data.data[:, :, :, :], const))
    self.assertTrue(sp.allclose(solved_gains, gain))
def calculateMeans(self, synMean, synMin, synMed, synMax, synMinCP):
    """
    Calculate mean, median, minimum, maximum and percentiles of pressure
    values from synthetic events.

    :param synMean: `numpy.ndarray`
    :param synMin: `numpy.ndarray`
    :param synMed: `numpy.ndarray`
    :param synMax: `numpy.ndarray`
    :param synMinCP: `numpy.ndarray`
    """
    synMean = ma.masked_values(synMean, -9999.)
    synMin = ma.masked_values(synMin, -9999.)
    synMed = ma.masked_values(synMed, -9999.)
    synMax = ma.masked_values(synMax, -9999.)

    self.synMean = ma.mean(synMean, axis=0)
    self.synMed = ma.mean(synMed, axis=0)
    self.synMin = ma.mean(synMin, axis=0)
    self.synMax = ma.mean(synMax, axis=0)

    self.synMeanUpper = percentile(ma.compressed(synMean), per=95, axis=0)
    self.synMeanLower = percentile(ma.compressed(synMean), per=5, axis=0)
    self.synMinUpper = percentile(ma.compressed(synMin), per=95, axis=0)
    self.synMinLower = percentile(ma.compressed(synMin), per=5, axis=0)

    self.synMinCPDist = np.mean(synMinCP, axis=0)
    self.synMinCPLower = percentile(synMinCP, per=5, axis=0)
    self.synMinCPUpper = percentile(synMinCP, per=95, axis=0)

    r = list(np.random.uniform(high=synMean.shape[0], size=3).astype(int))
    self.synRandomMinima = synMean[r, :, :]
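# A short standalone illustration (not part of the class above, and independent
# of its project-specific percentile() helper), assuming only numpy:
# ma.masked_values(arr, -9999.) hides the sentinel fill value, so the subsequent
# ma.mean(..., axis=0) and ma.compressed(...) operate only on valid event values.
import numpy as np
import numpy.ma as ma

syn = np.array([[1010.0, -9999.0, 995.0],
                [1005.0, 990.0, -9999.0]])
syn_masked = ma.masked_values(syn, -9999.)
print(ma.mean(syn_masked, axis=0))   # [1007.5 990.0 995.0] -- sentinels excluded
print(ma.compressed(syn_masked))     # [1010. 995. 1005. 990.] -- 1-D array of valid values only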
def centerfit_1dsymmetry(image, minlen=None, cen_exc=5, mask=None, plotit=False):
    """Return indices of an approximate symmetry center in a 2D image.

    The symmetry center of the 2D array is calculated from 1D mean values
    calculated in horizontal and vertical directions. The 1D centers of
    symmetry are determined by extracting two symmetric subarrays around a
    point and calculating chi-squared between them. This is done for all
    points where `minlen`+`cen_exc` symmetric points are available. The point
    with the smallest value of chi-squared is returned as the center of
    symmetry.

    Keyword arguments:
        `minlen` : Minimum length of subarrays from which chisq is calculated.
        `cen_exc`: Number of points around the tested center excluded from
            the symmetry check.

    The returned index will be in the range
    [minlen+cen_exc, len(arr)-(minlen+cen_exc)].
    """
    if mask is None:
        im = ma.array(image)
    else:
        im = ma.array(image, mask=np.logical_not(mask))
    ymean = ma.mean(im, axis=0).astype(np.float64)
    xmean = ma.mean(im, axis=1).astype(np.float64)
    kwas = {'minlen': minlen, 'cen_exc': cen_exc}
    return symcen1d(ymean, **kwas), symcen1d(xmean, **kwas)
def print_time_stats(start, bf_plot, af_plot, end):
    print '# Timing statistics:'
    # print 'start %f, bf_plot %f, af_plot %f, end %f' % (start, bf_plot, af_plot, end)
    print 'Before plot: %f , %.1f%%' % (mean(bf_plot - start),
                                        mean(bf_plot - start) / mean(end - start) * 100)
    print 'Plotting: %f , %.1f%%' % (mean(af_plot - bf_plot),
                                     mean(af_plot - bf_plot) / mean(end - start) * 100)
    print 'Writing files: %f , %.1f%%' % (mean(end - af_plot),
                                          mean(end - af_plot) / mean(end - start) * 100)
    print 'Total: %f , %.1f%%' % (mean(end - start),
                                  mean(end - start) / mean(end - start) * 100)
def calc_froude_number(along_u, gprime, interface_depth, adcp_z, seafloor):
    """Calculate composite Froude number G"""
    G, top_u, bot_u, Fr_top_sq, Fr_bot_sq = preall(
        gprime.shape, copies=5, initial_value=np.nan)

    for i, u_i in enumerate(along_u.T):
        top_layer_inds = adcp_z <= interface_depth[i]
        bot_layer_inds = adcp_z > interface_depth[i]
        if 0 in [top_layer_inds.sum(), bot_layer_inds.sum()]:
            # Insufficient data to calculate Froude number
            continue
        top_u[i] = ma.mean(u_i[top_layer_inds])
        bot_u[i] = ma.mean(u_i[bot_layer_inds])
        Fr_top_sq[i] = top_u[i]**2/(gprime[i]*interface_depth[i])
        Fr_bot_sq[i] = bot_u[i]**2/(gprime[i]*(seafloor[i] - interface_depth[i]))
        G_squared = (Fr_top_sq[i] + Fr_bot_sq[i])
        G[i] = np.sqrt(G_squared)

    # fig, ax = plt.subplots(nrows=2, sharex=True)
    # ax[0].plot(top_u, 'b')
    # ax[0].plot(bot_u, 'r')
    # ax[1].plot(seafloor)
    return G, Fr_top_sq, Fr_bot_sq
def getGriddedFowlerCurlFromDaily(m, files, lon, xptsM, yptsM, xptsG, yptsG,
                                  lonsG, latsG, dxRes):
    xvelG = ma.masked_all((size(files), lonsG.shape[0], lonsG.shape[1]))
    yvelG = ma.masked_all((size(files), lonsG.shape[0], lonsG.shape[1]))
    curlG = ma.masked_all((size(files), lonsG.shape[0], lonsG.shape[1]))
    #print 'uvel', uvelD.shape
    x = 0
    for file in files:
        fd = open(file, 'rb')
        motionDat = fromfile(file=fd, dtype='<i2')
        motionDat = reshape(motionDat, [361, 361, 3])

        xt = motionDat[:, :, 0] / 1000.
        yt = motionDat[:, :, 1] / 1000.
        q = motionDat[:, :, 2] / 1000.

        mask = where((q <= 0) | (q > 1), 0, 1)
        xt = ma.masked_where(mask < 0.5, xt)
        yt = ma.masked_where(mask < 0.5, yt)

        alpha = lon * pi / 180.
        uvelT = yt * sin(alpha) + xt * cos(alpha)
        vvelT = yt * cos(alpha) - xt * sin(alpha)

        # Set masked values back to nan for gridding purposes
        uvelT[where(ma.getmask(uvelT))] = np.nan
        vvelT[where(ma.getmask(vvelT))] = np.nan
        #print uvel

        # Re-grid data
        #print uvel.flatten().shape, xptsM.flatten().shape, xptsG.shape
        uvelG = griddata((xptsM.flatten(), yptsM.flatten()), uvelT.flatten(),
                         (xptsG, yptsG), method='linear')
        vvelG = griddata((xptsM.flatten(), yptsM.flatten()), vvelT.flatten(),
                         (xptsG, yptsG), method='linear')

        # Rotate data onto new grid
        xvelGT, yvelGT = m.rotate_vector(uvelG, vvelG, lonsG, latsG)
        xvelGT = ma.masked_invalid(xvelGT)
        yvelGT = ma.masked_invalid(yvelGT)
        xvelG[x] = xvelGT
        yvelG[x] = yvelGT
        curlG[x] = calcCurlSq2dXYGradient(xvelGT, yvelGT, dxRes)
        # print x, curlG[x]
        x += 1

    # COULD ROTATE HERE AND DO CURL OF DAILY VARIABLES.
    xvelMean = ma.mean(xvelG, axis=0)
    yvelMean = ma.mean(yvelG, axis=0)
    curlMean = ma.mean(curlG, axis=0)
    #vvelD=vstack([vvelD, vvelT])
    return xvelMean, yvelMean, curlMean
def Portrait_diagram_subregion(obs_subregion_mean, obs_name, model_subregion_mean,
                               model_names, seasonal_cycle, file_name,
                               normalize=True):
    nmodel, nt, nregion = model_subregion_mean.shape
    if seasonal_cycle:
        obs_data = ma.mean(
            obs_subregion_mean.reshape([1, nt / 12, 12, nregion]), axis=1)
        model_data = ma.mean(
            model_subregion_mean.reshape([nmodel, nt / 12, 12, nregion]), axis=1)
        nt = 12
    else:
        obs_data = obs_subregion_mean
        model_data = model_subregion_mean

    subregion_metrics = ma.zeros([4, nregion, nmodel])

    for imodel in np.arange(nmodel):
        for iregion in np.arange(nregion):
            # First metric: bias
            subregion_metrics[0, iregion, imodel] = metrics.calc_bias(
                model_data[imodel, :, iregion], obs_data[0, :, iregion],
                average_over_time=True)
            # Second metric: standard deviation
            subregion_metrics[1, iregion, imodel] = metrics.calc_stddev_ratio(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])
            # Third metric: RMSE
            subregion_metrics[2, iregion, imodel] = metrics.calc_rmse(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])
            # Fourth metric: correlation
            subregion_metrics[3, iregion, imodel] = metrics.calc_correlation(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])

    if normalize:
        for iregion in np.arange(nregion):
            subregion_metrics[0, iregion, :] = (
                subregion_metrics[0, iregion, :] / ma.std(obs_data[0, :, iregion]) * 100.)
            subregion_metrics[1, iregion, :] = subregion_metrics[1, iregion, :] * 100.
            subregion_metrics[2, iregion, :] = (
                subregion_metrics[2, iregion, :] / ma.std(obs_data[0, :, iregion]) * 100.)

    region_names = ['R%02d' % i for i in np.arange(nregion) + 1]

    for imetric, metric in enumerate(['bias', 'std', 'RMSE', 'corr']):
        plotter.draw_portrait_diagram(
            subregion_metrics[imetric, :, :], region_names, model_names,
            file_name + '_' + metric, xlabel='model', ylabel='region')
def compute_freqs(self):
    """
    Computes the frequencies of the SNPs. Stored in self.freqs.
    """
    if self.ndim == 2:
        self.freqs = ma.mean(self.gts, axis=0) / 2.0
    elif self.ndim == 3:
        self.freqs = ma.mean(self.gts[:, 0, :], axis=0) / 2.0
def Time_series_subregion(obs_subregion_mean, obs_name, model_subregion_mean,
                          model_names, seasonal_cycle, file_name, row, column,
                          x_tick=['']):
    nmodel, nt, nregion = model_subregion_mean.shape
    if seasonal_cycle:
        obs_data = ma.mean(
            obs_subregion_mean.reshape([1, nt / 12, 12, nregion]), axis=1)
        model_data = ma.mean(
            model_subregion_mean.reshape([nmodel, nt / 12, 12, nregion]), axis=1)
        nt = 12
    else:
        obs_data = obs_subregion_mean
        model_data = model_subregion_mean

    x_axis = np.arange(nt)
    x_tick_values = x_axis

    fig = plt.figure()
    rcParams['xtick.labelsize'] = 6
    rcParams['ytick.labelsize'] = 6

    for iregion in np.arange(nregion):
        ax = fig.add_subplot(row, column, iregion + 1)
        x_tick_labels = ['']
        if iregion + 1 > column * (row - 1):
            x_tick_labels = x_tick
        else:
            x_tick_labels = ['']
        ax.plot(
            x_axis, obs_data[0, :, iregion], color='r', lw=2, label=obs_name)
        for imodel in np.arange(nmodel):
            ax.plot(
                x_axis, model_data[imodel, :, iregion], lw=0.5,
                label=model_names[imodel])
        ax.set_xlim([-0.5, nt - 0.5])
        ax.set_xticks(x_tick_values)
        ax.set_xticklabels(x_tick_labels)
        ax.set_title('Region %02d' % (iregion + 1), fontsize=8)

    ax.legend(
        bbox_to_anchor=(-0.2, row / 2), loc='center', prop={'size': 7},
        frameon=False)

    fig.subplots_adjust(hspace=0.7, wspace=0.5)
    fig.savefig(file_name, dpi=600, bbox_inches='tight')
def mean_normalise(self):
    if not self.mean_normalised:
        if self.gts.ndim == 2:
            self.gts = self.gts - ma.mean(self.gts, axis=0)
        elif self.gts.ndim == 3:
            for i in range(0, self.gts.shape[1]):
                self.gts[:, i, :] = self.gts[:, i, :] - ma.mean(
                    self.gts[:, i, :], axis=0)
        self.mean_normalised = True
def getGriddedFowlerFromDaily(m, files, lon, xptsM, yptsM, xptsG, yptsG,
                              lonsG, latsG):
    uvelD = ma.masked_all((size(files), lon.shape[0], lon.shape[1]))
    vvelD = ma.masked_all((size(files), lon.shape[0], lon.shape[1]))
    # print 'uvel', uvelD.shape
    x = 0
    for file in files:
        fd = open(file, 'rb')
        motionDat = fromfile(file=fd, dtype='<i2')
        motionDat = reshape(motionDat, [361, 361, 3])

        xt = motionDat[:, :, 0] / 1000.
        yt = motionDat[:, :, 1] / 1000.
        q = motionDat[:, :, 2] / 1000.

        mask = where((q <= 0) | (q > 1), 0, 1)
        xt = ma.masked_where(mask < 0.5, xt)
        yt = ma.masked_where(mask < 0.5, yt)

        # Comes in xy coordinates so need to rotate to UV
        #xvel= ma.masked_where(np.isnan(xt), xt)
        #yvel = ma.masked_where(np.isnan(yt), yt)
        #xvel=f[0]
        #yvel=f[1]
        alpha = lon * pi / 180.
        uvelT = yt * sin(alpha) + xt * cos(alpha)
        vvelT = yt * cos(alpha) - xt * sin(alpha)
        uvelD[x] = uvelT
        vvelD[x] = vvelT
        x += 1

    # COULD ROTATE HERE AND DO CURL OF DAILY VARIABLES.
    #vvelD=vstack([vvelD, vvelT])
    uvel = ma.mean(uvelD, axis=0)
    vvel = ma.mean(vvelD, axis=0)
    #print uvel
    # if we want to set masked values back to nan for gridding purposes
    uvel[where(ma.getmask(uvel))] = np.nan
    vvel[where(ma.getmask(vvel))] = np.nan
    #print uvel

    # Re-grid data
    # print uvel.flatten().shape, xptsM.flatten().shape, xptsG.shape
    uvelG = griddata((xptsM.flatten(), yptsM.flatten()), uvel.flatten(),
                     (xptsG, yptsG), method='linear')
    vvelG = griddata((xptsM.flatten(), yptsM.flatten()), vvel.flatten(),
                     (xptsG, yptsG), method='linear')

    # Rotate data onto new grid
    xvelG, yvelG = m.rotate_vector(uvelG, vvelG, lonsG, latsG)
    xvelG = ma.masked_invalid(xvelG)
    yvelG = ma.masked_invalid(yvelG)

    return xvelG, yvelG
def filter_missingness(self, max_missing=5, verbose=False):
    if self.ndim == 2:
        missingness = ma.mean(self.gts.mask, axis=0)
    elif self.ndim == 3:
        missingness = ma.mean(self.gts.mask, axis=(0, 1))
    missingness_pass = 100 * missingness < max_missing
    if verbose:
        print(str(self.freqs.shape[0] - np.sum(missingness_pass)) +
              ' SNPs with missingness >' + str(max_missing) + '%')
    self.filter(missingness_pass)
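# A small aside (not from the original class), assuming only numpy: taking
# ma.mean over the boolean mask itself, as filter_missingness does, gives the
# fraction of missing calls per SNP, which is then compared against the
# max_missing percentage threshold.
import numpy.ma as ma

gts = ma.array([[0, 1, 2],
                [1, 1, 0],
                [2, 0, 1],
                [0, 2, 1]], mask=[[0, 0, 1],
                                  [0, 0, 0],
                                  [0, 1, 1],
                                  [0, 0, 0]])
missingness = ma.mean(gts.mask, axis=0)
print(missingness)               # [0.   0.25 0.5 ] -- fraction masked per column
print(100 * missingness < 5)     # [ True False False] -- only the first SNP passes a 5% cut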
def test_fave_scale(self):
    hanning.hanning_smooth(self.Data)
    rebin_freq.rebin(self.Data, 2.)
    cal_scale.scale_by_cal(self.Data, False, True, False)
    data = self.Data.data
    self.assertTrue(ma.allclose(ma.mean(data[:, 0, 0, :] - data[:, 0, 1, :], -1), 1.0))
    self.assertTrue(ma.allclose(ma.mean(data[:, 3, 0, :] - data[:, 3, 1, :], -1), 1.0))
def update_ranges(self, x_data, y_data):
    s_x = ma.mean(x_data)
    s_y = ma.mean(y_data)
    bounds = [
        ma.min(x_data) - s_x, ma.max(x_data) + s_x,
        ma.min(y_data) - s_y, ma.max(y_data) + s_y
    ]
    plt.axis(bounds)
def mean_normalise(self):
    """
    This normalises the SNPs/PGS columns to have mean zero.
    """
    if not self.mean_normalised:
        if self.gts.ndim == 2:
            self.gts = self.gts - ma.mean(self.gts, axis=0)
        elif self.gts.ndim == 3:
            for i in range(0, self.gts.shape[1]):
                self.gts[:, i, :] = self.gts[:, i, :] - ma.mean(self.gts[:, i, :], axis=0)
        self.mean_normalised = True
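# A brief standalone sketch (not part of the class above), assuming only numpy:
# column-wise mean-centring of a masked genotype matrix, i.e. the 2-D branch of
# mean_normalise. Masked entries are excluded from each column mean and remain
# masked after centring.
import numpy.ma as ma

gts = ma.array([[0.0, 2.0],
                [1.0, 1.0],
                [2.0, 0.0]], mask=[[0, 0], [0, 1], [0, 0]])
centred = gts - ma.mean(gts, axis=0)
print(ma.mean(centred, axis=0))   # [0.0 0.0] -- each column now has mean zero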
def mol_calibration_coef(self, mol=None, atb=None, navg=30, alt=lidar_alt,
                         metalt=met_alt, idx=None, navgh=50, zmin=30, zmax=34):
    """
    Returns the molecular calibration coefficient, computed from atb 532 nm
    and molecular density profiles, averaged between zmin and zmax km
    vertically, and [i-navgh:i+navgh] profiles horizontally using a moving
    average.
    shape [nprof]
    """
    if mol is None and atb is None:
        mol = self.mol_on_lidar_alt(navg=navg, alt=alt, metalt=metalt, idx=idx)
        atb = self.atb(navg=navg, idx=idx)

    # remove atb and molecular unfit for calibration purposes
    # this level of backscattering is most probably due to noise
    # in the lower stratosphere
    # (and if it's not noise we don't want it anyway)
    atb[np.abs(atb) > atb_max[self.z]] = np.nan
    mol[mol < 0] = np.nan

    atb = ma.masked_invalid(atb)
    mol = ma.masked_invalid(mol)

    idx = (alt >= zmin) & (alt <= zmax)
    atb_calib_profile = ma.mean(atb[:, idx], axis=1)
    mol_calib_profile = ma.mean(mol[:, idx], axis=1)

    # now do a moving average, weeding out bad profiles
    atbbounds = iatb_bounds[self.z]
    nprof = mol.shape[0]
    coef = np.zeros([nprof])
    for i in np.arange(nprof):
        idxh = np.r_[np.max([0, i - navgh]):np.min([nprof - 1, i + navgh])]
        atbslice = atb_calib_profile[idxh]
        molslice = mol_calib_profile[idxh]

        idx = (atbslice > atbbounds[0]) & (atbslice < atbbounds[1])
        coef[i] = ma.mean(atbslice[idx]) / ma.mean(molslice)

    return coef
def filter_foregrounds(Data, n_bands=20, time_bins_smooth=10.):
    """Gets an estimate of the foregrounds and subtracts it out of the data.

    The foreground removal is very rough, just used to push the foreground
    down a bunch so the RFI can be more easily found.

    Two things are done to estimate the foregrounds: averaging over a fairly
    wide band, and smoothing to just below the beam crossing time scale.

    Parameters
    ----------
    Data : DataBlock object
        Data from which to remove the foregrounds.
    n_bands : int
        Number of bands to split the data into.  Foregrounds are assumed to
        be the same throughout each band.
    time_bins_smooth : float
        Number of time bins to smooth over to find the foregrounds (full
        width half max of the filter kernel).  Should be shorter than the
        beam crossing time (by about a factor of 2).
    """
    # Some basic numbers.
    n_chan = Data.dims[-1]
    sub_band_width = float(n_chan)/n_bands
    # First up, initialize the smoothing kernel.
    width = time_bins_smooth/2.355
    # Two sigma edge cut off.
    nk = round(4*width) + 1
    smoothing_kernal = sig.gaussian(nk, width)
    smoothing_kernal /= sp.sum(smoothing_kernal)
    smoothing_kernal.shape = (nk, 1, 1)
    # Now loop through the sub-bands. Foregrounds are assumed to be identical
    # within a sub-band.
    for subband_ii in range(n_bands):
        # Figure out what data is in this subband.
        band_start = round(subband_ii * sub_band_width)
        band_end = round((subband_ii + 1) * sub_band_width)
        data = Data.data[:, :, :, band_start:band_end]
        # Estimate the foregrounds.
        # Take the band mean.
        foregrounds = ma.mean(data, -1)
        # Now low pass filter.
        fore_weights = (sp.ones(foregrounds.shape, dtype=float)
                        - ma.getmaskarray(foregrounds))
        foregrounds -= ma.mean(foregrounds, 0)
        foregrounds = foregrounds.filled(0)
        foregrounds = sig.convolve(foregrounds, smoothing_kernal, mode='same')
        fore_weights = sig.convolve(fore_weights, smoothing_kernal, mode='same')
        foregrounds /= fore_weights
        # Subtract out the foregrounds.
        #print data.shape
        data[...] -= foregrounds[:, :, :, None]
def rms_profile_convergence(masked_vel, height):
    moving_avg = masked_vel[0][height, :].reshape(
        masked_vel[0][height, :].shape[0], 1)
    rmse_series = np.zeros((masked_vel.shape[0] - 1, ))
    for f in range(1, masked_vel.shape[0] + 1):
        moving_avg_new = np.sqrt(
            nma.mean(masked_vel[0:f][:, height, :]**2, axis=0))
        rmse = np.sqrt(nma.mean((moving_avg_new - moving_avg)**2))
        moving_avg = moving_avg_new
        rmse_series[f - 1] = rmse
    return rmse_series
def plot_stdevs(data, name):
    data /= np.max(np.abs(data), axis=0)
    year_stack = ma.array(np.split(data, 10, axis=0))
    vmin, vmax = 0, 0.5
    #~ plt.figure(figsize=(10, 10), dpi=50)
    curr_map = Basemap(projection='cyl', llcrnrlon=ll_lon, llcrnrlat=ll_lat,
                       urcrnrlon=ur_lon, urcrnrlat=ur_lat,
                       resolution='i', area_thresh=100.)
    x, y = curr_map(lon, lat)

    plt.subplot(411)
    stdev_all_data = ma.std(data, axis=0)
    im = curr_map.pcolormesh(x, y, stdev_all_data, vmin=vmin, vmax=vmax, cmap=cmap)
    plt.axis('tight')
    plt.colorbar()
    curr_map.drawcoastlines()
    curr_map.fillcontinents(color='grey', lake_color='aqua')
    #~ plt.title('stdev_all_data'+ longname)

    plt.subplot(412)
    annual_means = ma.mean(year_stack, axis=1)
    stdev_annual_means = ma.std(annual_means, axis=0)
    im = curr_map.pcolormesh(x, y, stdev_annual_means, vmin=vmin, vmax=vmax, cmap=cmap)
    plt.axis('tight')
    plt.colorbar()
    curr_map.drawcoastlines()
    curr_map.fillcontinents(color='grey', lake_color='aqua')
    #~ plt.title('Standard Deviation of the Annual Averages'+ longname)

    plt.subplot(413)
    signal_array = ma.mean(year_stack, axis=0)
    stdev_seasonal = ma.std(signal_array, axis=0)
    im = curr_map.pcolormesh(x, y, stdev_seasonal, vmin=vmin, vmax=vmax, cmap=cmap)
    plt.axis('tight')
    plt.colorbar()
    curr_map.drawcoastlines()
    curr_map.fillcontinents(color='grey', lake_color='aqua')
    #~ plt.title('stdev_seasonal'+ longname)

    plt.subplot(414)
    stdev_all_data = ma.std(data, axis=0)
    signal_array = ma.mean(year_stack, axis=0)
    stdev_seasonal = ma.std(signal_array, axis=0)
    stdev_non_seasonal = stdev_all_data - stdev_seasonal
    #~ stdev_non_seasonal = ma.stdev(noise_array, axis=0)
    im = curr_map.pcolormesh(x, y, stdev_non_seasonal, vmin=vmin, vmax=vmax, cmap=cmap)
    plt.axis('tight')
    plt.colorbar()
    curr_map.drawcoastlines()
    curr_map.fillcontinents(color='grey', lake_color='aqua')
    #~ plt.title('stdev_non_seasonal' + longname)

    plt.savefig('/home/nicholas/masters/figures/newplots/standard_deviations_'
                + name + '.png')
    plt.close('all')
def flagging(data, freq, sigma_thres, linscale):
    """
    Flags data for RFI.
    Designed for a single time step scan.
    Uses a sigma threshold to flag out anything with RFI over a certain threshold.
    Expects data to be linear for spline (s=1e-10).
    want to try something else. seems like using db data getting reasonable results for s = 1e4
    Also flags out NaNs, infs.
    Output is flagging mask for input data array.
    """
    # data = 10.**(data/10.)
    mask = zeros(len(data))
    nanmask = array(where(isnan(data))[0])
    mask[nanmask] = 1.0
    infmask = array(where(isinf(data))[0])
    mask[infmask] = 1.0
    scale = linscale
    for f in range(0, len(data)/scale-1):
        # smooth = itp.UnivariateSpline(freq[f*scale:(f+1)*scale],data[f*scale:(f+1)*scale])
        (Fa, Fb) = polyfit(freq[f*scale:(f+1)*scale], data[f*scale:(f+1)*scale], 1)
        # smooth = itp.interp1d(freq[f*scale:(f+1)*scale],data[f*scale:(f+1)*scale],'linear')
        flat_data = data[f*scale:(f+1)*scale]/polyval([Fa, Fb], freq[f*scale:(f+1)*scale])
        flat_sigma = ma.std(flat_data)
        flat_mean = ma.mean(flat_data)
        max_accept = 1.0+flat_sigma*sigma_thres
        min_accept = 1.0-flat_sigma*sigma_thres
        maxmask = array(where(flat_data > max_accept)[0])
        minmask = array(where(flat_data < min_accept)[0])
        maxmask = maxmask+f*scale
        minmask = minmask+f*scale
        mask[maxmask] = 1.0
        mask[minmask] = 1.0
    # smooth = itp.UnivariateSpline(freq[(f+1)*scale:-1],data[(f+1)*scale:-1])
    # smooth = itp.interp1d(freq[(f+1)*scale:-1],data[(f+1)*scale:-1],'linear')
    (Fa, Fb) = polyfit(freq[(f+1)*scale:-1], data[(f+1)*scale:-1], 1)
    flat_data = data[(f+1)*scale:-1]/polyval([Fa, Fb], freq[(f+1)*scale:-1])
    # flat_data = data[(f+1)*scale:-1]/smooth(freq[(f+1)*scale:-1])
    flat_sigma = ma.std(flat_data)
    flat_mean = ma.mean(flat_data)
    max_accept = 1.0+flat_sigma*sigma_thres
    min_accept = 1.0-flat_sigma*sigma_thres
    maxmask = array(where(flat_data > max_accept)[0])
    minmask = array(where(flat_data < min_accept)[0])
    maxmask = maxmask+(f+1)*scale
    minmask = minmask+(f+1)*scale
    mask[maxmask] = 1.0
    mask[minmask] = 1.0
    return mask
def rms_masked(piv_file, mask_file, start_frame, end_frame):
    """ Input:
            piv --str: path to fluctuating .npy vel file
            mask --str: path to .npy mask file
        Output:
            rms_mvel --array: masked numpy array"""
    masked_vel = am(piv_file, mask_file)
    avg_vel = nma.mean(masked_vel[start_frame:end_frame, :, :], axis=0)
    rms_mvel = np.sqrt(
        nma.mean((masked_vel[start_frame:end_frame, :, :] - avg_vel)**2, axis=0))
    return rms_mvel
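# An illustrative sketch only: am() above is a project-specific loader, so a
# synthetic masked velocity stack (frames x rows x cols) stands in here to show
# the same time-mean / RMS-of-fluctuations calculation with numpy.ma.
import numpy as np
import numpy.ma as nma  # matching the alias used above

rng = np.random.default_rng(0)
frames = rng.normal(size=(50, 8, 8))
frames[frames > 2.0] = np.nan            # pretend spurious vectors were removed
masked_vel = nma.masked_invalid(frames)

avg_vel = nma.mean(masked_vel, axis=0)                           # time-averaged field
rms_mvel = np.sqrt(nma.mean((masked_vel - avg_vel)**2, axis=0))  # RMS of fluctuations
print(rms_mvel.shape)   # (8, 8)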
def flagging(data, freq, sigma_thres, linscale):
    """
    Flags data for RFI.
    Designed for a single time step scan.
    Uses a sigma threshold to flag out anything with RFI over a certain threshold.
    Also flags out NaNs, infs.
    Inputs are:
        data - linear input
        freq - can be any units
        sigma_thres - cutoff for bad data
        linscale - size of flattened window
    Output is flagging mask for input data array.
    """
    mask = np.zeros(len(data))
    nanmask = np.where(np.isnan(data))[0]
    mask[nanmask] = 1.0
    infmask = np.where(np.isinf(data))[0]
    mask[infmask] = 1.0
    scale = linscale
    for f in range(0, len(data)/scale-1):
        (Fa, Fb) = np.polyfit(freq[f*scale:(f+1)*scale], data[f*scale:(f+1)*scale], 1)
        flat_data = data[f*scale:(f+1)*scale]/np.polyval([Fa, Fb], freq[f*scale:(f+1)*scale])
        flat_sigma = ma.std(flat_data)
        flat_mean = ma.mean(flat_data)
        max_accept = 1.0+flat_sigma*sigma_thres
        min_accept = 1.0-flat_sigma*sigma_thres
        maxmask = ma.array(np.where(flat_data > max_accept)[0])
        minmask = ma.array(np.where(flat_data < min_accept)[0])
        maxmask = maxmask+f*scale
        minmask = minmask+f*scale
        mask[maxmask] = 1.0
        mask[minmask] = 1.0
    (Fa, Fb) = np.polyfit(freq[(f+1)*scale:-1], data[(f+1)*scale:-1], 1)
    flat_data = data[(f+1)*scale:-1]/np.polyval([Fa, Fb], freq[(f+1)*scale:-1])
    flat_sigma = ma.std(flat_data)
    flat_mean = ma.mean(flat_data)
    max_accept = 1.0+flat_sigma*sigma_thres
    min_accept = 1.0-flat_sigma*sigma_thres
    maxmask = ma.array(np.where(flat_data > max_accept)[0])
    minmask = ma.array(np.where(flat_data < min_accept)[0])
    maxmask = maxmask+(f+1)*scale
    minmask = minmask+(f+1)*scale
    mask[maxmask] = 1.0
    mask[minmask] = 1.0
    return mask
def splot():
    plt.close("all")
    vmin, vmax = 0, 14
    plt.subplot(2, 1, 1)
    plt.pcolormesh(ma.mean(array, 0), vmin=vmin, vmax=vmax)
    plt.colorbar()
    plt.axis("tight")
    plt.subplot(2, 1, 2)
    plt.pcolormesh(ma.mean(interp_array_m, 0), vmin=vmin, vmax=vmax)
    plt.colorbar()
    plt.axis("tight")
    # plt.show()
    plt.savefig("binary_remap.png")
    plt.close("all")
def ft_2d(data, name):
    #~ data /= np.max(np.abs(data), axis=0)
    vmin, vmax = 0, 15
    plt.subplot(121)
    ft_lat = np.fft.rfft2(ma.mean(data, 2))[0:np.shape(data)[0]/2, :]
    plt.pcolormesh(np.log(ft_lat), vmin=vmin, vmax=vmax)
    plt.axis('tight')
    plt.colorbar()
    plt.subplot(122)
    ft_lon = np.fft.rfft2(ma.mean(data, 1))[0:np.shape(data)[0]/2, :]
    plt.pcolormesh(np.log(ft_lon), vmin=vmin, vmax=vmax)
    plt.axis('tight')
    plt.colorbar()
    #~ ax = plt.axes([0, 0, 0, 0], visible=False)
    #~ plt.subplot((223), visible=False)
    #~ plt.colorbar(orientation='horizontal', fraction=0.5)
    plt.savefig(FIGDIR + 'ft_' + name + '.png')
    plt.close('all')
def msd_calc(track):
    """
    msd_calc(track = pdarray)

    Returns numpy arrays containing MSD data calculated from an individual track.

    Parameters
    ----------
    track : pandas dataframe
        Contains, at a minimum, a 'Frame', 'X', and 'Y' column.

    Returns
    ----------
    msd : numpy array the same length as track
        The calculated MSDs using the formula MSD = <(x-x0)**2>
    gauss : numpy array the same length as track
        The calculated Gaussianity

    Examples
    ----------
    >>> d = {'Frame': [1, 2, 3, 4, 5],
    ...      'X': [5, 6, 7, 8, 9],
    ...      'Y': [6, 7, 8, 9, 10]}
    >>> df = pd.DataFrame(data=d)
    >>> msd_calc(df)
    (array([ 0., 2., 8., 18., 32.]), array([ 0. , 0.25, 0.25, 0.25, 0.25]))
    """

    assert type(track['Frame']) == pd.core.series.Series, "track must contain column 'Frame'"
    assert type(track['X']) == pd.core.series.Series, "track must contain column 'X'"
    assert type(track['Y']) == pd.core.series.Series, "track must contain column 'Y'"
    assert track.shape[0] > 0, "track is empty"
    assert track['Frame'].dtype in (np.int64, np.float64), "Data in 'Frame' must be of type int64 or float64."
    assert track['X'].dtype in (np.int64, np.float64), "Data in 'X' must be of type int64 or float64."
    assert track['Y'].dtype in (np.int64, np.float64), "Data in 'Y' must be of type int64 or float64."

    length = track.shape[0]
    msd = np.zeros(length)
    gauss = np.zeros(length)

    for frame in range(0, length-1):
        # creates array to ignore when particles skip frames.
        inc = ma.masked_where(nth_diff(track['Frame'], n=frame+1) != frame+1,
                              nth_diff(track['Frame'], n=frame+1))
        x = ma.array(np.square(nth_diff(track['X'], n=frame+1)), mask=inc.mask)
        y = ma.array(np.square(nth_diff(track['Y'], n=frame+1)), mask=inc.mask)

        msd[frame+1] = ma.mean(x + y)
        gauss[frame+1] = ma.mean(x**2 + y**2)/(2*(msd[frame+1]**2))

    return msd, gauss
def get_noise_levels(ncfile):
    # ----------------
    # Open NetCDF file
    # ----------------
    print('Opening NetCDF file ' + ncfile)
    dataset = nc4.Dataset(ncfile, 'r+', format='NETCDF3_CLASSIC')

    nray = len(dataset.dimensions['time'])
    ngate = len(dataset.dimensions['range'])

    elv = np.transpose(np.tile(dataset.variables['elevation'][:], (ngate, 1)))
    rng = np.tile(dataset.variables['range'][:], (nray, 1))

    height = rng*np.sin(elv*np.pi/180.)

    zh = dataset.variables['ZED_H'][:]
    zed = ma.masked_where(height < 14000, zh)
    rngkm = ma.masked_where(rng <= 0.0, rng/1000.)
    range2 = 20.*ma.log10(rngkm)

    zh[:] = zed - range2
    zv = zh.copy()
    zv[:] = zh[:] - dataset.variables['ZDR'][:]
    zx = zh.copy()
    zx[:] = zh[:] + dataset.variables['LDR'][:]

    nezharr = ma.mean(zh, axis=1)
    nezherr = ma.std(zh, axis=1)
    nezvarr = ma.mean(zv, axis=1)
    nezverr = ma.std(zv, axis=1)
    nezxarr = ma.mean(zx, axis=1)
    nezxerr = ma.std(zx, axis=1)

    nezharr = ma.masked_where(nezherr > MAX_ERR, nezharr)
    nezvarr = ma.masked_where(nezverr > MAX_ERR, nezvarr)
    nezxarr = ma.masked_where(nezxerr > MAX_ERR, nezxarr)

    nezh = ma.median(nezharr)
    nezv = ma.median(nezvarr)
    nezx = ma.median(nezxarr)

    dataset.close()

    return np.round(nezh, 2), np.round(nezv, 2), np.round(nezx, 2)
def plot_auto(i):
    print "plot figure", i
    bl = BLindex.BLindex([i, i])
    vi = vis[:, :, bl]
    vi = ma.masked_invalid(vi)
    plt.figure(figsize=(16, 8))
    #plt.plot(ma.mean(np.abs(vi), axis=0))
    f_1d = ma.mean(np.abs(vi), axis=0)
    plt.plot(f_1d)
    plt.title('Spectrum of %d_%d (Mean = %.2fe10)' % (i, i, ma.mean(f_1d)/1e10))
    plt.xlabel('Frequency points')
    plt.ylabel('Amplitude')
    plt.grid()
    plt.savefig('auto_graph/%d_%d.png' % (i, i))
def build_tree_vector(points_r, points_c, levels_left, local_out_array):
    tile_rs = tile[points_r, points_c].reshape(-1, fs)
    local_out_array[0, :] = ma.mean(tile_rs, axis=0)
    #plt.plot(points_r,points_c,'o')
    if levels_left > 1:
        remaining_out_array = local_out_array[1:, :]
        mean_r = np.mean(points_r)
        mean_c = np.mean(points_c)
        offset_size = remaining_out_array.shape[0]/4

        top = points_r < mean_r
        bottom = np.logical_not(top)
        left = points_c < mean_c
        right = np.logical_not(left)

        quadrents = [(top, right), (top, left), (bottom, left), (bottom, right)]
        # Fill the solution for all 4 quadrants
        for idx, quadrent in enumerate(quadrents):
            q = np.logical_and(quadrent[0], quadrent[1])
            q_out = remaining_out_array[idx*offset_size:(idx+1)*offset_size, :]
            build_tree_vector(points_r[q], points_c[q], levels_left - 1, q_out)
        # renormalize
        remaining_out_array *= .25
def aF_oneway(*args, **kwargs):
    dim = kwargs.get("dim", None)
    arrays = args
    means = [ma.mean(a, dim) for a in arrays]
    vars = [ma.var(a, dim) for a in arrays]
    lens = [ma.sum(ma.array(ma.ones(a.shape), mask=ma.asarray(a).mask), dim)
            for a in arrays]
    alldata = ma.concatenate(arrays, dim if dim is not None else 0)
    bign = ma.sum(ma.array(ma.ones(alldata.shape), mask=alldata.mask), dim)
    sstot = ma.sum(alldata**2, dim) - (ma.sum(alldata, dim)**2) / bign
    ssbn = ma.sum([(ma.sum(a, dim)**2) / L for a, L in zip(arrays, lens)], dim)
    # print ma.sum(alldata, dim) ** 2 / bign, ssbn
    ssbn -= ma.sum(alldata, dim)**2 / bign
    sswn = sstot - ssbn
    dfbn = dfnum = float(len(args) - 1.0)
    dfwn = bign - len(args)  # + 1.0
    F = (ssbn / dfbn) / (sswn / dfwn)
    if F.ndim == 0 and dfwn.ndim == 0:
        return (F, scipy.stats.betai(0.5 * dfwn, 0.5 * dfnum,
                                     dfwn / float(dfwn + dfnum * F))
                if F is not ma.masked and dfwn / float(dfwn + dfnum * F) <= 1.0
                and dfwn / float(dfwn + dfnum * F) >= 0.0 else ma.masked)
    else:
        prob = [scipy.stats.betai(0.5 * dfden, 0.5 * dfnum,
                                  dfden / float(dfden + dfnum * f))
                if f is not ma.masked and dfden / float(dfden + dfnum * f) <= 1.0
                and dfden / float(dfden + dfnum * f) >= 0.0 else ma.masked
                for dfden, f in zip(dfwn, F)]
        return F, prob
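# A small illustrative aside (not from the original module), assuming only
# numpy: the ma.sum(ma.array(ma.ones(a.shape), mask=...), dim) expression above
# counts the unmasked observations per group along the chosen axis, which is
# also what MaskedArray.count(axis) returns directly.
import numpy.ma as ma

a = ma.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]], mask=[[0, 1, 0], [0, 0, 1]])
n_by_column = ma.sum(ma.array(ma.ones(a.shape), mask=ma.asarray(a).mask), 0)
print(n_by_column)       # [2.0 1.0 1.0]
print(a.count(axis=0))   # [2 1 1] -- the same counts, obtained more directly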
def ensemble(datasets):
    """
    Generate a single dataset which is the mean of the input datasets.

    An ensemble dataset combines input datasets assuming they all have
    similar shape, dimensions, and units.

    :param datasets: Datasets to be used to compose the ensemble dataset from.
        All Datasets must be the same shape.
    :type datasets: :class:`list` of :class:`dataset.Dataset`

    :returns: New Dataset with a name of 'Dataset Ensemble'
    :rtype: :class:`dataset.Dataset`
    """
    _check_dataset_shapes(datasets)
    dataset_values = [dataset.values for dataset in datasets]
    ensemble_values = ma.mean(dataset_values, axis=0)

    # Build new dataset object from the input datasets and the ensemble values and return it
    ensemble_dataset = ds.Dataset(datasets[0].lats,
                                  datasets[0].lons,
                                  datasets[0].times,
                                  ensemble_values,
                                  units=datasets[0].units,
                                  name="Dataset Ensemble")

    return ensemble_dataset
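# A minimal standalone sketch (independent of the Dataset class used above) of
# what ma.mean(dataset_values, axis=0) does: stacking the member fields and
# averaging them cell by cell, with masked (missing) cells ignored rather than
# dragging the mean down.
import numpy.ma as ma

member_a = ma.array([[1.0, 2.0], [3.0, 4.0]], mask=[[0, 0], [0, 1]])
member_b = ma.array([[3.0, 6.0], [5.0, 8.0]], mask=[[0, 0], [1, 0]])

ensemble_mean = ma.mean([member_a, member_b], axis=0)
print(ensemble_mean)
# [[2.0 4.0]
#  [3.0 8.0]]  -- a cell masked in one member falls back to the other member's value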
def prep_etccdi_variable(input_path, index_name, aggregation, data_source):
    ds = xr.open_dataset(input_path)

    # Omit final year (2010) of HADEX2 - suspiciously large CDD for Malaysia
    if data_source == 'HADEX2':
        ds = ds.sel(time=slice(datetime.datetime(1951, 1, 1, 0),
                               datetime.datetime(2009, 12, 31, 23)))

    # Calculate maximum rainfall value over whole period
    vals = ds[index_name].values
    if index_name in ['CWD', 'CDD']:
        vals = ds[index_name].values.astype('timedelta64[s]')
        vals = vals.astype('float32') / 86400.0
        vals[vals < 0.0] = np.nan
    vals = ma.masked_invalid(vals)
    if aggregation == 'max':
        data = ma.max(vals, axis=0)
    if aggregation == 'mean':
        data = ma.mean(vals, axis=0)

    # Convert back to an xarray DataArray for easy plotting
    # - masked array seems to be interpreted as np array (i.e. nans are present
    #   in the xarray DataArray)
    data2 = xr.DataArray(data,
                         coords={'Latitude': ds['lat'].values,
                                 'Longitude': ds['lon'].values},
                         dims=('Latitude', 'Longitude'),
                         name=index_name)

    ds.close()

    return data2
def setUp(self):
    self.data = np.arange(12.0).reshape(3, 4)
    self.data[2, 1:] = np.nan
    self.array = as_lazy_data(self.data)
    masked_data = ma.masked_array(self.data, mask=np.isnan(self.data))
    self.axis = 0
    self.expected_masked = ma.mean(masked_data, axis=self.axis)
def apply_DEM_veto(dem, dam_nominal, dam_current, dam_bbox, resx, resy,
                   dem_threshold=15, simplify=True):
    """
    Applies a veto to the measured water extent based on Digital Elevation
    Model (DEM) data. Regions of detected water more than `dem_threshold`
    meters above the mean DEM height of the lake are excluded.
    """
    wb_nominal = get_raster_mask(dam_nominal, dam_bbox, dem.shape[1], dem.shape[0])
    wb_current = get_raster_mask(dam_current, dam_bbox, dem.shape[1], dem.shape[0])

    dem_masked = ma.masked_array(dem, mask=np.logical_not(wb_nominal))
    dem_valid = dem < (ma.mean(dem_masked) + dem_threshold)
    dem_valid = np.logical_or(dem_valid, wb_nominal)

    wb_current = np.logical_and(dem_valid, wb_current)

    return get_water_extent(wb_current.astype(np.uint8), dam_nominal, dam_bbox, simplify)
def depth_average(self, Var):
    """Depth average a variable in a C-grid with varying depth."""
    Depth_av = ma.mean(
        ma.divide(Var * np.tile(self.dzf, (self.Nx, self.Ny, 1)).T,
                  self.Depth), axis=0)
    return Depth_av
def plot_global_vector_map(uwind, vwind, lon, lat, figure_file, yskip=10, xskip=20):
    lons, lats = np.meshgrid(lon, lat)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    m = Basemap(ax=ax, projection='eck4', lon_0=0,
                llcrnrlat=lat.min(), urcrnrlat=lat.max(),
                llcrnrlon=lon.min(), urcrnrlon=lon.max(),
                resolution='l', fix_aspect=True)
    x, y = m(lons, lats)
    m.drawcoastlines(linewidth=1)
    m.drawcountries(linewidth=.75)
    N = ma.mean(
        np.sqrt(uwind[::yskip, ::xskip]**2 + vwind[::yskip, ::xskip]**2))
    max = m.quiver(x[::yskip, ::xskip], y[::yskip, ::xskip],
                   uwind[::yskip, ::xskip] / N, vwind[::yskip, ::xskip] / N,
                   color='blue', pivot='middle', headwidth=3)
    fig.savefig(figure_file, dpi=600, bbox_inches='tight')
    plt.show()
def monthmean(H, dtime, mon, y):
    """Return the mean of H for month mon of year y, indexed with dtime.

    H: list, list of values from which we want to extract the mean value.
    dtime: netcdf time list converted with num2date.
    mon: int, month (indexed 1-12). ATTENTION !!!
    y: int, year.
    """
    debug = False
    i = finddate(dtime, 1, mon, y)
    if debug:
        print i
    if debug:
        print "lendtime", len(dtime), " lenH ", len(H)
    d1 = date(y, mon, 1)
    if debug:
        print "d1", d1
    # Get date of next month
    if mon == 12:
        # if we have to change year
        m2 = 1
        y2 = y + 1
    else:
        m2 = mon + 1
        y2 = y
    d2 = date(y2, m2, 1)
    if debug:
        print "d2", d2
    monlen = (d2 - d1).days
    monmean = ma.mean(H[i:(i + monlen + 1)])
    return monmean
def calc_subregion_area_mean_and_std(dataset_array, subregions):
    ''' Calculate area mean and standard deviation values for given subregions
    using datasets on common grid points.

    :param dataset_array: An array of OCW Dataset Objects
    :type list:

    :param subregions: list of subregions
    :type subregions: :class:`numpy.ma.array`

    :returns: area averaged time series for the dataset of shape
        (ntime, nsubregion)
    '''
    ndata = len(dataset_array)
    dataset0 = dataset_array[0]
    if dataset0.lons.ndim == 1:
        lons, lats = np.meshgrid(dataset0.lons, dataset0.lats)
    else:
        lons = dataset0.lons
        lats = dataset0.lats
    subregion_array = np.zeros(lons.shape)
    mask_array = dataset_array[0].values[0, :].mask

    # dataset0.values.shape[0]: length of the time dimension
    # spatial average
    t_series = ma.zeros([ndata, dataset0.values.shape[0], len(subregions)])
    # spatial standard deviation
    spatial_std = ma.zeros([ndata, dataset0.values.shape[0], len(subregions)])

    for iregion, subregion in enumerate(subregions):
        lat_min, lat_max, lon_min, lon_max = subregion[1]
        y_index, x_index = np.where((lats >= lat_min) & (lats <= lat_max) &
                                    (lons >= lon_min) & (lons <= lon_max))
        subregion_array[y_index, x_index] = iregion + 1
        for idata in np.arange(ndata):
            t_series[idata, :, iregion] = ma.mean(
                dataset_array[idata].values[:, y_index, x_index], axis=1)
            spatial_std[idata, :, iregion] = ma.std(
                dataset_array[idata].values[:, y_index, x_index], axis=1)

    subregion_array = ma.array(subregion_array, mask=mask_array)
    return t_series, spatial_std, subregion_array
def setUp(self):
    self.data = ma.arange(12).reshape(3, 4)
    self.data.mask = [[0, 0, 0, 1],
                      [0, 0, 1, 1],
                      [0, 1, 1, 1]]
    # --> fractions of masked-points in columns = [0, 1/3, 2/3, 1]
    self.array = as_lazy_data(self.data)
    self.axis = 0
    self.expected_masked = ma.mean(self.data, axis=self.axis)
def quantify_effect_smoothing_freq():
    """Test how much total dissipation changes by smoothing density to
    2, 3, 4, and 6 Hz"""
    casts = np.r_[45:1150:3]
    eps_all = np.full((4, casts.size), np.nan)
    for i, cast in enumerate(casts):
        try:
            print(cast, end=' ')
            _, data = loadMVP_m1(cast, bin_data=False)
            for j, n_smooth_rho in enumerate([4, 6, 8, 12]):
                eps, Lt = calc_eps(
                    data['p_raw'], data['prho'], data['z'],
                    plot_overturns=False, n_smooth_rho=n_smooth_rho)
                eps_all[j, i] = eps.sum()
        except IndexError:
            pass

    print('\n\nCompared to low-passing at 3Hz, filtering at a different freq\n'
          'produces dissipation values of the following relative magnitude:')
    relative_mags = [ma.mean(ma.masked_invalid(eps_all[i]/eps_all[2]))
                     for i in [0, 1, 3]]
    print("""
    6 Hz: {0:2.2f}
    4 Hz: {1:2.2f}
    2 Hz: {2:2.2f}
    """.format(*relative_mags))
    return eps_all
def train_step_batch(self, epoch):
    D1 = ma.dot(self.vectors**2, self.weight_matrix)
    D2 = ma.dot(self.vectors, self.constant_matrix)
    Dist = D1 - D2

    best_nodes = ma.argmin(Dist, 0)
    distances = ma.min(Dist, 0)
##    print "q error:", ma.mean(ma.sqrt(distances + self.dist_cons)), self.radius(epoch)
    self.qerror.append(ma.mean(ma.sqrt(distances + self.dist_cons)))

    if self.neighbourhood == Map.NeighbourhoodGaussian:
        H = numpy.exp(-self.unit_distances / (2 * self.radius(epoch))) * (
            self.unit_distances <= self.radius(epoch))
    elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
        H = 1.0 - (self.unit_distances / self.radius(epoch))**2
        H = H * (H >= 0.0)
    else:
        H = 1.0 * (self.unit_distances <= self.radius(epoch))

    P = numpy.zeros((self.vectors.shape[0], self.data.shape[0]))
    P[(best_nodes, list(range(len(best_nodes))))] = numpy.ones(len(best_nodes))

    S = ma.dot(H, ma.dot(P, self.data))
    A = ma.dot(H, ma.dot(P, ~self.data._mask))

##    nonzero = (range(epoch%2, len(self.vectors), 2), )
    nonzero = (numpy.array(sorted(set(ma.nonzero(A)[0]))), )

    self.vectors[nonzero] = S[nonzero] / A[nonzero]
def mean_(self):
    """
    Calculates the mean of the image over the segmentation.

    :return:
    """
    return ma.mean(self.masked_img, 0)
def fit(self, data, y=None):
    """
    Fit MultipleImputer to the input data.

    Parameters
    ----------
    data : DataFrame, shape [n_samples, n_features]
        Input data.

    y : default None
        Ignore, argument required for constructing sklearn Pipeline.

    Returns
    -------
    self
    """
    mode_name = 'mode'
    mean_name = 'mean'
    median_name = 'median'
    allowed_strategies = {mode_name, mean_name, median_name}
    for k in self.strategies:
        if k not in allowed_strategies:
            msg = 'Can only use these strategies: {0} got strategy={1}'
            raise ValueError(msg.format(allowed_strategies, k))

    statistics = {}
    if mean_name in self.strategies:
        mean_cols = self.strategies[mean_name]
        X_masked = self._get_masked(data, mean_cols)
        mean_masked = ma.mean(X_masked, axis=0)
        statistics[mean_name] = mean_masked.data

    if median_name in self.strategies:
        median_cols = self.strategies[median_name]
        X_masked = self._get_masked(data, median_cols)
        median_masked = ma.median(X_masked, axis=0)
        statistics[median_name] = median_masked.data

    # numpy MaskedArray doesn't seem to support the .mode
    # method yet, thus we roll out our own
    # https://docs.scipy.org/doc/numpy-1.13.0/reference/maskedarray.baseclass.html#maskedarray-baseclass
    if mode_name in self.strategies:
        mode_cols = self.strategies[mode_name]
        X_masked = self._get_masked(data, mode_cols)
        mode_values = np.empty(len(mode_cols))

        # transpose to compute along each column instead of row.
        # TODO:
        # an embarrassingly parallel problem, needs to investigate
        # if this is a bottleneck
        zipped = zip(X_masked.data.T, X_masked.mask.T)
        for i, (col, col_mask) in enumerate(zipped):
            col_valid = col[~col_mask]
            values, _ = mode(col_valid)
            mode_values[i] = values[0]
        statistics[mode_name] = mode_values

    self.statistics_ = statistics
    return self
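# A tiny standalone illustration (independent of the MultipleImputer class and
# its _get_masked helper): the 'mean' strategy above reduces to masking missing
# cells and taking a column-wise ma.mean, so missing entries simply drop out of
# each column's average.
import numpy as np
import numpy.ma as ma

X = np.array([[1.0, np.nan, 3.0],
              [4.0, 5.0, np.nan],
              [7.0, 8.0, 9.0]])
X_masked = ma.masked_invalid(X)
column_means = ma.mean(X_masked, axis=0)
print(column_means.data)   # [4.0 6.5 6.0] -- NaNs ignored per column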
def test_corrcoef(self):
    r = ma.masked_equal(np.load("data/ml-1m/rating.npy"), 0)
    # sim = ma.corrcoef(r[0], r[2412])
    # print(sim)
    # print(np.corrcoef(r[0].filled(0), r[2412].filled(0)))
    sim2 = ma.corrcoef(ma.vstack([r[0], r[2412]]))
    print(sim2)
    print(ma.dot(r[0], r[2412]) / math.sqrt(ma.dot(r[0], r[0]))
          / math.sqrt(ma.dot(r[2412], r[2412])))
    r0_m = r[0] - ma.mean(r[0])
    r1_m = r[2412] - ma.mean(r[2412])
    print(ma.dot(r0_m, r1_m) / math.sqrt(ma.dot(r0_m, r0_m))
          / math.sqrt(ma.dot(r1_m, r1_m)))