def cinfo(CL, param):
    """ This property returns information on the parameter in the cloud (all given in the units of the parameter).
    Note that the parameter is averaged over the entire cloud time at the required altitude (bottom, top or in-cloud) - this is not the case when using vpinfo(CL,param).
        CloudObj.cinfo["bottom"]: param at the cloud base
        CloudObj.cinfo["top"]: param at the cloud top
        CloudObj.cinfo["mean"]: mean param through the cloud (in cloud)
        CloudObj.cinfo["median"]: median param through the cloud (in cloud)
        CloudObj.cinfo["stdev"]: standard deviation of the param through the cloud (in cloud)
        CloudObj.cinfo["delta"]: difference of param between the bottom and the top
        CloudObj.cinfo["slope"]: delta divided by the mean thickness
    The property can be accessed as e.g. CloudObj.cinfo["bottom"] or CloudObj.cinfo (dictionary). """
    H = dict()
    H["bottom"] = list(); H["top"] = list(); H["mean"] = list(); H["median"] = list()
    H["stdev"] = list(); H["delta"] = list(); H["slope"] = list(); H["units"] = list()
    alt = [i for i, x in enumerate(CL.dttl) if x == 'altitude'][0]
    T = [i for i, x in enumerate(CL.dttl) if x == param][0]
    try:
        for i in range(len(CL.props["height"])):
            # in-cloud points: between cloud base and cloud top
            ix = nonzero((CL.data[alt] >= CL.props["height"][i][1]) * (CL.data[alt] <= CL.props["height"][i][2]))
            H["bottom"].append(float(st.nanmedian(CL.data[T][nonzero((CL.data[alt] >= CL.props["height"][i][0]) * (CL.data[alt] <= CL.props["height"][i][1]))])))
            H["top"].append(float(st.nanmedian(CL.data[T][nonzero((CL.data[alt] >= CL.props["height"][i][2]) * (CL.data[alt] <= CL.props["height"][i][3]))])))
            H["mean"].append(float(st.nanmean(CL.data[T][ix])))
            H["median"].append(float(st.nanmedian(CL.data[T][ix])))
            H["stdev"].append(float(st.nanstd(CL.data[T][ix])))
            H["delta"].append(H["bottom"][i] - H["top"][i])
            H["slope"].append(H["delta"][i] / (np.mean([CL.props["height"][i][2], CL.props["height"][i][3]]) - np.mean([CL.props["height"][i][0], CL.props["height"][i][1]])))   # units/meter
            H["units"].append(CL.dunit[T])
            del ix
    except:
        print("[cinfo] Height properties must be defined first using the defheight method.")
    return H
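# Worked illustration of the "delta" and "slope" entries described above, assuming the
# 4-point height convention [below-base, base, top, above-top] used by props["height"];
# all numbers below are invented for the example.
import numpy as np

height = [950.0, 1000.0, 1800.0, 1850.0]    # hypothetical 4-point heights in metres
t_bottom, t_top = 12.0, 4.5                 # hypothetical below-base and above-top medians
delta = t_bottom - t_top
thickness = np.mean([height[2], height[3]]) - np.mean([height[0], height[1]])
slope = delta / thickness                   # parameter units per metre, as in H["slope"]
print(delta, thickness, slope)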
def getBoxData(self, dataset, plot, coords):
    alldata = dataset.getAggregateDataAsArray(plot)
    box = alldata[coords[0]][coords[1]]
    mean = st.nanmean(box)
    median = st.nanmedian(box)
    sigma = numpy.std(box)
    return box, (mean, median, sigma)
def medabsdev(x, axis=0):
    """medabsdev(x, axis=0)
    Calculates and returns the median absolute deviation (MAD) based on
    https://en.wikipedia.org/wiki/Median_absolute_deviation (last visited June 2014).
    This function handles NaNs and masked values (masked arrays) by ignoring them.
    x (input) is the array on which the MAD is calculated.
    axis is the axis along which the MAD is calculated. Default is 0."""
    x = copy.deepcopy(x)
    if 'array' not in str(type(x)).lower():
        raise TypeError("x must be an array.")
    if 'ma' in str(type(x)).lower():
        try:
            x[x.mask] = nan
        except:
            raise TypeError("Tried to translate masks into NaNs but failed.")
    if axis == 0:
        med = st.nanmedian(x)
        mad = st.nanmedian(abs(x - med))
    elif axis == 1:
        med = st.nanmedian(x.transpose())
        mad = st.nanmedian(abs(x.transpose() - med))
    return mad
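# Quick check of medabsdev, assuming numpy and the module-level imports it relies on
# (copy, st = scipy.stats.stats, nan); the data values are made up.
import numpy as np

x = np.array([1.0, 2.0, 2.5, np.nan, 100.0])   # one NaN and one large outlier
print(medabsdev(x))    # the NaN is ignored and the outlier barely moves the MAD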
def fit_spec_poly5(xData, yData, dyData, order=5):
    """ Fit a 5th order polynomial to a spectrum. To avoid overflow errors the
    X-axis data should not be large numbers (e.g.: x10^9 Hz; use GHz instead). """

    # Lower order limit is a line with slope
    if order < 1:
        order = 1
    if order > 5:
        order = 5

    # Estimate starting coefficients
    C1 = nanmean(np.diff(yData)) / nanmedian(np.diff(xData))
    ind = int(np.median(np.where(~np.isnan(yData))))
    C0 = yData[ind] - (C1 * xData[ind])
    C5 = 0.0
    C4 = 0.0
    C3 = 0.0
    C2 = 0.0
    inParms = [{'value': C5, 'parname': 'C5'},
               {'value': C4, 'parname': 'C4'},
               {'value': C3, 'parname': 'C3'},
               {'value': C2, 'parname': 'C2'},
               {'value': C1, 'parname': 'C1'},
               {'value': C0, 'parname': 'C0'}]

    # Set the polynomial order
    for i in range(len(inParms)):
        if len(inParms) - i - 1 > order:
            inParms[i]['fixed'] = True
        else:
            inParms[i]['fixed'] = False

    # Function to evaluate the difference between the model and data.
    # This is minimised in the least-squared sense by the fitter.
    def errFn(p, fjac=None):
        status = 0
        return status, (poly5(p)(xData) - yData) / dyData

    # Use mpfit to perform the fitting
    mp = mpfit(errFn, parinfo=inParms, quiet=True)
    return mp
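# Following the docstring's advice, rescale a frequency axis from Hz to GHz before calling
# fit_spec_poly5; a minimal sketch assuming numpy, with made-up channel frequencies.
import numpy as np

freq_Hz = np.linspace(1.10e9, 1.45e9, 256)
xData_GHz = freq_Hz / 1e9    # pass GHz-scale values to the fitter to avoid overflow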
def getFlatNorm(flat, ysoln, low, high):
    from scipy.stats import stats
    f = getStraight(flat, ysoln, low, False)
    ftmp = numpy.where(f == 0, numpy.nan, f)
    norm = stats.nanmedian(ftmp[20:-20], 0)
    norm = numpy.where((norm <= 0) | numpy.isnan(norm), 1., norm)
    f /= norm
    img = numpy.ones((2400, 2400))
    img[low * 2:high * 2] = st.resampley(f, ysoln[3], mode='nearest')
    return iT.resamp(derotate2(img), 2)
def MAD(a, c=0.6745, axis=0):
    """ Median Absolute Deviation along given axis of an array:
    median(abs(a - median(a))) / c """
    a = np.asarray(a, np.float64)
    if a.ndim == 1:
        d = _nanmedian(a)
        m = _nanmedian(np.fabs(a - d) / c)
    else:
        d = nanmedian(a, axis=axis)
        # I don't want the array to change so I have to copy it?
        if axis > 0:
            aswp = np.swapaxes(a, 0, axis)
        else:
            aswp = a
        m = nanmedian(np.fabs(aswp - d) / c, axis=0)
    return m
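# Sanity check of the c = 0.6745 consistency constant, assuming numpy and the
# _nanmedian/nanmedian helpers this function relies on are importable: for normally
# distributed data, MAD(a) with this scaling approximates the standard deviation.
import numpy as np

np.random.seed(0)
a = np.random.normal(5.0, 2.0, 100000)
print(MAD(a), a.std())    # both values should be close to 2.0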
def aggregate_ftr_matrix(self, ftr_matrix):
    sig = []
    for ftr in ftr_matrix:
        median = stats.nanmedian(ftr)
        mean = stats.nanmean(ftr)
        std = stats.nanstd(ftr)
        # Invalid double scalars warning appears here
        skew = stats.skew(ftr) if any(ftr) else 0.0
        kurtosis = stats.kurtosis(ftr)
        sig.extend([median, mean, std, skew, kurtosis])
    return sig
def collapse(self, keys, vName):
    """
    desc:
        Collapse the data by a (list of) keys and get statistics on a
        dependent variable.

    arguments:
        keys:
            desc: A key or list of keys to collapse the data on.
            type: [list, str, unicode]
        vName:
            desc: The dependent variable to collapse. Alternatively, you can
                  specify a function, in which case the error will be 0.
            type: [str, unicode, function]

    returns:
        desc: A DataMatrix with the collapsed data, with the descriptive
              statistics on `vName`.
        type: DataMatrix
    """
    if isinstance(keys, basestring):
        keys = [keys]
    m = [keys + ['mean', 'median', 'std', 'se', '95ci', 'count']]
    for g in self.group(keys):
        l = []
        for key in keys:
            l.append(g[key][0])
        if type(vName) == types.FunctionType:
            l.append(vName(g))
            l.append(np.nan)
            l.append(np.nan)
            l.append(np.nan)
            l.append(np.nan)
            l.append(len(g))
        else:
            a = g[vName]
            l.append(nanmean(a))
            l.append(nanmedian(a))
            l.append(nanstd(a))
            l.append(nanstd(a) / np.sqrt(a.size))
            l.append(1.96 * nanstd(a) / np.sqrt(a.size))
            l.append(a.size)
        m.append(l)
    return DataMatrix(m)
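# Minimal sketch of the per-group descriptives computed above, assuming the bare
# nanmean/nanmedian/nanstd names come from an old scipy.stats (they were removed in
# modern SciPy); the data values are invented.
import numpy as np
from scipy.stats import nanmean, nanmedian, nanstd

a = np.array([0.51, 0.47, np.nan, 0.55, 0.49])
se = nanstd(a) / np.sqrt(a.size)
row = [nanmean(a), nanmedian(a), nanstd(a), se, 1.96 * se, a.size]   # mean, median, std, se, 95ci, count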
def run_stats(x, n):
    """run_stats(x, n)
    Calculates and returns the running mean, median, standard deviation, and
    median absolute deviation (MAD). This function handles NaNs and masked
    values (masked arrays) by ignoring them.
    x (input) is the array on which the running statistics are calculated
    (only one dimension, 1D array).
    n is the number of points taken in the running statistics window."""
    x = copy.deepcopy(x)
    try:
        x.mask
    except:
        x = np.ma.array(x, mask=False)
    if len(np.shape(x)) > 2:
        raise ValueError("The array provided has more than 2 dimensions, at most 1 or 2 dimensions can be handled.")
    try:
        [ro, co] = np.shape(x)
    except:
        ro = np.shape(x)[0]
        co = 1
    if ro == 1 or co == 1:
        ro = max(ro, co)
        x = x.reshape(ro,)
    else:
        raise ValueError("The array must be a vector (one column or row)")
    # initializing matrix
    M = ones([ro, n]) * NaN
    M = ma.asanyarray(M)
    # building the matrix of shifted copies of x
    if n % 2 == 1:      # if n is odd
        for j in range(int(n / 2), 0, -1):
            posi = int(n / 2) - j      # current position
            M[0:ro - j, posi] = x[j:]
        for j in range(1, 2 + int(n / 2), 1):
            posi = int(n / 2) + j - 1
            M[j - 1:, posi] = x[0:(ro + 1) - j]
    elif n % 2 == 0:    # if n is even
        for j in range(int(n / 2), 0, -1):
            posi = int(n / 2) - j
            M[0:ro - j, posi] = x[j:]
        for j in range(1, int(n / 2) + 1):
            posi = int(n / 2) + j - 1
            M[j - 1:, posi] = x[0:(ro + 1) - j]
    else:
        print("Well, that's pretty weird. Are you sure n is an integer?")
    M.data[M.mask] = nan
    ave = st.nanmean(M, axis=1)
    med = st.nanmedian(M, axis=1)
    stde = st.nanstd(M, axis=1)
    mad = medabsdev(M, axis=1)
    return [ave, med, stde, mad]
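# Example call, assuming numpy and the module-level imports used above (ones, NaN, ma, st)
# as well as the medabsdev function defined earlier; the data values are made up.
import numpy as np

x = np.array([1.0, 2.0, np.nan, 4.0, 5.0, 6.0, 7.0])
ave, med, stde, mad = run_stats(x, 3)    # running statistics over a 3-point window
print(med)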
def fit_spec_poly5(xData, yData, dyData, order=5):
    xData = np.array(xData, dtype='f8')
    yData = np.array(yData, dtype='f8')

    # Estimate starting coefficients
    C1 = nanmean(np.diff(yData)) / nanmedian(np.diff(xData))
    ind = int(np.median(np.where(~np.isnan(yData))))
    C0 = yData[ind] - (C1 * xData[ind])
    if order < 1:
        order = 1
    p0 = [0.0, 0.0, 0.0, 0.0, C1, C0]

    # Set the order
    p0 = p0[(-order - 1):]

    def chisq(p, x, y):
        return np.sum(((poly5(p)(x) - y) / dyData)**2.0)

    # Use minimize to perform the fit
    return op.fmin_bfgs(chisq, p0, args=(xData, yData), full_output=1)
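# Usage sketch with synthetic data. poly5 is defined elsewhere in the original module; a
# plausible stand-in (an assumption, not the library's definition) is included here so the
# example is self-contained, along with the nanmean/nanmedian and op = scipy.optimize names
# the function expects.
import numpy as np
import scipy.optimize as op
from scipy.stats import nanmean, nanmedian   # removed in modern SciPy


def poly5(p):
    # assumed behaviour: evaluate a polynomial with the highest-order coefficient first
    return lambda x: np.polyval(p, x)


xData = np.linspace(0.5, 3.0, 50)    # e.g. frequency in GHz, kept small to avoid overflow
yData = 1.2 + 0.3 * xData - 0.05 * xData**2 + np.random.normal(0, 0.01, xData.size)
dyData = np.full(xData.size, 0.01)
result = fit_spec_poly5(xData, yData, dyData, order=2)
best_p = result[0]    # fmin_bfgs with full_output=1 returns (xopt, fopt, ...)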
#===========================================================
fig = plt.figure(figsize=(3.62, 2.76))
ax = fig.add_subplot(111)

data = cdata[prnd, :]
zenSpacing = 0.5
median = []
zen = np.linspace(0, 90, int(90. / zenSpacing) + 1)
for z in zen:
    criterion = ((data[:, 2] < (z + zenSpacing / 2.)) &
                 (data[:, 2] > (z - zenSpacing / 2.)))
    ind = np.array(np.where(criterion))[0]
    tmp = data[ind, :]
    rout = esm.reject_outliers_arg(tmp[:, 3], 3)
    for i in rout:
        ax.plot(90. - z, tmp[i, 3], 'k.', alpha=0.5)
    median.append(nanmedian(data[ind, 3]))
ax.plot(90. - zen, median, 'r-', alpha=0.5)

for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
             ax.get_xticklabels() + ax.get_yticklabels()):
    item.set_fontsize(8)
ax.set_ylim([-35, 35])

plt.tight_layout()
plt.savefig(str(prn) + "_ele.png")
plt.close()

#================================================
az = np.linspace(0, 360, 721)
fig = plt.figure(figsize=(3.62, 2.76))
# Commonly used statistics tools in Python include Numpy, Pandas, PyMC, and StatsModels.
# The scipy.stats sub-package of Scipy contains many statistical methods.

from numpy import *
from matplotlib import pyplot

# Numpy provides simple statistical methods of its own:
heights = array([1.46, 1.79, 2.01, 1.75, 1.56, 1.69, 1.88, 1.76, 1.88, 1.78])

print('mean,', heights.mean())
print('min,', heights.min())
print('max', heights.max())
print('standard deviation,', heights.std())

# Import the Scipy statistics module:
import scipy.stats.stats as st

print('median, ', st.nanmedian(heights))    # median, ignoring NaN values
print('mode, ', st.mode(heights))           # mode and its number of occurrences
print('skewness, ', st.skew(heights))       # skewness
print('kurtosis, ', st.kurtosis(heights))   # kurtosis

# Probability distributions
# Common continuous probability distributions:
#   uniform distribution
#   normal distribution
#   Student's t distribution
#   F distribution
#   Gamma distribution
#   ...
# Discrete probability distributions:
#   Bernoulli distribution
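# A short illustration of the distributions listed above using scipy.stats; the parameter
# values are arbitrary examples.
from scipy import stats

norm_dist = stats.norm(loc=0, scale=1)          # normal distribution
print(norm_dist.pdf(0), norm_dist.cdf(1.96))    # density at 0, P(X <= 1.96)

bern = stats.bernoulli(0.3)                     # Bernoulli distribution (discrete)
print(bern.pmf([0, 1]))                         # probabilities of 0 (failure) and 1 (success)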
def transect_flow_estimator(pre_process_input_file=None):
    """
    Receives a list of ADCPTransectData objects from transect_preprocessor.py,
    processes them to find average velocities according to options, and finally
    outputs flows. Flows can be written to CSV and/or plotted across time, and an
    individual plot can be produced for each transect. Output files are saved to
    the outpath supplied by transect_preprocessor().
    Inputs:
        pre_process_input_file = path to a transect_preprocessor input file,
          or None to use the default file.
    """

    # initialize collector lists for time series plots
    mtimes = []
    mean_u = []
    mean_v = []
    align_angle = []
    total_flow = []
    ustars = []
    kxs = []
    kys = []
    data_file_num = -1

    # setup compression option
    if flow_netcdf_data_compression:
        zlib = True
    else:
        zlib = None

    # prepare plot line, if any
    if flow_p1_lon_lat is None or flow_p2_lon_lat is None:
        ll_pline = None
    else:
        ll_pline = np.array([flow_p1_lon_lat, flow_p2_lon_lat])

    (transects, outpath) = transect_preprocessor.transect_preprocessor(pre_process_input_file)

    if flow_write_timeseries_flows_to_csv:
        logfname = os.path.join(outpath, 'transect_flow_log.csv')
        lf = open(logfname, 'wb')
        logfile = csv.writer(lf)
        logfile.writerow(('filename', 'start_date', 'end_date', 'water_mode',
                          'bottom_mode', 'mean_velocity_U [m/s]',
                          'mean_velocity V [m/s]', 'flow_volume_U [m^3/s]',
                          'flow_volume_V [m^3/s]', 'sampling_area [m^2]',
                          'alignment_angle [degree]', 'notes'))

    for t in transects:
        fname, ext = os.path.splitext(t.source)
        outname = os.path.join(outpath, fname)

        if flow_regrid:
            if ll_pline is not None:
                if flow_regrid_normal_to_flow:
                    print "Warning! Regridding plotline given, but options also ask for a flow-based plotline."
                    print "Ignoring flow-based plotline option..."
                if t.xy_srs is not None:
                    ll_srs = t.lonlat_srs
                    if ll_srs is None:
                        ll_srs = t.default_lonlat_srs
                    pline = adcpy.util.coordinate_transform(ll_pline, ll_srs, t.xy_srs)
                else:
                    print "ADCPData must be projected to use transect_flow_estimator"
                    exit()
            else:
                if flow_regrid_normal_to_flow:
                    flows = t.calc_ensemble_flows(range_from_velocities=True)
                    pline = adcpy.util.map_flow_to_line(t.xy, flows[:, 0], flows[:, 1])
                else:
                    pline = adcpy.util.map_xy_to_line(t.xy)
            t.xy_regrid(dxy=flow_regrid_dxy, dz=flow_regrid_dz,
                        pline=pline,
                        sd_drop=flow_regrid_bin_sd_drop,
                        mtime_regrid=True)
        else:
            t = t.average_ensembles(flow_crossprod_ens_num_ave)

        if flow_sd_drop > 0:
            t.sd_drop(sd=flow_sd_drop,
                      sd_axis='elevation',
                      interp_holes=flow_sd_drop_interp)
            t.sd_drop(sd=flow_sd_drop,
                      sd_axis='ensemble',
                      interp_holes=flow_sd_drop_interp)
        if flow_smooth_kernel > 2:
            t.kernel_smooth(kernel_size=flow_smooth_kernel)
        if flow_extrapolate_boundaries:
            t.extrapolate_boundaries()
        if flow_rotation is not None:
            t = adcpy_recipes.transect_rotate(t, flow_rotation)

        if flow_regrid:
            UVW, UVWens, flow, survey_area = \
                adcpy_recipes.calc_transect_flows_from_uniform_velocity_grid(t, use_grid_only=False)
            Uflow = flow[0]
            Vflow = flow[1]
            U = UVW[0]
            V = UVW[1]
        else:
            U, Uflow, total_area, survey_area = t.calc_crossproduct_flow()
            V, Vflow = (np.nan, np.nan)

        if flow_plot_mean_vectors:
            fig3 = adcpy.plot.plot_ensemble_mean_vectors(t, title='Mean Velocity [m/s]')
            if flow_save_plots:
                adcpy.plot.plt.savefig("%s_mean_velocity.png" % outname)

        if flow_plot_flow_summmary:
            fig6 = adcpy.plot.plot_flow_summmary(t, title='Streamwise Summary',
                                                 ures=0.1, vres=0.1,
                                                 use_grid_flows=flow_regrid)
            if flow_save_plots:
                adcpy.plot.plt.savefig("%s_flow_summary.png" % outname)

        if flow_show_plots:
            adcpy.plot.show()
        adcpy.plot.plt.close('all')

        if (flow_save_processed_netcdf):
            fname = outname + '.flow_processed.nc'
            t.write_nc(fname, zlib=zlib)

        if flow_plot_timeseries_values or flow_write_timeseries_flows_to_csv:
            data_file_num += 1

            # must fit to line to calc dispersion
            xy_line = adcpy.util.map_xy_to_line(t.xy)
            xd, yd, dd, xy_line = adcpy.util.find_projection_distances(t.xy, xy_line)
            ustar, kx, ky = adcpy.util.calcKxKy(t.velocity[:, :, 0], t.velocity[:, :, 1],
                                                dd, t.bin_center_elevation, t.bt_depth)

            if t.rotation_angle is not None:
                r_angle = t.rotation_angle * 180.0 / np.pi
            else:
                r_angle = 0.0

            if flow_write_timeseries_flows_to_csv:
                times = t.date_time_str(filter_missing=True)
                try:
                    w_mode_str = '%s' % t.raw_adcp.config.prof_mode
                    bm = t.raw_adcp.bt_mode.tolist()
                    b_mode = []
                    for bm1 in bm:
                        bmi = int(bm1)
                        if (bmi not in b_mode):
                            b_mode.append(bmi)
                    b_mode_str = ''
                    for bm1 in b_mode:
                        if (b_mode_str == ''):
                            b_mode_str = '%i' % bm1
                        else:
                            b_mode_str = '%s/%i' % (b_mode_str, bm1)
                except:
                    w_mode_str = 'Unknown'
                    b_mode_str = 'Unknown'
                logfile.writerow((t.source,
                                  times[0],
                                  times[-1],
                                  w_mode_str,
                                  b_mode_str,
                                  '%7.4f' % U,
                                  '%7.4f' % V,
                                  '%10.2f' % Uflow,
                                  '%10.2f' % Vflow,
                                  '%10.2f' % survey_area,
                                  '%5.2f' % r_angle,
                                  t.history))

            if flow_plot_timeseries_values:
                mtimes.append(sp.nanmedian(t.mtime))
                mean_u.append(U)
                mean_v.append(V)
                total_flow.append(Uflow)
                align_angle.append(r_angle)
                ustars.append(ustar)
                kxs.append(kx)
                kys.append(ky)

    # plot timeseries data after all files have been processed
    if flow_plot_timeseries_values and data_file_num > 0:

        # sort by mtime
        mtimes = np.array(mtimes)
        nn = np.argsort(mtimes)
        mtimes = mtimes[nn]
        mean_u = np.array(mean_u)[nn]
        mean_v = np.array(mean_v)[nn]
        total_flow = np.array(total_flow)[nn]
        align_angle = np.array(align_angle)[nn]
        ustars = np.array(ustars)[nn]
        kxs = np.array(kxs)[nn]
        kys = np.array(kys)[nn]

        # plot timeseries figures
        fig_handle = plt.figure()
        plt.subplot(311)
        align_angle = np.array(align_angle)
        mtimes = np.array(mtimes)
        mean_u = np.array(mean_u)
        mean_v = np.array(mean_v)
        total_flow = np.array(total_flow)
        aa = -align_angle * np.pi / 180.0
        if np.isnan(mean_v[0]):
            # probably crossproduct flows were calculated, which have zero v flow
            mean_v[:] = 0.0
        uq = np.cos(aa) * mean_u + np.sin(aa) * mean_v
        vq = -np.sin(aa) * mean_u + np.cos(aa) * mean_v
        v_mag = np.sqrt(uq**2 + vq**2)
        vScale = np.max(v_mag)
        vScale = max(vScale, 0.126)
        qk_value = np.round(vScale * 4) / 4
        Q = plt.quiver(mtimes, np.zeros(len(mtimes)), uq, vq,
                       width=0.003,
                       headlength=10,
                       headwidth=7,
                       scale=10 * vScale,   # scale = 0.005,
                       scale_units='width')
        qk = plt.quiverkey(Q, 0.5, 0.85, qk_value,
                           r'%3.2f ' % qk_value + r'$ \frac{m}{s}$',
                           labelpos='W')
        plt.title('Time series data: %s' % outpath)
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ax.yaxis.set_visible(False)
        ax.set_xticklabels([])
        plt.subplot(312)
        plt.plot(mtimes, total_flow)
        plt.ylabel('m^3/s')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.subplot(313)
        plt.plot(mtimes, align_angle, 'bo')
        plt.ylabel('rotation angle')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ts_plot = os.path.join(outpath, 'time_series_plots.png')
        fig_handle.savefig(ts_plot)

        fig_handle = plt.figure(1111)
        plt.subplot(311)
        plt.plot(mtimes, ustars)
        plt.ylabel('u* m/s')
        ax = plt.gca()
        ax.xaxis_date()
        # plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.title('Dispersion Coefficients: %s' % outpath)
        plt.subplot(312)
        plt.plot(mtimes, kxs)
        plt.ylabel('Kx m^2/s')
        ax = plt.gca()
        ax.xaxis_date()
        # plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.subplot(313)
        plt.plot(mtimes, kys, 'b')
        plt.ylabel('Ky m^2/s')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ts_plot = os.path.join(outpath, 'time_series_dispersion.png')
        fig_handle.savefig(ts_plot)
        plt.close('all')

    if flow_write_timeseries_flows_to_csv:
        lf.close()

    print 'transect_flow_estimator completed!'
def transect_flow_estimator(pre_process_input_file=None):
    """
    Receives a list of ADCPTransectData objects from transect_preprocessor.py,
    processes them to find average velocities according to options, and finally
    outputs flows. Flows can be written to CSV and/or plotted across time, and an
    individual plot can be produced for each transect. Output files are saved to
    the outpath supplied by transect_preprocessor().
    Inputs:
        pre_process_input_file = path to a transect_preprocessor input file,
          or None to use the default file.
    """

    # initialize collector lists for time series plots
    mtimes = []
    mean_u = []
    mean_v = []
    align_angle = []
    total_flow = []
    ustars = []
    kxs = []
    kys = []
    data_file_num = -1

    # setup compression option
    if flow_netcdf_data_compression:
        zlib = True
    else:
        zlib = None

    # prepare plot line, if any
    if flow_p1_lon_lat is None or flow_p2_lon_lat is None:
        ll_pline = None
    else:
        ll_pline = np.array([flow_p1_lon_lat, flow_p2_lon_lat])

    (transects, outpath) = transect_preprocessor.transect_preprocessor(pre_process_input_file)

    if flow_write_timeseries_flows_to_csv:
        logfname = os.path.join(outpath, 'transect_flow_log.csv')
        lf = open(logfname, 'wb')
        logfile = csv.writer(lf)
        logfile.writerow(('filename', 'start_date', 'end_date', 'water_mode',
                          'bottom_mode', 'mean_velocity_U [m/s]',
                          'mean_velocity V [m/s]', 'flow_volume_U [m^3/s]',
                          'flow_volume_V [m^3/s]', 'sampling_area [m^2]',
                          'alignment_angle [degree]', 'notes'))

    for t in transects:
        fname, ext = os.path.splitext(t.source)
        outname = os.path.join(outpath, fname)

        if flow_regrid:
            if ll_pline is not None:
                if flow_regrid_normal_to_flow:
                    print "Warning! Regridding plotline given, but options also ask for a flow-based plotline."
                    print "Ignoring flow-based plotline option..."
                if t.xy_srs is not None:
                    ll_srs = t.lonlat_srs
                    if ll_srs is None:
                        ll_srs = t.default_lonlat_srs
                    pline = adcpy.util.coordinate_transform(ll_pline, ll_srs, t.xy_srs)
                else:
                    print "ADCPData must be projected to use transect_flow_estimator"
                    exit()
            else:
                if flow_regrid_normal_to_flow:
                    flows = t.calc_ensemble_flows(range_from_velocities=True)
                    pline = adcpy.util.map_flow_to_line(t.xy, flows[:, 0], flows[:, 1])
                else:
                    pline = adcpy.util.map_xy_to_line(t.xy)
            t.xy_regrid(dxy=2.0, dz=0.25,
                        pline=pline,
                        sd_drop=flow_regrid_bin_sd_drop,
                        mtime_regrid=True)
        else:
            t = t.average_ensembles(flow_crossprod_ens_num_ave)

        if flow_sd_drop > 0:
            t.sd_drop(sd=flow_sd_drop,
                      sd_axis='elevation',
                      interp_holes=flow_sd_drop_interp)
            t.sd_drop(sd=flow_sd_drop,
                      sd_axis='ensemble',
                      interp_holes=flow_sd_drop_interp)
        if flow_smooth_kernel > 2:
            t.kernel_smooth(kernel_size=flow_smooth_kernel)
        if flow_extrapolate_boundaries:
            t.extrapolate_boundaries()
        if flow_rotation is not None:
            t = adcpy_recipes.transect_rotate(t, flow_rotation)

        if flow_regrid:
            UVW, UVWens, flow, survey_area = \
                adcpy_recipes.calc_transect_flows_from_uniform_velocity_grid(t, use_grid_only=False)
            Uflow = flow[0]
            Vflow = flow[1]
            U = UVW[0]
            V = UVW[1]
        else:
            U, Uflow, total_area, survey_area = t.calc_crossproduct_flow()
            V, Vflow = (np.nan, np.nan)

        if flow_plot_mean_vectors:
            fig3 = adcpy.plot.plot_ensemble_mean_vectors(t, title='Mean Velocity [m/s]')
            if flow_save_plots:
                adcpy.plot.plt.savefig("%s_mean_velocity.png" % outname)

        if flow_plot_flow_summmary:
            fig6 = adcpy.plot.plot_flow_summmary(t, title='Streamwise Summary',
                                                 ures=0.1, vres=0.1,
                                                 use_grid_flows=flow_regrid)
            if flow_save_plots:
                adcpy.plot.plt.savefig("%s_flow_summary.png" % outname)

        if flow_show_plots:
            adcpy.plot.show()
        adcpy.plot.plt.close('all')

        if (flow_save_processed_netcdf):
            fname = outname + '.flow_processed.nc'
            t.write_nc(fname, zlib=zlib)

        if flow_plot_timeseries_values or flow_write_timeseries_flows_to_csv:
            data_file_num += 1

            # must fit to line to calc dispersion
            xy_line = adcpy.util.map_xy_to_line(t.xy)
            xd, yd, dd, xy_line = adcpy.util.find_projection_distances(t.xy, xy_line)
            ustar, kx, ky = adcpy.util.calcKxKy(t.velocity[:, :, 0], t.velocity[:, :, 1],
                                                dd, t.bin_center_elevation, t.bt_depth)

            if t.rotation_angle is not None:
                r_angle = t.rotation_angle * 180.0 / np.pi
            else:
                r_angle = 0.0

            if flow_write_timeseries_flows_to_csv:
                times = t.date_time_str(filter_missing=True)
                try:
                    w_mode_str = '%s' % t.raw_adcp.config.prof_mode
                    bm = t.raw_adcp.bt_mode.tolist()
                    b_mode = []
                    for bm1 in bm:
                        bmi = int(bm1)
                        if (bmi not in b_mode):
                            b_mode.append(bmi)
                    b_mode_str = ''
                    for bm1 in b_mode:
                        if (b_mode_str == ''):
                            b_mode_str = '%i' % bm1
                        else:
                            b_mode_str = '%s/%i' % (b_mode_str, bm1)
                except:
                    w_mode_str = 'Unknown'
                    b_mode_str = 'Unknown'
                logfile.writerow((t.source,
                                  times[0],
                                  times[-1],
                                  w_mode_str,
                                  b_mode_str,
                                  '%7.4f' % U,
                                  '%7.4f' % V,
                                  '%10.2f' % Uflow,
                                  '%10.2f' % Vflow,
                                  '%10.2f' % survey_area,
                                  '%5.2f' % r_angle,
                                  t.history))

            if flow_plot_timeseries_values:
                mtimes.append(sp.nanmedian(t.mtime))
                mean_u.append(U)
                mean_v.append(V)
                total_flow.append(Uflow)
                align_angle.append(r_angle)
                ustars.append(ustar)
                kxs.append(kx)
                kys.append(ky)

    # plot timeseries data after all files have been processed
    if flow_plot_timeseries_values and data_file_num > 0:

        # plot timeseries figures
        fig_handle = plt.figure()
        plt.subplot(311)
        align_angle = np.array(align_angle)
        mtimes = np.array(mtimes)
        mean_u = np.array(mean_u)
        mean_v = np.array(mean_v)
        total_flow = np.array(total_flow)
        aa = -align_angle * np.pi / 180.0
        uq = np.cos(aa) * mean_u + np.sin(aa) * mean_v
        vq = -np.sin(aa) * mean_u + np.cos(aa) * mean_v
        v_mag = np.sqrt(uq**2 + vq**2)
        vScale = np.max(v_mag)
        vScale = max(vScale, 0.126)
        qk_value = np.round(vScale * 4) / 4
        Q = plt.quiver(mtimes, np.zeros(len(mtimes)), uq, vq,
                       width=0.003,
                       headlength=10,
                       headwidth=7,
                       scale=10 * vScale,   # scale = 0.005,
                       scale_units='width')
        qk = plt.quiverkey(Q, 0.5, 0.85, qk_value,
                           r'%3.2f ' % qk_value + r'$ \frac{m}{s}$',
                           labelpos='W')
        plt.title('Time series data: %s' % outpath)
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ax.yaxis.set_visible(False)
        ax.set_xticklabels([])
        plt.subplot(312)
        plt.plot(mtimes, total_flow)
        plt.ylabel('m^3/s')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.subplot(313)
        plt.plot(mtimes, align_angle, 'bo')
        plt.ylabel('rotation angle')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ts_plot = os.path.join(outpath, 'time_series_plots.png')
        fig_handle.savefig(ts_plot)

        fig_handle = plt.figure(1111)
        plt.subplot(311)
        plt.plot(mtimes, ustars)
        plt.ylabel('u* m/s')
        ax = plt.gca()
        ax.xaxis_date()
        # plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.title('Dispersion Coefficients: %s' % outpath)
        plt.subplot(312)
        plt.plot(mtimes, kxs)
        plt.ylabel('Kx m^2/s')
        ax = plt.gca()
        ax.xaxis_date()
        # plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.subplot(313)
        plt.plot(mtimes, kys, 'b')
        plt.ylabel('Ky m^2/s')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ts_plot = os.path.join(outpath, 'time_series_dispersion.png')
        fig_handle.savefig(ts_plot)
        plt.close('all')

    if flow_write_timeseries_flows_to_csv:
        lf.close()

    print 'transect_flow_estimator completed!'
def process_file(filename, outfile, sg07, options):
    input = open(filename, 'rt')
    fwtS = open(outfile, 'wb')
    region = options.ex / 2
    posS = ""
    negS = ""
    posnoS = ""
    negnoS = ""
    allS = ""
    countS = 0
    countnoS = 0
    tagS = []
    tagnoS = []
    std = []
    for line in input:
        if line.startswith("#"):
            fwtS.write(line)
            continue
        if (len(line.split("\t")) == 9):
            chrom, junk, junk, start, end, tag, strand, junk, attr = line.split("\t")
            if float(get_std(attr.rstrip())) == 0:
                tagS.append(int(tag))
                if strand == "+":
                    posS = posS + line
                else:
                    negS = negS + line
            else:
                countnoS += 1
                std.append(float(get_std(attr.rstrip())))
                tagnoS.append(int(tag))
                allS = allS + line
                if strand == "+":
                    posnoS = posnoS + line
                else:
                    negnoS = negnoS + line
    #posS_int_negnoS = pybedtools.BedTool(posS,from_string=True).slop(g=sg07,l=10,r=10).intersect(pybedtools.BedTool(negnoS,from_string=True),u=True)
    #negS_int_posnoS = pybedtools.BedTool(negS,from_string=True).slop(g=sg07,l=10,r=10).intersect(pybedtools.BedTool(posnoS,from_string=True),u=True)
    try:
        # add half the exclusion zone to the start and end coordinates
        posS_int_negnoS = pybedtools.BedTool(posS, from_string=True).intersect(pybedtools.BedTool(negnoS, from_string=True).slop(g=sg07, l=region, r=region), u=True)
        negS_int_posnoS = pybedtools.BedTool(negS, from_string=True).intersect(pybedtools.BedTool(posnoS, from_string=True).slop(g=sg07, l=region, r=region), u=True)
        fullFile = allS + str(posS_int_negnoS) + str(negS_int_posnoS)   # merging singletons and non-singletons together
        countS = posS_int_negnoS.count() + negS_int_posnoS.count()      # counting all the singletons
        fwtS.write(str(pybedtools.BedTool(fullFile, from_string=True).sort()))
    except pybedtools.helpers.BEDToolsError:
        # if the peak call file had no singletons in it
        print "The file had no singletons"
        countS = 0
        fullFile = allS
        fwtS.write(str(pybedtools.BedTool(fullFile, from_string=True).sort()))
    ratio = signal2noise(tagnoS, tagS)   # calculating the signal-to-noise ratio
    #fullFile = allS+str(posS_int_negnoS)+str(negS_int_posnoS)           # merging singletons and non-singletons together
    #countS = posS_int_negnoS.count() + negS_int_posnoS.count()          # counting all the singletons
    #fwtS.write(str(pybedtools.BedTool(fullFile,from_string=True).sort()))
    #ratio = signal2noise(tagnoS,tagS)                                   # calculating the signal-to-noise ratio
    allInfo[os.path.basename(filename)] = ".\t.\t" + str(countnoS) + "\t" + str(countS) + "\t" + str(st.nanmedian(tagnoS)) + "\t" + str(st.nanmean(tagnoS)) + "\t" + str(st.nanmedian(std)) + "\t" + str(st.nanmean(std)) + "\t" + str(ratio)
def vpinfo(CL, param, base='bg'):
    """ This method returns information on the chosen parameter from CloudObj.dttl in the cloud for all vertical scans.
    The averaging of the parameter is done in the particular column of the vertical scan.
    Options:
        param: string containing the title of the parameter as found in CloudObj.dttl or CloudObj.extrattl
        base: method used to find the cloud base and top. Default is the best guess (defBGheight) base='bg';
              to use the 4-point method (defheight), set base='4point'.
    Returns H:
        H["bottom"]: parameter at the cloud base
        H["top"]: parameter at the cloud top
        H["mean"]: mean parameter through the cloud
        H["median"]: median parameter through the cloud
        H["minimum"]: minimum parameter through the cloud
        H["maximum"]: maximum parameter through the cloud
        H["stdev"]: standard deviation of the parameter through the cloud
        H["delta"]: difference of parameter between the bottom and the top
        H["slope"]: delta divided by the mean thickness
        H["units"]: units of the parameter """
    if type(param) == str:
        pass
    else:
        param = str(param)
    H = dict()
    altp = [i for i, x in enumerate(CL.dttl) if x == 'altitude'][0]
    tim = [i for i, x in enumerate(CL.dttl) if x == 'time'][0]
    T = [i for i, x in enumerate(CL.dttl) if x.lower() == param.lower()]
    if len(T) == 1:
        T = T[0]
        Td = CL.data[T]
        Tunits = CL.dunit[T]
        alt = CL.data[altp]
        ta = CL.data[tim]
    elif len(T) > 1:
        print("[vpinfo] Parameter %s was found multiple times in the basic data." % (param))
        return dict()
    elif len(T) == 0:
        posx = []
        for i, ttl in enumerate(CL.extrattl):     # for all extra datasets available
            posx = posx + [[i, j] for j, x in enumerate(ttl) if x.lower() == param.lower()]   # check all titles matching the parameter
        if len(posx) == 1:
            Td = CL.extradata[posx[0][0]][posx[0][1]]        # loading the data
            Tunits = CL.extraunit[posx[0][0]][posx[0][1]]
            j = [j for j, x in enumerate(CL.extrattl[i]) if x.lower() == 'time'][0]
            Tt = CL.extradata[posx[0][0]][j]                 # loading the associated time stamp
            # adapting for data too short for interpolation
            if len(Tt) < 2:
                Td = np.ones((2,)) * NaN
                Tt = np.array([CL.times["cloud"][0][0], CL.times["cloud"][0][1]])
            # adapting the time vector to a common time vector
            ta1 = np.max([CL.data[tim][0], Tt[0]])
            ta2 = np.min([CL.data[tim][-1], Tt[-1]])
            ta = CL.data[tim][nonzero((CL.data[tim] >= ta1) * (CL.data[tim] <= ta2))[0]]
            alt = CL.data[altp][nonzero((CL.data[tim] >= ta1) * (CL.data[tim] <= ta2))[0]]
            fT = interpolate.interp1d(Tt, Td, kind='linear')
            Td = fT(ta)
        else:
            print("[vpinfo] No or multiple %s found in the basic or the extra data." % (param))
            return dict()
    H["bottom"] = list(); H["top"] = list(); H["mean"] = list(); H["median"] = list(); H["stdev"] = list()
    H["delta"] = list(); H["slope"] = list(); H["units"] = list(); H["minimum"] = list(); H["maximum"] = list()
    try:
        for i in range(len(CL.times["verticloud"])):
            if base == '4point':
                cb = CL.props["height"][i][1]
                ct = CL.props["height"][i][2]
            else:
                cb = CL.props["BGheight"][i][0]
                ct = CL.props["BGheight"][i][1]
            ix = nonzero((alt >= cb) * (alt <= ct) * (ta >= CL.times["verticloud"][i][0]) * (ta <= CL.times["verticloud"][i][1]))[0]
            if len(ix) == 0:
                H["mean"].append(nan); H["median"].append(nan); H["stdev"].append(nan)
                H["minimum"].append(nan); H["maximum"].append(nan)
                H["top"].append(nan); H["bottom"].append(nan)
                H["delta"].append(nan); H["slope"].append(nan); H["units"].append(nan)
            else:
                H["mean"].append(float(st.nanmean(Td[ix])))
                H["median"].append(float(st.nanmedian(Td[ix])))
                H["stdev"].append(float(st.nanstd(Td[ix])))
                H["minimum"].append(float(np.nanmin(Td[ix])))
                H["maximum"].append(float(np.nanmax(Td[ix])))
                if base == '4point':
                    if len(nonzero((alt >= ct) * (alt <= CL.props["height"][i][3]) * (ta >= CL.times["verticloud"][i][0]) * (ta <= CL.times["verticloud"][i][1]))[0]) == 0:
                        H["top"].append(nan)
                    else:
                        H["top"].append(float(st.nanmedian(Td[nonzero((alt >= ct) * (alt <= CL.props["height"][i][3]) * (ta >= CL.times["verticloud"][i][0]) * (ta <= CL.times["verticloud"][i][1]))])))
                    if len(nonzero((alt >= CL.props["height"][i][0]) * (alt <= cb) * (ta >= CL.times["verticloud"][i][0]) * (ta <= CL.times["verticloud"][i][1]))[0]) == 0:
                        H["bottom"].append(nan)
                    else:
                        H["bottom"].append(float(st.nanmedian(Td[nonzero((alt >= CL.props["height"][i][0]) * (alt <= cb) * (ta >= CL.times["verticloud"][i][0]) * (ta <= CL.times["verticloud"][i][1]))])))
                    H["delta"].append(H["bottom"][i] - H["top"][i])
                    H["slope"].append(H["delta"][i] / (np.mean([ct, CL.props["height"][i][3]]) - np.mean([CL.props["height"][i][0], cb])))
                else:
                    R = 10      # plus/minus R metres around the cloud top
                    if len(nonzero((alt >= ct - R) * (alt <= ct + R) * (ta >= CL.times["verticloud"][i][0]) * (ta <= CL.times["verticloud"][i][1]))[0]) == 0:
                        H["top"].append(nan)
                    else:
                        H["top"].append(float(st.nanmedian(Td[nonzero((alt >= ct - R) * (alt <= ct + R) * (ta >= CL.times["verticloud"][i][0]) * (ta <= CL.times["verticloud"][i][1]))])))
                    if len(nonzero((alt >= cb - R) * (alt <= cb + R) * (ta >= CL.times["verticloud"][i][0]) * (ta <= CL.times["verticloud"][i][1]))[0]) == 0:
                        H["bottom"].append(nan)
                    else:
                        H["bottom"].append(float(st.nanmedian(Td[nonzero((alt >= cb - R) * (alt <= cb + R) * (ta >= CL.times["verticloud"][i][0]) * (ta <= CL.times["verticloud"][i][1]))])))
                    H["delta"].append(H["bottom"][i] - H["top"][i])
                    H["slope"].append(float(H["delta"][i] / (ct - cb)))
                H["units"].append(Tunits)
            del ix
    except:
        if base == '4point':
            print("[vpinfo] Height properties must be defined first using the defheight method.")
        else:
            print("[vpinfo] Height properties must be defined first using the defBGheight method.")
    return H
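# Usage sketch (hedged, not runnable on its own): vpinfo expects a CloudObj-like instance
# from this library with defBGheight() or defheight() already applied; 'temperature' is an
# illustrative parameter name, not a guaranteed title.
#
#   H_bg     = vpinfo(CloudObj, 'temperature')                  # base/top from best-guess heights
#   H_4point = vpinfo(CloudObj, 'temperature', base='4point')   # base/top from the 4-point heights
#   H_bg["slope"][0]    # delta divided by (top - bottom) for the first vertical scan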
def process_file(fname, out_noS, out_onlyS):
    input = open(fname, 'rt')
    fnoS = open(out_noS, 'wb')
    fonlyS = open(out_onlyS, 'wb')
    tags = []    # tags in non-singletons
    Stags = []   # tags in singletons
    std = []
    singletonCount = 0
    for line in input:
        if line.startswith("#"):
            fnoS.write(line)
            continue
        if (len(line.split("\t")) == 9):
            chrom, junk, junk, start, end, tag, strand, junk, attr = line.split("\t")
            if float(get_std(attr.rstrip())) == 0:
                singletonCount += 1
                Stags.append(int(tag))
                fonlyS.write(line)
            else:
                tags.append(int(tag))
                std.append(float(get_std(attr.rstrip())))
                fnoS.write(line)
    ratio = signal2noise(tags, Stags)
    filedict[os.path.basename(fname)] = ".\t.\t" + str(len(tags)) + "\t" + str(singletonCount) + "\t" + str(st.nanmedian(tags)) + "\t" + str(st.nanmean(tags)) + "\t" + str(st.nanmedian(std)) + "\t" + str(st.nanmean(std)) + "\t" + str(ratio)
fig = plt.figure(figsize=(3.62, 2.76))
ax = fig.add_subplot(111)

data = cdata[prnd, :]
zenSpacing = 0.5
median = []
zen = np.linspace(0, 90, int(90. / zenSpacing) + 1)
for z in zen:
    criterion = ((data[:, 2] < (z + zenSpacing / 2.)) &
                 (data[:, 2] > (z - zenSpacing / 2.)))
    ind = np.array(np.where(criterion))[0]
    tmp = data[ind, :]
    rout = esm.reject_outliers_arg(tmp[:, 3], 3)
    for i in rout:
        ax.plot(90. - z, tmp[i, 3], 'k.', alpha=0.5)
    median.append(nanmedian(data[ind, 3]))
ax.plot(90. - zen, median, 'r-', alpha=0.5)

for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
             ax.get_xticklabels() + ax.get_yticklabels()):
    item.set_fontsize(8)
ax.set_ylim([-35, 35])

plt.tight_layout()
plt.savefig(str(prn) + "_ele.png")
plt.close()

#================================================
az = np.linspace(0, 360, 721)
fig = plt.figure(figsize=(3.62, 2.76))
ax = fig.add_subplot(111, polar=True)
t1 = time.clock()
print "time to convert to numpy arrays:", (t1 - t0)

#####
# python array test
#####
t0 = time.clock()

#####
# numpy array test
#####
for test in natests:
    # find mean
    nameans.append(test.mean())
    # find median
    medianval = nanmedian(test)
    namedians.append(medianval)
    # normalise to median
    norm = test / medianval
    nanormalised.append(norm)
    # log2
    nalogged.append(numpy.log2(norm))
t1 = time.clock()
print "time for numpy test:", (t1 - t0)

#####
# validation
#####
try:
    assert (listsEqual(means, nameans))
except AssertionError: