def test_drid_2():
    n_frames = 3
    n_atoms = 11
    n_bonds = 5
    top = md.Topology()
    chain = top.add_chain()
    residue = top.add_residue('X', chain)
    for i in range(n_atoms):
        top.add_atom('X', None, residue)
    random = np.random.RandomState(0)
    bonds = random.randint(n_atoms, size=(n_bonds, 2))
    for a, b in bonds:
        top.add_bond(top.atom(a), top.atom(b))

    t = md.Trajectory(xyz=random.randn(n_frames, n_atoms, 3), topology=top)
    got = compute_drid(t).reshape(n_frames, n_atoms, 3)

    for i in range(n_frames):
        recip = 1 / squareform(pdist(t.xyz[i]))
        recip[np.diag_indices(n=recip.shape[0])] = np.nan
        recip[bonds[:, 0], bonds[:, 1]] = np.nan
        recip[bonds[:, 1], bonds[:, 0]] = np.nan

        mean = nanmean(recip, axis=0)
        second = nanmean((recip - mean) ** 2, axis=0) ** 0.5
        third = scipy.special.cbrt(nanmean((recip - mean) ** 3, axis=0))

        np.testing.assert_array_almost_equal(got[i, :, 0], mean, decimal=5)
        np.testing.assert_array_almost_equal(got[i, :, 1], second, decimal=5)
        np.testing.assert_array_almost_equal(got[i, :, 2], third, decimal=5)
def get_flow(flow, maskSizePixel, maskLocationPixel):
    print 'get flow ...'
    masked, surround = get_masked_data(flow, maskSizePixel, maskLocationPixel)
    masked_sum = []
    whole_sum = []
    for m in masked:
        m = m.reshape(-1, 2)
        [x, y] = spt.nanmean(m, 0)
        r = np.sqrt(x ** 2 + y ** 2)
        theta = math.atan2(y, x)
        masked_sum.append([theta, r])
    for m in flow:
        m = m.reshape(-1, 2)
        [x, y] = spt.nanmean(m, 0)
        r = np.sqrt(x ** 2 + y ** 2)
        theta = math.atan2(y, x)
        whole_sum.append([theta, r])
    surround_sum = []
    for s in surround:
        tmp_sum = []
        for m in s:
            m = m.reshape(-1, 2)
            [x, y] = spt.nanmean(m, 0)
            r = np.sqrt(x ** 2 + y ** 2)
            theta = math.atan2(y, x)
            tmp_sum.append([theta, r])
        surround_sum.append(tmp_sum)
    return np.array(whole_sum)[np.newaxis, :, :], \
        np.array(masked_sum)[np.newaxis, :, :], np.array(surround_sum)
def calc_kendall_tau(gam_unit, average=False):
    """
    Calculate Kendall tau value for predicted values.

    This tau scales between -1 (perfect negative correlation)
    and 1 (perfect correlation).

    gam_unit : GamUnit
      has `actual` and `pred` attributes
    average : bool
      average across repeats before calculating tau
    """
    assert type(average) == bool
    if not average:
        act_flat = gam_unit.actual.flatten()
    else:
        act_flat = stats.nanmean(gam_unit.actual, axis=1).flatten()
    nans = np.isnan(act_flat)
    act_flat = act_flat[~nans]
    tau = np.zeros((gam_unit.pred.shape[0])) + np.nan
    P = np.zeros_like(tau) + np.nan
    for i, pred in enumerate(gam_unit.pred):
        if not average:
            pred_flat = pred.flatten()[~nans]
        else:
            pred_flat = stats.nanmean(pred, axis=1).flatten()
        tau[i], P[i] = stats.kendalltau(act_flat, pred_flat)
    return tau, P
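# Hedged usage sketch (added; assumes `import numpy as np` and
# `from scipy import stats` at module level; with average=False the removed
# scipy.stats.nanmean branch is never reached, so this runs on modern SciPy):
import numpy as np
from types import SimpleNamespace

gam_unit = SimpleNamespace(
    actual=np.array([[1.0, 2.0, np.nan], [2.0, 3.0, 4.0]]),
    pred=np.array([[[1.1, 2.2, 2.9], [2.1, 2.8, 4.2]]]))  # one prediction
tau, P = calc_kendall_tau(gam_unit)  # tau[0] near 1: fake data is near-monotone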
def format_for_fa(bnd, preaverage=False, use_unbiased=False):
    '''
    Parameters
    ----------
    bnd : BinnedData
      binned data
    '''
    if preaverage:
        # average across repeats
        if not use_unbiased:
            rate = stats.nanmean(bnd.get_rates(), axis=1)
        else:
            rate = stats.nanmean(bnd.unbiased_rate, axis=1)
    else:
        if not use_unbiased:
            rate = bnd.get_rates_flat(with_nans=False)
        else:
            rate = bnd.get_unbiased_rate_flat(with_nans=False)
    ntask, nunit, nbin = rate.shape
    rate = np.transpose(rate, [1, 0, 2])  # gives nunit, ntask, nbin
    rate = np.reshape(rate, [nunit, ntask * nbin])
    # gives nunit, ntask * nbin = dimensions, observations
    # mdp and scikits.learn.pca take data in form (observations, dimensions)
    return rate.T
def calc_norm_summary_tables(accuracy_tbl, time_tbl):
    """
    Calculate normalized performance/ranking summary, as numpy
    matrices as usual for convenience, and matrices of additional
    statistics (min, max, percentiles, etc.)

    Here normalized means relative to the best which gets a 1, all
    others get the ratio resulting from dividing by the performance of
    the best.
    """
    # Min across all minimizers, i.e. for each fit problem what is the
    # lowest chi-squared and the lowest time
    min_sum_err_sq = np.nanmin(accuracy_tbl, 1)
    min_runtime = np.nanmin(time_tbl, 1)

    # create normalised tables
    norm_acc_rankings = accuracy_tbl / min_sum_err_sq[:, None]
    norm_runtimes = time_tbl / min_runtime[:, None]

    summary_cells_acc = np.array([np.nanmin(norm_acc_rankings, 0),
                                  np.nanmax(norm_acc_rankings, 0),
                                  stats.nanmean(norm_acc_rankings, 0),
                                  stats.nanmedian(norm_acc_rankings, 0)
                                  ])

    summary_cells_runtime = np.array([np.nanmin(norm_runtimes, 0),
                                      np.nanmax(norm_runtimes, 0),
                                      stats.nanmean(norm_runtimes, 0),
                                      stats.nanmedian(norm_runtimes, 0)
                                      ])

    return norm_acc_rankings, norm_runtimes, summary_cells_acc, summary_cells_runtime
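# Hedged worked example (added, illustrative): the normalization divides each
# problem's row by the best (smallest) entry in that row, so the best
# minimizer scores exactly 1.0; np.nanmin/np.nanmean mirror the scipy.stats
# calls above on modern stacks.
import numpy as np

acc = np.array([[2.0, 1.0, 4.0],
                [3.0, 6.0, np.nan]])
norm = acc / np.nanmin(acc, 1)[:, None]  # -> [[2., 1., 4.], [1., 2., nan]]
col_summary = np.nanmean(norm, 0)        # per-minimizer average ranking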
def computeBuoyancy(**kwargs):
    hydro_mass = kwargs['qc'] + kwargs['qi'] + kwargs['qr'] + kwargs['qs'] + kwargs['qh']
    theta_rho = kwargs['pt'] * (1 + kwargs['qv'] / 0.622) / (1 + hydro_mass)

    theta_rho_bar = nanmean(nanmean(theta_rho, axis=-1), axis=-1)[..., np.newaxis, np.newaxis]
    p_bar = nanmean(nanmean(kwargs['p'], axis=-1), axis=-1)[..., np.newaxis, np.newaxis]

    return 9.806 * ((theta_rho - theta_rho_bar) / theta_rho_bar +
                    (2. / 7. - 1) * (kwargs['p'] - p_bar) / p_bar)
def despike(self, n=3, recursive=False, verbose=False):
    """
    Replace spikes with np.NaN, where a spike is any value more than
    n standard deviations away from the mean (default n = 3).
    """
    result = self.values.copy()
    outliers = (np.abs(self.values - nanmean(self.values)) >= n * nanstd(self.values))
    removed = np.count_nonzero(outliers)
    result[outliers] = np.NaN

    if verbose and not recursive:
        print("Removing from %s\n # removed: %s" % (self.name, removed))

    counter = 0
    if recursive:
        while outliers.any():
            result[outliers] = np.NaN
            outliers = np.abs(result - nanmean(result)) >= n * nanstd(result)
            counter += 1
            removed += np.count_nonzero(outliers)
        if verbose:
            print("Removing from %s\nNumber of iterations: %s # removed: %s" %
                  (self.name, counter, removed))
    return Series(result, index=self.index, name=self.name)
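# Hedged usage sketch (added, illustrative): `despike` above is a method of a
# pandas-Series-like object; the same idea as a free function, with
# np.nanmean/np.nanstd standing in for the removed scipy.stats versions.
import numpy as np

def despike_array(x, n=3):
    x = np.asarray(x, dtype=float).copy()
    spikes = np.abs(x - np.nanmean(x)) >= n * np.nanstd(x)
    x[spikes] = np.nan
    return x

despike_array(np.array([1.0] * 10 + [50.0]))  # the 50.0 (~3.2 std out) -> nan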
def timeseries(iData, zoneMap, std=None):
    '''
    Make zone-wise averaging of input data
    input: 3D matrix (Layers x Width x Height) and map of zones (W x H)
    output: 2D matrices (L x WH) with mean and std
    '''
    # reshape input cube into 2D matrix
    r, h, w = iData.shape
    iData, notNanDataI = cube2flat(iData)

    # get unique values of not-nan labels
    uniqZones = np.unique(zoneMap[np.isfinite(zoneMap)])

    zoneNum = np.zeros((r, uniqZones.size))
    zoneMean = np.zeros((r, uniqZones.size))
    zoneStd = np.zeros((r, uniqZones.size))

    # in each zone: collect all values from the input data and average
    # the non-nan ones
    for i in range(uniqZones.size):
        zi = uniqZones[i]
        if not np.isnan(zi):
            zoneData = iData[:, zoneMap.flat == zi]
            zoneNum[:, i] = zi
            zoneMean[:, i] = st.nanmean(zoneData, axis=1)
            zoneStd[:, i] = st.nanstd(zoneData, axis=1)
            if std is not None:
                # filter out values beyond std * zoneStd and re-average
                outliers = (np.abs(zoneData.T - zoneMean[:, i]) > zoneStd[:, i] * std).T
                zoneData[outliers] = np.nan
                zoneMean[:, i] = st.nanmean(zoneData, axis=1)
                zoneStd[:, i] = st.nanstd(zoneData, axis=1)

    return zoneMean, zoneStd, zoneNum
def robust_mean(data, stdcutoff=None, **kwargs):
    """
    Robustified mean. Rejects outliers before taking mean. Ignores NaNs.

    Significant speedup when utilizing the "stdcutoff" argument.
    Accepts numpy array, returns value.

    Parameters
    ----------
    stdcutoff : value, default None
        stdcutoff is compared to the std. dev. of input data.
        Explicit outlier rejection only occurs if this test fails.
        10x+ speedup.

    **kwargs passed to reject_outliers

    deviation_tolerance : value, default .6745
        Threshold for outlier rejection normalized such that 1 is
        equivalent to 1 standard deviation for gaussian input data.
        The default value of .6745 will output the interquartile mean.
    """
    if stdcutoff is None:
        return _sps.nanmean(reject_outliers(data, **kwargs))
    else:
        if _np.std(data) < stdcutoff:
            return _sps.nanmean(data)
        else:
            return _sps.nanmean(reject_outliers(data, **kwargs))
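# Hedged stand-in (added): reject_outliers is not shown in this snippet; a
# common MAD-based implementation consistent with the docstring (the default
# deviation_tolerance of .6745 keeps points within one MAD of the median,
# i.e. roughly the interquartile mean) might look like this sketch:
import numpy as np

def robust_mean_demo(data, deviation_tolerance=.6745):
    data = np.asarray(data, dtype=float)
    d = np.abs(data - np.nanmedian(data))
    mad = np.nanmedian(d)  # MAD ~ 0.6745 sigma for gaussian data
    keep = (d / mad <= deviation_tolerance / 0.6745) if mad else np.ones(d.shape, bool)
    return np.nanmean(data[keep])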
def calc_kendall_tau(gam_unit, average=False):
    '''
    Calculate Kendall tau value for predicted values.

    This tau scales between -1 (perfect negative correlation)
    and 1 (perfect correlation).

    gam_unit : GamUnit
      has `actual` and `pred` attributes
    average : bool
      average across repeats before calculating tau
    '''
    assert type(average) == bool
    if not average:
        act_flat = gam_unit.actual.flatten()
    else:
        act_flat = stats.nanmean(gam_unit.actual, axis=1).flatten()
    nans = np.isnan(act_flat)
    act_flat = act_flat[~nans]
    tau = {}
    P = {}
    for k, v in gam_unit.fits.iteritems():
        if not average:
            pred_flat = v.pred.flatten()[~nans]
        else:
            pred_flat = stats.nanmean(v.pred, axis=1).flatten()
        tau[k], P[k] = stats.kendalltau(act_flat, pred_flat)
    return tau, P
def get3NetworkAvg(data_t, titleName, roiNames, numRuns):
    # Define the streams
    #Ventral=[1, 3, 11, 12, 13, 14]
    #Dorsal=[2, 4, 5, 6, 7, 8, 9, 10]
    #Lateral=[0, 1, 2, 3, 4]
    Lateral = [0, 1, 2, 8, 9]
    Dorsal = [8, 9, 10, 11, 12, 13, 14, 15]
    Ventral = [1, 2, 3, 4, 5, 6]

    print 'Ventral rois: ' + str(roiNames[Ventral])
    print 'Dorsal rois: ' + str(roiNames[Dorsal])
    print 'Early Visual rois: ' + str(roiNames[Lateral])

    # Get network averages
    lateralCoher = getNetworkWithin(data_t, Lateral)
    dorsalCoher = getNetworkWithin(data_t, Dorsal)
    ventralCoher = getNetworkWithin(data_t, Ventral)

    #allMeansWithin=(stats.nanmean(lateralCoher.flat), stats.nanmean(dorsalCoher.flat), stats.nanmean(ventralCoher.flat))
    #allSTDWithin=(stats.nanstd(lateralCoher.flat), stats.nanstd(dorsalCoher.flat), stats.nanstd(ventralCoher.flat))
    allMeansWithin = (stats.nanmean(dorsalCoher.flat), stats.nanmean(ventralCoher.flat))
    allSTDWithin = (stats.nanstd(dorsalCoher.flat), stats.nanstd(ventralCoher.flat))

    latBtwCoher = getNetworkBtw(data_t, Lateral, Ventral + Dorsal)
    dorsBtwCoher = getNetworkBtw(data_t, Dorsal, Ventral)
    ventBtwCoher = getNetworkBtw(data_t, Ventral, Dorsal)

    #allMeansBtw=(stats.nanmean(latBtwCoher), stats.nanmean(dorsBtwCoher), stats.nanmean(ventBtwCoher))
    #allSTDBtw=(stats.nanstd(latBtwCoher), stats.nanstd(dorsBtwCoher), stats.nanstd(ventBtwCoher))
    # Just dorsal versus ventral
    allMeansBtw = (stats.nanmean(dorsBtwCoher), stats.nanmean(ventBtwCoher))
    allSTDBtw = (stats.nanstd(dorsBtwCoher), stats.nanstd(ventBtwCoher))

    # Make bar graph
    title = titleName + ' by Network for ' + sub + ' for ' + str(numRuns) + ' runs'
    labels = ('Dorsal', 'Ventral')
    makeBarPlots(allMeansWithin, allSTDWithin, allMeansBtw, allSTDBtw, title, labels)
def meanplots(self):
    figpref.current()
    pl.close('all')

    def pcolor(fld, F, cmin, cmax, oneside=True):
        pl.figure(F)
        self.pcolor(miv(fld), oneside=oneside)
        pl.clim(cmin, cmax)

    """
    h = np.where(self.Dchl>0, self.Dchl, np.nan)
    pcolor(nanmean(h, axis=0), 1, 0, 2)
    h = np.where(self.Dchl<0, self.Dchl, np.nan)
    pcolor(-nanmean(h, axis=0), 2, 0, 2)
    h = np.where(self.Dsst>0, self.Dsst, np.nan)
    pcolor(nanmean(h, axis=0), 3, 0, 0.4)
    h = np.where(self.Dsst<0, self.Dsst, np.nan)
    pcolor(-nanmean(h, axis=0), 4, 0, 0.4)
    """
    pcolor(nanmean(self.Dchl, axis=0), 5, -0.25, 0.25, False)
    pl.title(r"Mean change of Chl (mg m$^{-3}$)")
    pl.savefig('figs/liege/meanmap_Dchl.pdf', transparent=True)

    pcolor(nanmean(self.Dsst, axis=0), 6, -0.25, 0.25, False)
    pl.title(r"Mean change of SST ($\degree$C)")
    pl.savefig('figs/liege/meanmap_Dsst.pdf', transparent=True)
def plot_timeseries(ns, nsexp):
    pl.clf()
    pl.subplot(2, 1, 1)
    pl.scatter(ns.jdvec[:, np.newaxis, np.newaxis] + ns.timecube[:, 20:30, 20:30] * 0,
               ns.timecube[:, 20:30, 20:30], 2, 'r')
    pl.scatter(ns.jdvec,
               nanmean(nanmean(ns.timecube[:, 20:30, 20:30], axis=1), axis=1), 5, 'y')
    pl.legend(('Satellite Observations', 'Daily means'))
    pl.gca().xaxis.axis_date()
    pl.xlim(pl.datestr2num('2003-01-01'), pl.datestr2num('2013-05-31'))
    pl.setp(pl.gca(), yscale="log")
    pl.ylim(0.01, 5)
    pl.title('BATS')
    pl.ylabel(r'Chl (mg m$^{-3}$ d$^{-1}$)')

    pl.subplot(2, 1, 2)
    pl.scatter(nsexp.jdvec[:, np.newaxis, np.newaxis] + nsexp.timecube[:, 20:30, 20:30] * 0,
               nsexp.timecube[:, 20:30, 20:30], 2, 'r')
    pl.scatter(nsexp.jdvec,
               nanmean(nanmean(nsexp.timecube[:, 20:30, 20:30], axis=1), axis=1), 5, 'y')
    pl.gca().xaxis.axis_date()
    pl.xlim(pl.datestr2num('2003-01-01'), pl.datestr2num('2013-05-31'))
    pl.setp(pl.gca(), yscale="log")
    pl.ylabel(r'Chl (mg m$^{-3}$ d$^{-1}$)')
    pl.ylim(0.01, 5)
    pl.title(r'Experiment Site (45$\degree$N 24$\degree$W)')
def toleranceLimitProcessing(self, data):
    # Tolerance limit processing: in the EIA, tolerance limits are first
    # applied to the upper and lower bounds, then afterward to the
    # interval lengths (as opposed to all at once)
    random.seed(1)
    resampledData = [random.choice(data) for x in xrange(2000)]
    (resampLower, resampUpper) = zip(*resampledData)
    meanLower = nanmean(resampLower)
    stdLower = nanstd(resampLower) * sqrt(len(data))  # *sqrt is to get population std from sample
    meanUpper = nanmean(resampUpper)
    stdUpper = nanstd(resampUpper) * sqrt(len(data))  # ditto
    K = [32.019, 32.019, 8.380, 5.369, 4.275, 3.712, 3.369, 3.136, 2.967,
         2.839, 2.737, 2.655, 2.587, 2.529, 2.48, 2.437, 2.4, 2.366, 2.337,
         2.31, 2.31, 2.31, 2.31, 2.31, 2.208]
    # taken from Liu/Mendel matlab code, in turn from Walpole, Myers, Myers, Ye 2008
    k = K[min(len(data), 24)]
    acceptableLower = (meanLower - k * stdLower, meanLower + k * stdLower)
    acceptableUpper = (meanUpper - k * stdUpper, meanUpper + k * stdUpper)
    for (l, u) in data[:]:
        try:
            if not acceptableLower[0] <= l <= acceptableLower[1]:
                raise ValueError("Intolerable: lower bound %s not in %s"
                                 % (str(l), str(acceptableLower)), (l, u))
            if not acceptableUpper[0] <= u <= acceptableUpper[1]:
                raise ValueError("Intolerable: upper bound %s not in %s"
                                 % (str(u), str(acceptableUpper)), (l, u))
        except ValueError as (e, d):
            #print e
            #print "Intolerable: removing data point %s" % str(d)
            data.remove(d)
def get_phylo_depth_changes(fluc_levels, fluc_type, data_type):
    assert type(fluc_levels) == list
    assert type(fluc_type) == str
    assert fluc_type in ["sync", "stag", "lowhigh"]
    assert type(data_type) == str
    assert data_type in ["raw", "avg"]

    for fluc_level in fluc_levels:
        fluc_length = int(fluc_level)
        if data_type == "avg":
            start_slope_means = []
            start_slope_se = []
            end_slope_means = []
            end_slope_se = []
        else:
            start_slopes = [[] for i in range(30)]
            end_slopes = [[] for i in range(30)]

        for replicate in range(1, 31):
            avg_depth_for_updates = []
            start_inflow_slopes = []
            end_inflow_slopes = []
            averages_for_replicate = get_file_lines(
                "../data_" + str(fluc_type) + "_" + str(fluc_level) +
                "/replicate_" + str(replicate) + "/average.dat")
            for line in averages_for_replicate:
                if len(line) != 0 and line[0] != "#":
                    temp = line.split(" ")
                    update = int(temp[0])
                    if update % fluc_length == 0:
                        depth = float(temp[11])
                        avg_depth_for_updates += [float(depth)]
            for i in range(len(avg_depth_for_updates) - 1):
                if i % 2 == 0:
                    start_inflow_slopes += [math.fabs(avg_depth_for_updates[i] - avg_depth_for_updates[i + 1])]
                else:
                    end_inflow_slopes += [math.fabs(avg_depth_for_updates[i] - avg_depth_for_updates[i + 1])]
            if data_type == "avg":
                start_slope_means += [stats.nanmean(start_inflow_slopes)]
                start_slope_se += [stats.sem(start_inflow_slopes)]
                end_slope_means += [stats.nanmean(end_inflow_slopes)]
                end_slope_se += [stats.sem(end_inflow_slopes)]
            else:
                start_slopes[replicate - 1] = list(start_inflow_slopes)
                end_slopes[replicate - 1] = list(end_inflow_slopes)

        if data_type == "avg":
            pickle.dump(start_slope_means, open("../plot_data/start_slope_mean_" + str(fluc_type) + "_" + str(fluc_level) + ".data", "wb"))
            pickle.dump(end_slope_means, open("../plot_data/end_slope_mean_" + str(fluc_type) + "_" + str(fluc_level) + ".data", "wb"))
            pickle.dump(start_slope_se, open("../plot_data/start_slope_se_" + str(fluc_type) + "_" + str(fluc_level) + ".data", "wb"))
            pickle.dump(end_slope_se, open("../plot_data/end_slope_se_" + str(fluc_type) + "_" + str(fluc_level) + ".data", "wb"))
        else:
            pickle.dump(start_slopes, open("../plot_data/start_slope_raw_" + str(fluc_type) + "_" + str(fluc_level) + ".data", "wb"))
            pickle.dump(end_slopes, open("../plot_data/end_slope_raw_" + str(fluc_type) + "_" + str(fluc_level) + ".data", "wb"))
    return "success"
def calc_clipped_stats_old(data, clip=3.0, nIter=10):
    """Calculate the mean and stdev of an array given a sigma clip"""
    data = np.array(data).flatten()
    mean = float(stats.nanmean(data))
    median = float(stats.nanmedian(data))
    std = float(stats.nanstd(data))
    mad = float(MAD(data))
    npix = np.sum(np.isfinite(data))
    if clip > 0.0:
        convergeFlg = 0
        itCnt = 0
        while convergeFlg == 0 and itCnt < nIter:
            meanOld, stdOld, madOld = mean, std, mad
            minVal = mean - (clip * mad)
            maxVal = mean + (clip * mad)

            # Blank values outside the clip * madfm range
            dataMsk = np.where(np.greater(data, maxVal), np.nan, data)
            dataMsk = np.where(np.less(data, minVal), np.nan, dataMsk)

            # Measure the statistics
            mean = stats.nanmean(dataMsk)
            median = stats.nanmedian(dataMsk)
            std = stats.nanstd(dataMsk)
            mad = MAD(dataMsk)
            npix = np.sum(np.where(np.isnan(dataMsk), 0.0, 1.0))
            dataMsk = []
            if mean == meanOld and mad == madOld:
                convergeFlg = 1  # statistics stable between iterations
            itCnt += 1

    # Assemble the measurements into a dictionary
    m = {}
    m['mean'] = float(mean)
    m['median'] = float(median)
    m['stdev'] = float(std)
    m['madfm'] = float(mad)
    m['npix'] = int(npix)
    m['max'] = float(np.nanmax(data))
    m['min'] = float(np.nanmin(data))
    del data

    # If all nans
    if m['npix'] == 0:
        m['stdev'] = 0.0
        m['mean'] = 0.0
        m['median'] = 0.0
        m['max'] = 0.0
        m['min'] = 0.0
        m['centmax'] = (0.0, 0.0)
        m['madfm'] = 0.0
        m['success'] = False
    else:
        m['success'] = True
    return m
def get_temp(summary_table):
    print summary_table
    # print "datestart,temp,ra,dec"
    # print "%s,%.2f,%.3f,%.3f" % (summary_table[0]['datestart'],
    #                              np.mean([np.mean(summary_table['pre_temp']),
    #                                       np.mean(summary_table['post_temp'])]),
    #                              np.mean(summary_table['ra']),
    #                              np.mean(summary_table['dec']))
    return nanmean([nanmean(summary_table["pre_temp"]),
                    nanmean(summary_table["post_temp"])])
def reasonableIntervalProcessing(self, data):
    databackup = data[:]  # keep backup in case all intervals are deleted
    random.seed(1)
    resampledData = [random.choice(data) for x in xrange(2000)]
    (resampLower, resampUpper) = zip(*resampledData)
    resampInterval = map(lambda x: x[1] - x[0], resampledData)
    meanLower = nanmean(resampLower)
    stdLower = nanstd(resampLower) * sqrt(len(data))  # *sqrt estimates population std from sample
    meanUpper = nanmean(resampUpper)
    stdUpper = nanstd(resampUpper) * sqrt(len(data))  # ditto
    meanInterval = nanmean(resampInterval)
    stdInterval = nanstd(resampInterval) * sqrt(len(data))  # ditto

    if stdLower + stdUpper == 0:
        barrier = (meanLower + meanUpper) / 2
        print "barrierAvg", barrier
    elif stdLower == 0:
        barrier = meanLower + .5
        print "barrierlower", barrier
    elif stdUpper == 0:
        barrier = meanUpper - .5
        print "barrierupper", barrier
    else:
        barrier1 = ((meanUpper * stdLower ** 2 - meanLower * stdUpper ** 2)
                    + stdLower * stdUpper * sqrt((meanLower - meanUpper) ** 2
                    + 2 * (stdLower ** 2 - stdUpper ** 2) * log(stdLower / stdUpper))) \
                   / (stdLower ** 2 - stdUpper ** 2)
        barrier2 = ((meanUpper * stdLower ** 2 - meanLower * stdUpper ** 2)
                    - stdLower * stdUpper * sqrt((meanLower - meanUpper) ** 2
                    + 2 * (stdLower ** 2 - stdUpper ** 2) * log(stdLower / stdUpper))) \
                   / (stdLower ** 2 - stdUpper ** 2)
        print "barrier1", barrier1
        print "barrier2", barrier2
        if barrier1 >= meanLower and barrier1 <= meanUpper:
            barrier = barrier1
            print "barrier1", barrier
        #elif barrier2 >= meanLower and barrier1 <= meanUpper:
        else:
            barrier = barrier2
            print "barrier2", barrier

    for (l, u) in data[:]:
        try:
            #if l > barrier+(.1*stdLower) or u < barrier-(.1*stdUpper):
            #if l > barrier+stdLower or u < barrier-stdUpper:
            #if l > barrier and u < barrier:
            #if l > barrier+(.001*stdLower) or u < barrier-(.001*stdUpper):
            if not (2 * meanLower - barrier) <= l <= barrier <= u <= (2 * meanUpper - barrier):
                raise ValueError("Unreasonable: interval %s does not cross reasonable barrier %s"
                                 % (str((l, u)), str(barrier)), (l, u))
        except ValueError as (e, d):
            #print e
            #print "Unreasonable: removing data point %s" % str(d)
            data.remove(d)
def selectAgentCL(dist, dev, hz):
    #a=(dist.max(0)<3).nonzero()[0]
    #b=norm(np.median(dev,axis=0),axis=1)<MAXPHI
    #b=np.mod(dev/np.pi*180.0+180,360)-180
    #print (np.abs(dev)<MAXPHI).mean(axis=0)
    b = (np.abs(dev) < MAXPHI).mean(axis=0) > 0.5
    a = np.logical_and(nanmean(dist, 0) < CLFOCUSRADIUS, b)
    a = np.logical_or(nanmean(dist[:int(hz * CLSACTARGETDUR)], 0) < CLSACTARGETRADIUS,
                      a).nonzero()[0]
    return a
def calc_perf_stats(self):
    """Calculates mean performance based on trimmed time series."""
    self.mean_tsr, self.std_tsr = nanmean(self.tsr), nanstd(self.tsr)
    self.mean_cp, self.std_cp = nanmean(self.cp), nanstd(self.cp)
    self.mean_cd, self.std_cd = nanmean(self.cd), nanstd(self.cd)
    self.mean_ct, self.std_ct = nanmean(self.ct), nanstd(self.ct)
    self.mean_u_enc = nanmean(self.tow_speed)
    self.mean_tow_speed = self.mean_u_enc
    self.std_u_enc = nanstd(self.tow_speed)
    self.std_tow_speed = self.std_u_enc
def Average_StN(Dir_head, Bias_File, nRun, Mass_Col, Mass_Error_Cols):
    # Ex12_Bias[Input_Mass_Col]/(0.5*(Ex12_Bias[Mass_Error_Cols[0]]+Ex12_Bias[Mass_Error_Cols[1]]))
    StN = np.zeros(nRun)
    ErrorWidth = np.zeros(nRun)
    for i in range(1, nRun + 1):
        Input = np.genfromtxt(Dir_head + str(i) + '/' + Bias_File)
        ErrorWidth[i - 1] = 0.5 * (Input[Mass_Error_Cols[0]] + Input[Mass_Error_Cols[1]])
        StN[i - 1] = Input[Mass_Col] / ErrorWidth[i - 1]
    print 'StN:', stats.nanmean(StN)
    print 'Average Error width:', stats.nanmean(ErrorWidth)
    print ' '
    return stats.nanmean(StN)
def extract_pigments(f):
    '''
    extract pigments data from *.csv files, and return a list of dictionaries.

    INPUT
    -----
    f : str or pandas object
        string of specific .csv file

    OUTPUT
    ------
    var : list of dictionaries, containing pigments and information.
    '''
    stringy = str(f)
    term = stringy[stringy.rfind('.'):stringy.rfind('.') + 4]
    if term == '.csv':
        dat = np.genfromtxt(stringy, names=True, dtype=None, delimiter=',')
        print(dat)
        lista = dat.dtype.names
    else:
        dat = f
        lista = dat.keys()

    dicts = []
    nd = {}
    parse = ['station', 'treatment', 'time']
    for k in lista:
        if k in parse:
            continue
        nd[k] = []
        nd['name'] = k
        nd['local'] = dat['station'][0]
        nd['ct0'] = dat[k][(dat['treatment'] == 'Initial')]
        nd['ct1'] = dat[k][(dat['treatment'] == 'Control') & (dat['time'] == 'T1')]
        nd['ct2'] = dat[k][(dat['treatment'] == 'Control') & (dat['time'] == 'T2')]
        nd['ft0'] = dat[k][(dat['treatment'] == 'Initial')]
        nd['ft1'] = dat[k][(dat['treatment'] == 'Fe') & (dat['time'] == 'T1')]
        nd['ft2'] = dat[k][(dat['treatment'] == 'Fe') & (dat['time'] == 'T2')]
        nd['dt0'] = dat[k][(dat['treatment'] == 'Initial')]
        nd['dt1'] = dat[k][(dat['treatment'] == 'DFA') & (dat['time'] == 'T1')]
        nd['dt2'] = dat[k][(dat['treatment'] == 'DFA') & (dat['time'] == 'T2')]
        nd['xcontrol'] = np.append(nanmean(nd['ct0']), (nanmean(nd['ct1']), nanmean(nd['ct2'])))
        nd['xferro'] = np.append(nanmean(nd['ft0']), (nanmean(nd['ft1']), nanmean(nd['ft2'])))
        nd['xdfa'] = np.append(nanmean(nd['dt0']), (nanmean(nd['dt1']), nanmean(nd['dt2'])))
        nd['econtrol'] = np.append(nanstd(nd['ct0']), (nanstd(nd['ct1']), nanstd(nd['ct2'])))
        nd['eferro'] = np.append(nanstd(nd['ft0']), (nanstd(nd['ft1']), nanstd(nd['ft2'])))
        nd['edfa'] = np.append(nanstd(nd['dt0']), (nanstd(nd['dt1']), nanstd(nd['dt2'])))
        if nd:
            dicts.append(nd)
            nd = {}
    return dicts
def isc_corrmat_within_diff(indxA, indxB, C):
    """Faster within-group subject-total correlation contrast using
    correlation matrix

    Arguments:
        indxA (list): list of indices corresponding to group A members.
        indxB (list): likewise for group B (should be no overlap)
        C (ndarray): cross-correlation matrix, with subjects on the last
            two axes.

    Returns:
        ndarray with isc for A minus isc for B.
    """
    C_A = C[..., np.vstack(indxA), np.hstack(indxA)]  # last rows and columns using indxA
    C_B = C[..., np.vstack(indxB), np.hstack(indxB)]
    return nanmean(intersubcorr(C_A), axis=-1) - nanmean(intersubcorr(C_B), axis=-1)
def time(self, i=500, j=250):
    if not hasattr(self, 'h5f'):
        self.h5open()
    mat = self.h5f.root.chl[:, j-5:j+5, i-5:i+5]
    figpref.presentation()
    jd = pl.datestr2num('2012-01-01')
    jd2 = pl.datestr2num('2013-04-30') + 1
    pl.gca().xaxis.axis_date()
    pl.scatter(self.jdvec[:, np.newaxis, np.newaxis] + mat * 0, mat, 5, 'g')
    pl.xlim(jd, jd2)
    pl.scatter(self.jdvec, nanmean(nanmean(mat, axis=1), axis=1), 20, 'y')
    #setp(gca(),yscale='log')
    pl.ylim(0.01, 5)
    return pl.gca()
def fillna_with_mean(mat):
    # Obtain the mean of each column; nanmean ignores the NaNs.
    col_mean = stats.nanmean(mat, axis=0)
    # Find the indices that need replacing
    inds = np.where(np.isnan(mat))
    # Place column means at those indices; align the arrays using take
    # (modifies mat in place)
    mat[inds] = np.take(col_mean, inds[1])
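# Hedged worked example (added): the same column-mean imputation idiom with
# np.nanmean, the replacement for the scipy.stats version removed in 0.18.
import numpy as np

mat = np.array([[1.0, np.nan],
                [3.0, 4.0]])
col_mean = np.nanmean(mat, axis=0)      # -> [2., 4.]
inds = np.where(np.isnan(mat))          # -> (array([0]), array([1]))
mat[inds] = np.take(col_mean, inds[1])  # mat -> [[1., 4.], [3., 4.]]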
def load_unicef_data():
    """Loads Unicef data from CSV file.

    Retrieves a matrix of all rows and columns from Unicef child
    mortality dataset.

    Args:
      none

    Returns:
      Country names, feature names, and matrix of values as a tuple
      (countries, features, values).

      countries: vector of N country names
      features: vector of F feature names
      values: matrix N-by-F
    """
    fname = 'SOWC_combined_simple.csv'

    # Uses pandas to help with string-NaN-numeric data.
    data = pd.read_csv(fname, na_values='_')

    # Strip countries title from feature names.
    features = data.axes[1][1:]
    # Separate country names from feature values.
    countries = data.values[:, 0]
    values = data.values[:, 1:]
    # Convert to numpy matrix for real.
    values = np.asmatrix(values, dtype='float64')

    # Modify NaN values (missing values).
    mean_vals = stats.nanmean(values, axis=0)
    inds = np.where(np.isnan(values))
    values[inds] = np.take(mean_vals, inds[1])
    return (countries, features, values)
def load_test_fs():
    test_fs = np.genfromtxt(open(dir + '/train_v2.csv', 'rb'),
                            delimiter=',', skip_header=1)
    col_mean = stats.nanmean(test_fs, axis=0)
    inds = np.where(np.isnan(test_fs))
    test_fs[inds] = np.take(col_mean, inds[1])
    test_fs[np.isinf(test_fs)] = 0
    return test_fs
def mean_f(self, x):
    """Compute mean over time varying axis of a front relative
    quantity, x.
    """
    # TODO: the axis used in nanmean is different for U and Uf
    # calcs - change Uf dims to make consistent?
    return stats.nanmean(x, axis=1)
def timeseries(iData, zoneMap):
    '''
    Make zone-wise averaging of input data
    input: 3D matrix (Layers x Width x Height) and map of zones (W x H)
    output: 2D matrices (L x WH) with mean and std
    '''
    # reshape input cube into 2D matrix
    r, h, w = iData.shape
    iData, notNanDataI = cube2flat(iData)

    # get unique values of labels
    uniqZones = np.unique(zoneMap)
    # leave only not-nan
    uniqZones = uniqZones[~np.isnan(uniqZones)]

    zoneNum = np.zeros((r, uniqZones.size))
    zoneMean = np.zeros((r, uniqZones.size))
    zoneStd = np.zeros((r, uniqZones.size))

    # in each zone: collect all values from the input data and average
    # the non-nan ones
    for i in range(uniqZones.size):
        zi = uniqZones[i]
        if not np.isnan(zi):
            zoneData = iData[:, zoneMap.flat == zi]
            zoneNum[:, i] = zi
            zoneMean[:, i] = st.nanmean(zoneData, axis=1)
            zoneStd[:, i] = st.nanstd(zoneData, axis=1)

    return zoneMean, zoneStd, zoneNum
def getTraceAvg(dm, avgFunc=nanmean, **traceParams):
    """
    Gets a single average trace

    Arguments:
    dm -- a DataMatrix

    Keyword arguments:
    avgFunc       -- the function to use to determine the average trace.
                     This function must be robust to nan values.
                     (default=nanmean)
    **traceParams -- see getTrace()

    Returns:
    An (xData, yData, errData) tuple, where errData contains the standard
    error.
    """
    traceLen = traceParams['traceLen']
    mTrace = np.empty((len(dm), traceLen))
    mTrace[:] = np.nan
    i = 0
    for trialDm in dm:
        aTrace = getTrace(trialDm, **traceParams)
        mTrace[i, 0:len(aTrace)] = aTrace
        i += 1
    xData = np.linspace(0, traceLen, traceLen)
    yData = avgFunc(mTrace, axis=0)
    errData = nanstd(mTrace, axis=0) / np.sqrt(mTrace.shape[0])
    errData = np.array([errData, errData])
    return xData, yData, errData
def get_binned_stats(bins, binsize, xdat, yres):
    from numpy import array, nanstd, where
    from scipy.stats import nanmean

    medres = []
    stdres = []
    yres = array(yres)
    halfbin = binsize / 2.0
    for bin in bins:
        index = array(where((xdat >= bin - halfbin) & (xdat < bin + halfbin))[0])
        medres.append(nanmean(yres[index]))
        stdres.append(nanstd(yres[index]))
    return array(medres), array(stdres)
def fnBuildStarTemplate(CCFs, Templates=None, MeanRV=None):
    if Templates is None:
        Templates = CCFs.keys()
    TemplateRVMean = stats.nanmean([CCFs[CCFName].RVC for CCFName in CCFs.keys()])
    Template = np.nansum([fnShiftCCF(CCFs[CCFName].data.copy(),
                                     CCFs[CCFName].wave.copy(),
                                     CCFs[CCFName].RVC - TemplateRVMean)
                          for CCFName in Templates], axis=0)
    TemplateWave = CCFs[Templates[0]].wave - CCFs[Templates[0]].RVC
    Template = np.divide(Template.copy(), np.nanmax(Template.copy()))
    return Template, TemplateWave, TemplateRVMean
def addPoint(self, timestamp, imbgr, imdepth):
    self.timeline = np.append(self.timeline, timestamp)
    v = self.rawFeatureVector(imbgr, imdepth)
    if self.memory.shape != (0,):
        self.memory = np.vstack((self.memory, v))
    else:
        self.memory = v[np.newaxis]
    if self.memory.shape[0] > smoothing_memory:
        self.memory = self.memory[-smoothing_memory:]
    # indexing here? (smooth only some features)
    smoothed = stats.nanmean(self.memory)
    if self.features.shape != (0,):
        self.features = np.vstack((self.features, smoothed))
    else:
        self.features = smoothed[np.newaxis]
    return smoothed
def aveH5(years):
    """
    Calculates average 500mb height for a 100 year period

    Parameters
    ----------
    years : 100 year period from CESM control

    Returns
    ----------
    tq : average 500mb height (time x lat x lon)
    lat : array of latitudes
    lon : array of longitudes
    """
    directory = '/volumes/data/gcm/cesm-lens/B1850C5CN/Aday/Z500/CESM1-CAM5/005/'
    filename = 'b.e11.B1850C5CN.f09_g16.005.cam.h1.Z500.%s.nc' % years
    data = directory + filename

    value = Dataset(data)
    lat = value.variables['lat'][112:167]
    lon = value.variables['lon'][58:240]
    z500 = value.variables['Z500'][:, 112:167, 58:240]
    value.close()

    ### Calculate climatologies - very slow
    doy = list(xrange(30))
    aveheights = []
    for j in xrange(59, 36500, 365):
        start = j
        end = j + 30
        q = np.array(xrange(start, end))
        Z500 = z500[q, :, :]
        aveheights.append(Z500)
    century_averages = np.asarray(aveheights)

    tq = []
    for m in xrange(len(doy)):
        a = century_averages[:, m, :, :]
        b = nanmean(a)
        tq.append(b)
    tq = np.asarray(tq)
    return tq, lat, lon
def get_data():
    '''
    Output: returns a list of 3 data-sets scaled to have zero mean and
    unit variance
    '''
    # read excel files as data-frames:
    consumerDiscrete = pd.read_excel('./data/U of M Student Data - Consumer Discretionary .xlsx',
                                     'Screening', skiprows=7, na_values=['-', 'NM'])
    consumerStaples = pd.read_excel('./data/U of M Student Data - Consumer Staples.xlsx',
                                    'Screening', skiprows=7, na_values=['-', 'NM'])
    industrials = pd.read_excel('./data/U of M Student Data - Industrials.xlsx',
                                'Screening', skiprows=7, na_values=['-', 'NM'])

    # remove string data columns
    consumerDiscrete = consumerDiscrete.iloc[:, 5::]
    consumerStaples = consumerStaples.iloc[:, 5::]
    industrials = industrials.iloc[:, 5::]

    # list of all the data sets:
    dataSet = [consumerDiscrete, consumerStaples, industrials]

    for i in range(len(dataSet)):
        # calculate column means:
        colMean = stats.nanmean(dataSet[i], axis=0)
        # find indices where you need to replace:
        inds = np.where(np.isnan(dataSet[i]))
        # convert data-frame to numpy array:
        data = pd.DataFrame.as_matrix(dataSet[i])
        # replace NA values with col means:
        data[inds] = np.take(colMean, inds[1])
        # replace variable with the scaled data:
        dataSet[i] = scale(data)

    return dataSet
def av_mtrx(mtrx):
    ''' take the average of a pairwise matrix '''
    nm = len(mtrx)
    avMtrx = np.zeros((nm, nm))
    for i1 in range(nm):
        for i2 in range(nm):
            if i1 == i2:
                avMtrx[i1, i2] = np.nan
                continue
            val1 = mtrx[i1, i2]
            val2 = mtrx[i2, i1]
            if (val1 == -666) or (val2 == -666):
                avMtrx[i1, i2] = np.nan
            else:
                avMtrx[i1, i2] = stats.nanmean([val1, val2])
    return avMtrx
def defaultValueCorrection(self, data):
    # address default values:
    middle = nanmean([(d[1] + d[0]) / 2 for d in data]) / (self.r[1] - self.r[0])
    print "middle", middle
    # iterate over copies so that removals do not skip elements
    if middle < .35:
        print "filtering u=100"
        for d in data[:]:
            if d[1] == 100 and random.random() > .3:
                data.remove(d)
    if middle > .65:
        print "filtering l=0"
        for d in data[:]:
            if d[0] == 0 and random.random() > .3:
                data.remove(d)
    for d in data[:]:
        if (d[0] == 0 or d[1] == 100) and random.random() > .7:
            data.remove(d)
    print len(data), "after correcting for default values"
def calc_halftimes(self):
    if not hasattr(self, 'restime_mat'):
        print "loading restime_mat from file"
        self.restime_mat = np.load('restime_mat.npz')['restime_mat']
    self.jdvec = np.unique(self.jd)
    mat = self.restime_mat - self.restime_mat[:, :, -1][:, :, np.newaxis]
    mat = mat / mat[:, :, 0][:, :, np.newaxis]
    meanmat = nanmean(mat, axis=0)
    xi = self.jdvec - self.jdvec[0]
    nreg = meanmat.shape[0]
    self.regtaus = np.zeros((nreg,))
    for i in np.arange(nreg):
        yi = meanmat[i, :]
        try:
            ipos = np.nonzero(yi > 0.1)[0].max()
        except ValueError:
            # no values above threshold in this region
            continue
        k, m, _, _, _ = linregress(xi[:ipos], np.log(yi[:ipos]))
        tau = np.interp(-1, (xi * k + m)[::-1], xi[::-1])
        self.regtaus[i] = tau
def fit_naa(self, reject_outliers=3.0, fit_lb=1.8, fit_ub=2.4,
            phase_correct=True):
    """
    Fit a Lorentzian function to the NAA peak at ~ 2 ppm.

    Example of fitting inverted peak: Foerster et al. 2013, An imbalance
    between excitatory and inhibitory neurotransmitters in amyotrophic
    lateral sclerosis revealed by use of 3T proton MRS
    """
    model, signal, params = ana.fit_lorentzian(self.diff_spectra,
                                               self.f_ppm,
                                               lb=fit_lb,
                                               ub=fit_ub)
    # Store the params:
    self.naa_model = model
    self.naa_signal = signal
    self.naa_params = params
    self.naa_idx = ut.make_idx(self.f_ppm, fit_lb, fit_ub)
    mean_params = stats.nanmean(params, 0)
    self.naa_auc = self._calc_auc(ut.lorentzian, params, self.naa_idx)
def subtract_mean(sig, hasNans=False):
    '''
    Subtracts the mean from a signal with nanmean.

    Parameters
    ----------
    sig : ndarray, shape(n,)
    hasNans : boolean, optional
        If your data has nans use this flag if you want to ignore them.

    Returns
    -------
    ndarray, shape(n,)
        sig minus the mean of sig
    '''
    if hasNans:
        return sig - nanmean(sig)
    else:
        return sig - np.mean(sig)
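# Hedged usage example (added), with np.nanmean standing in for the removed
# scipy.stats.nanmean:
import numpy as np

sig = np.array([1.0, 2.0, np.nan, 3.0])
sig - np.nanmean(sig)  # -> [-1., 0., nan, 1.]  (mean of 1, 2, 3 is 2)
np.mean(sig)           # -> nan, which is why the hasNans flag exists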
def __init__(self, y=None, M=None):
    """Computes the mean and scoring matrix of measurements

    INPUT:
      y -- DxN -- N measurements of a time series in D dimensions
      M -- DxD (optional) -- measurement error covariance for y
        -- If M is missing, it is assumed to be diagonal with variances
        -- given by 1/2 variance of the second order differences of y
    """
    # if M given --> use fromCovAndMean
    # elif we got y --> use fromData
    # else --> create empty object with None in members
    if M is not None:
        self.fromCovAndMean(nanmean(y, 1), M)
    elif y is not None:
        self.fromData(y)
    else:
        self.y0 = None
        self.M = None
        self.S = None
def MeanWithConfidenceInterval(Y, confidence=0.95):
    """
    Use the fact that (mean(Y) - mu) / (std(Y)/sqrt(n)) is a Student T
    distribution with n-1 degrees of freedom

    Returns:
        2 tuple (mean, symmetric confidence interval size).
    """
    n = len(Y)
    Y_bar = st.nanmean(Y)
    # According to the Student T-test distribution for n-1 degrees of freedom
    # find the position where the CDF is 0.975 (assuming we want a confidence
    # of 0.95). The lower part of the tail will account for the other 0.025
    # chance.
    t = st.t.ppf((confidence + 1.0) / 2.0, n - 1)
    SD = st.nanstd(Y, bias=False)  # use the unbiased estimator: sqrt(y^2 / (n-1))
    SE = SD / np.sqrt(len(Y))
    return Y_bar, t * SE
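# Hedged worked example (added), with np.nanmean/np.nanstd standing in for
# the removed scipy.stats versions (ddof=1 matches nanstd's bias=False):
import numpy as np
from scipy import stats as st

Y = np.array([9.8, 10.1, 10.0, 9.9, 10.2])
n = len(Y)
t = st.t.ppf((0.95 + 1.0) / 2.0, n - 1)             # ~2.776 for 4 dof
half_width = t * np.nanstd(Y, ddof=1) / np.sqrt(n)  # ~0.196
# report: 10.0 +/- 0.20 at 95% confidence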
def test_sincos():
    """
    Simple test/demo of Phaser, recovering a sine and cosine

    Demo courtesy of Jimmy Sastra, U. Penn 2011
    """
    from numpy import sin, cos, pi, array, linspace, cumsum, asarray, dot, ones, sqrt, newaxis
    from pylab import plot, legend, axis, show, randint, randn, rand, std, lstsq
    from scipy.stats import nanmean

    # create separate trials and store times and data
    dats = []
    t0 = []
    period = 55  # i
    phaseNoise = 0.05 / sqrt(period)
    snr = 20
    N = 10
    print N, "trials with:"
    print "\tperiod %.2g" % period, "(samples)\n\tSNR %.2g" % snr, "\n\tphase noise %.2g" % phaseNoise, "(radian/cycle)"
    print "\tlength = [",
    for li in xrange(N):
        l = randint(400, 2000)  # length of trial
        dt = pi * 2.0 / period + randn(l) * phaseNoise  # create noisy time steps
        t = cumsum(dt) + rand() * 2 * pi  # starting phase is random
        raw = asarray([sin(t), cos(t)])  # signal
        raw = raw + randn(*raw.shape) / snr  # SNR=20 noise
        t0.append(t)
        dats.append(raw - nanmean(raw, axis=1)[:, newaxis])
        print l,
    print "]"

    phr = Phaser(dats, psecfunc=lambda x: dot([1, -1], x))
    phi = [phr.phaserEval(d) for d in dats]  # extract phaseNoise
    reg = array([linspace(0, 1, t0[0].size), ones(t0[0].size)]).T
    tt = dot(reg, lstsq(reg, t0[0])[0])
    plot(((tt - pi / 4) % (2 * pi)) / pi - 1, dats[0].T, '.')
    plot((phi[0].T % (2 * pi)) / pi - 1, dats[0].T, 'x')  # plot data versus phase
    legend(['sin(t)', 'cos(t)', 'sin(phi)', 'cos(phi)'])
    axis([-1, 1, -1.2, 1.2])
    show()
def comp_cc(self, res=1, bin_size=1, time_range=[], nmax=100, pop_id='all', pl_flag=0):
    '''compute pairwise correlation coefficient and the mean of all pairs'''
    self.load_spikes()
    spikes = self.events['spikes']
    if len(spikes) == 0:
        print 'Comp cc: spike array is empty!'
        return np.nan, np.nan, np.nan
    print 'Comp CC'
    pop_id, spikes, neuron_nr = self.get_pop_spikes(spikes, nmax, pop_id)

    # get only spikes within time_range
    if time_range != []:
        idx = (spikes[:, 1] >= time_range[0]) & (spikes[:, 1] <= time_range[1])
        spikes = spikes[idx, :]
    else:
        time_range = [0, self.pars['T_total']]
    if len(spikes) == 0:
        print 'Comp cc: spike array is empty!'
        return np.nan, np.nan, np.nan
    print 'Comp CC'

    ids = np.unique(spikes[:, 0])  # get ids of neurons that fired
    neuron_nr = 1
    kernel_w = 1
    tmax = self.pars['T_sim'] + self.pars['T_wup']
    kernel_len = len(np.arange(-kernel_w * 10, kernel_w * 10 + 1 * res, res)) - 1
    kde = np.zeros((nmax, tmax + kernel_len))
    for ii, nid in enumerate(ids[:nmax]):
        idx = spikes[:, 0] == nid
        tmp, kernel, xx = misc2.kde(spikes[idx, 1], kernel_w, res, neuron_nr, 1, 'normal', tmax)
        kde[ii, :] = tmp
    cc1 = np.corrcoef(kde)
    cc2 = np.triu(cc1, 1).flatten()  # keep only upper triangular values
    if pl_flag:
        pl.hist(cc2, 100, label='chhub')
        pl.show()
    return cc2, st.nanmean(cc2), max(cc2)
def binMean(X, Y, numBins=8, xmin=None, xmax=None):
    if xmin is None:
        xmin = X.min()
    if xmax is None:
        xmax = X.max()
    bins = np.linspace(xmin, xmax, numBins + 1)
    # print bins, Y
    YY = np.array([nanmean(Y[(X > bins[binInd]) & (X <= bins[binInd + 1])])
                   for binInd in range(numBins)])
    YYmedian = np.array([nanmedian(Y[(X > bins[binInd]) & (X <= bins[binInd + 1])])
                         for binInd in range(numBins)])
    YYstd = np.array([np.std(Y[(X > bins[binInd]) & (X <= bins[binInd + 1])])
                      for binInd in range(numBins)])
    return bins[:-1] + (bins[1] - bins[0]) * 0.5, YY, YYmedian, YYstd
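# Hedged usage sketch (added; assumes nanmean/nanmedian are in scope for
# binMean, e.g. via numpy on modern stacks):
import numpy as np

X = np.array([0.1, 0.4, 0.6, 0.9])
Y = np.array([1.0, 3.0, np.nan, 5.0])
centers, means, medians, stds = binMean(X, Y, numBins=2, xmin=0, xmax=1)
# centers -> [0.25, 0.75]; means -> [2., 5.] (the NaN in bin 2 is skipped)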
def updateLabelsAndFit(self, bufferA, bufferB):
    self.plotAttributes["curve"].setData(bufferA, bufferB)

    try:
        if self.ui.checkBoxAutoscale.isChecked():
            self.setPlotRanges(bufferA, bufferB)

        minBufferA = nanmin(bufferA)
        minBufferB = nanmin(bufferB)
        maxBufferA = nanmax(bufferA)
        maxBufferB = nanmax(bufferB)

        if self.ui.checkBoxShowAve.isChecked():
            rtbsaUtils.setPosAndText(self.text["avg"], nanmean(bufferB),
                                     minBufferA, minBufferB, 'AVG: ')

        if self.ui.checkBoxShowStdDev.isChecked():
            xPos = (minBufferA + (minBufferA + maxBufferA) / 2) / 2
            rtbsaUtils.setPosAndText(self.text["std"], nanstd(bufferB),
                                     xPos, minBufferB, 'STD: ')

        if self.ui.checkBoxCorrCoeff.isChecked():
            correlation = corrcoef(bufferA, bufferB)
            rtbsaUtils.setPosAndText(self.text["corr"], correlation.item(1),
                                     minBufferA, maxBufferB,
                                     "Corr. Coefficient: ")

        if self.ui.checkBoxLinFit.isChecked():
            self.text["slope"].setPos((minBufferA + maxBufferA) / 2, minBufferB)
            self.getLinearFit(bufferA, bufferB, True)

        elif self.ui.checkBoxPolyFit.isChecked():
            self.text["slope"].setPos((minBufferA + maxBufferA) / 2, minBufferB)
            self.getPolynomialFit(bufferA, bufferB, True)

    except ValueError:
        print "Error updating plot range"
def isc_within(E, condA, method=('inter-subject', 'subject-total'), threshold=True):
    condnameA = condA.name.split('/')[-1]
    g_name = 'correlations/%s' % condnameA
    g_out = condA[g_name] if g_name in condA else condA.create_group(g_name)

    iter_runs = E.iter_runs(condnameA)
    dlist = [run.load(standardized=True, threshold=threshold) for run in iter_runs]

    if 'inter-subject' in method:
        # within-group isc
        C = crosscor(dlist, standardized=True)
        C_tmp = C - np.diag([np.nan] * C.shape[-1])  # not done in place, just in case
        C_mean = np.squeeze(np.apply_over_axes(nanmean, C_tmp, [-1, -2]))
        dset_overwrite(g_out, 'isc_mat', C)
        dset_overwrite(g_out, 'inter-subject', C_mean)

    if 'subject-total' in method:
        # within-group subject-total correlation
        sub_ttl_corr = [corcomposite(dat, condA['composite']) for dat in dlist]
        sub_ttl_mean = nanmean(sub_ttl_corr, axis=0)
        dset_overwrite(g_out, 'subject-total', sub_ttl_mean)
def plot_dist_to_targ(task_entry, reach_trajectories=None, targ_dist=10.,
                      plot_all=False, ax=None, target=None, update_rate=60.,
                      decoder_rate=10., **kwargs):
    task_entry = dbfn.lookup_task_entries(task_entry)
    if reach_trajectories is None:
        reach_trajectories = task_entry.get_reach_trajectories()

    if target is None:
        target = np.array([targ_dist, 0])
    trajectories_dist_to_targ = [map(np.linalg.norm, traj.T - target)
                                 for traj in reach_trajectories]
    step = int(update_rate / decoder_rate)
    trajectories_dist_to_targ = map(lambda x: x[::step], trajectories_dist_to_targ)

    max_len = np.max([len(traj) for traj in trajectories_dist_to_targ])
    n_trials = len(trajectories_dist_to_targ)

    # TODO use masked arrays
    data = np.ones([n_trials, max_len]) * np.nan
    for k, traj in enumerate(trajectories_dist_to_targ):
        data[k, :len(traj)] = traj

    from scipy.stats import nanmean, nanstd
    mean_dist_to_targ = np.array([nanmean(data[:, k]) for k in range(max_len)])
    std_dist_to_targ = np.array([nanstd(data[:, k]) for k in range(max_len)])

    if ax is None:
        plt.figure()
        ax = plt.subplot(111)

    # time vector, assuming original screen update rate of 60 Hz
    time = np.arange(max_len) * 0.1
    if plot_all:
        for dist_to_targ in trajectories_dist_to_targ:
            ax.plot(dist_to_targ, **kwargs)
    else:
        ax.plot(time, mean_dist_to_targ, **kwargs)

    import plotutil
    #plotutil.set_ylim(ax, [0, targ_dist])
    plotutil.ylabel(ax, 'Distance to target')
    plotutil.xlabel(ax, 'Time (s)')
    plt.draw()
def runModel(theta):
    global tf, inits
    step = 1
    # setting parameters
    beta, alpha, sigma = theta[:3]
    vnames = ['S', 'E', 'I', 'A', 'R']
    # rates: b, ki, ka, ri, ra
    # r = (0.001, 0.1, 0.1, 0.01, 0.01)
    r = (beta, alpha * epsilon, (1 - alpha) * epsilon, tau, tau)
    # print r, inits
    # propensity functions
    propf = (f1, f2, f3, f4, f5)

    tmat = np.array([[-1, 0, 0, 0, 0],
                     [1, -1, 0, -1, 0],
                     [0, 1, -1, 0, 0],
                     [0, 0, 0, 1, -1],
                     [0, 0, 1, 0, 1]])
    M = Model(vnames=vnames, rates=r, inits=inits, tmat=tmat, propensity=propf)
    # t0 = time.time()
    M.run(tmax=tf, reps=1, viz=0, serial=True)
    t, series, steps, events = M.getStats()
    ser = st.nanmean(series, axis=0)
    # print series.shape, ser.shape
    return ser
def average_data(iData, iDate, iYears, iMonths):
    '''Average input data over given period
    input:
        3D datacube (RxWxH, R-number of layers, W-width, H-height)
        vector of dates (list of datetime objects)
        list of years
        list of months
    output:
        one 2D matrix with averaged values in each pixel
        datetime of the first data in the average
    usage:
    to find multi-annual monthly mean:
        averagedData = average_data(iData, iDate, range(1998, 2012), [7])
    to find seasonal mean in one year:
        averagedData = average_data(iData, iDate, [1998], range(5, 10))
    '''
    # list of tuples (year, month) for all input dates
    yearmonth = np.array([[y.year, y.month] for y in iDate])
    r, h, w = iData.shape

    # create and fill temporary 3D matrix with data for averaging
    iData4aver = None
    for iy in iYears:
        for im in iMonths:
            # find appropriate layers in input data
            iDataSubset = iData[(yearmonth[:, 0] == iy) * (yearmonth[:, 1] == im), :, :]
            # append to the temporary 3D matrix
            if iData4aver is None:
                iData4aver = iDataSubset
            else:
                iData4aver = np.append(iData4aver, iDataSubset, axis=0)

    # average
    oDate = dt.date(iYears[0], iMonths[0], 1)
    if iMonths[0] > 10:
        oDate = dt.date(iYears[0] - 1, iMonths[0], 1)
    return st.nanmean(iData4aver, axis=0).reshape(1, h, w), oDate
def initializePandPi(K, B, data, epsilonForInitialization=0.1):
    # data must be a list
    if type(data) is not list:
        data = data.tolist()

    # sum of Pi must be 1
    Pi = list(np.random.dirichlet(np.ones(K), size=1)[0])

    # the process for initializing P
    P = []
    # strategy for initializing P: (1 - 2*epsilonForInitialization) * (a randomly
    # chosen histogram) + epsilonForInitialization * (the average histogram)
    # + epsilonForInitialization * (a uniform distribution)
    H_moyen = nanmean(data, axis=0)  # average histogram
    #print("Data: ", data)
    #print("Average histogram: ", H_moyen)
    for k in range(K):
        H_l = random.choice(data)  # a randomly chosen histogram
        T1 = [(1 - 2 * epsilonForInitialization) * u for u in H_l]
        #print("Term 1 for P cluster ", k, ": ", T1)
        T2 = [epsilonForInitialization * u for u in H_moyen]
        #print("Term 2 for P cluster ", k, ": ", T2)
        T3 = [epsilonForInitialization / B] * len(data[0])
        #print("Term 3 for P cluster ", k, ": ", T3)
        P_k = [(T1[u] + T2[u] + T3[u]) for u in range(len(data[0]))]
        # renormalize in order to have a probability distribution
        P_k = [u / sum(P_k) for u in P_k]
        P.append(P_k)

    # ensure that P and Pi are not too low (the likelihood would explode)
    P = np.maximum(P, 10 ** (-100))
    Pi = np.maximum(Pi, 10 ** (-100))
    return P, Pi
def approximate_generic_sys_restoration(sc, fc, sys_frag,
                                        output_array_given_recovery):
    SYS_DS = fc.sys_dmg_states
    sys_fn = pd.DataFrame(index=sc.restoration_time_range,
                          columns=[fc.sys_dmg_states])
    sys_fn = sys_fn.fillna(1)  # fillna returns a new frame, so assign it back
    sys_fn.index.name = "Time in " + sc.time_unit

    for ds in range(len(SYS_DS)):
        fn_tmp = np.zeros((sc.num_hazard_pts, sc.num_time_steps))
        ids = {}  # index of damage states within the samples
        for p in range(sc.num_hazard_pts):
            ids[p] = np.where(sys_frag[:, p] == ds)[0]
            m = np.mean(output_array_given_recovery[ids[p], p, :], axis=0)
            fn_tmp[p] = m / fc.nominal_production
        sys_fn[SYS_DS[ds]] = stats.nanmean(fn_tmp, axis=0)

    # sys_fn = sys_fn.drop('DS0 None', axis=1)
    sys_fn.to_csv(os.path.join(sc.output_path,
                               'system_restoration_profile.csv'), sep=',')
    return sys_fn
def moving_average(feedbacks, slot_n, prediction_length, mmc):
    past_delays_fitted = numpy.asarray(feedbacks)
    col_mean = stats.nanmean(past_delays_fitted, axis=0)
    col_std = stats.nanstd(past_delays_fitted, axis=0)

    # impute missing feedback with the column means
    inds = numpy.where(numpy.isnan(past_delays_fitted))
    past_delays_fitted[inds] = numpy.take(col_mean, inds[1])

    wifi_delays = past_delays_fitted[:, 0]
    lte_delays = past_delays_fitted[:, 1]

    if mmc:
        forward_predicted_delays_mmc = numpy.c_[
            numpy.random.normal(col_mean[0], col_std[0], prediction_length),
            numpy.random.normal(col_mean[1], col_std[1], prediction_length)]
        forward_predicted_delays_mmc[forward_predicted_delays_mmc < 0] = 0  # truncate negative samples
        predicted_ma = numpy.r_[past_delays_fitted, forward_predicted_delays_mmc]
        return col_mean, col_std, predicted_ma
    else:
        predicted_ma = numpy.r_[past_delays_fitted,
                                numpy.zeros((prediction_length, 2))]
        for pl in range(past_delays_fitted.shape[0],
                        past_delays_fitted.shape[0] + prediction_length):
            # weighted moving average: recent samples carry the largest weights
            predicted_ma[pl, 0] = numpy.divide(
                numpy.sum(numpy.divide(wifi_delays,
                                       range(wifi_delays.shape[0] + 1, 1, -1),
                                       dtype='float_')),
                numpy.sum(numpy.divide(1, range(wifi_delays.shape[0] + 1, 1, -1),
                                       dtype='float_')),
                dtype='float_')
            predicted_ma[pl, 1] = numpy.divide(
                numpy.sum(numpy.divide(lte_delays,
                                       range(lte_delays.shape[0] + 1, 1, -1),
                                       dtype='float_')),
                numpy.sum(numpy.divide(1, range(lte_delays.shape[0] + 1, 1, -1),
                                       dtype='float_')),
                dtype='float_')
        return predicted_ma
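# Hedged illustration (added): the non-mmc branch is a weighted moving
# average with weights 1/(n+1-i), so the most recent sample carries the
# largest weight (1/2) and the oldest the smallest (1/(n+1)):
import numpy as np

delays = np.array([10.0, 20.0, 30.0])        # oldest -> newest
w = 1.0 / np.arange(len(delays) + 1, 1, -1)  # [1/4, 1/3, 1/2]
wma = np.sum(delays * w) / np.sum(w)         # ~22.3, pulled toward 30.0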
def onselect(xmin, xmax):
    """
    A select event handler for the matplotlib SpanSelector widget.
    Selects a min/max range of the x or y axes for a matplotlib Axes.
    """
    # convert matplotlib float dates to a datetime format
    date_min = mdates.num2date(xmin)
    date_max = mdates.num2date(xmax)

    # put the xmin and xmax in datetime format to compare
    date_min = datetime.datetime(date_min.year, date_min.month,
                                 date_min.day, date_min.hour, date_min.minute)
    date_max = datetime.datetime(date_max.year, date_max.month,
                                 date_max.day, date_max.hour, date_max.minute)

    # find the indices that were selected
    indices = np.where((dates >= date_min) & (dates <= date_max))
    indices = indices[0]

    # set the data in second plot
    plot2.set_data(dates[indices], parameter['data'][indices])

    # calculate new mean, max, min
    param_mean = nanmean(parameter['data'][indices])
    param_max = np.nanmax(parameter['data'][indices])
    param_min = np.nanmin(parameter['data'][indices])

    ax2.set_xlim(dates[indices][0], dates[indices][-1])
    ax2.set_ylim(param_min, param_max)

    # show text of mean, max, min values on graph; use
    # matplotlib.patch.Patch properties and bbox
    text3 = 'mean = %.2f\nmax = %.2f\nmin = %.2f' % (param_mean, param_max, param_min)
    ax2_text.set_text(text3)
    fig.canvas.draw()
def comp_ind_rate(self, pop_id='all', time_range=[], nmax=100):
    '''compute rates of individual neurons for nmax neurons,
    arrange them in order, and take the average'''
    Rate_tot = []
    self.load_spikes()
    spikes = self.events['spikes']
    if len(spikes) == 0:
        print 'Comp Rate: spike array is empty!'
        return np.nan, np.nan
    print 'Comp Rate'
    pop_id, spikes, neuron_nr = self.get_pop_spikes(spikes, nmax, pop_id)
    if len(spikes) == 0:
        print 'Comp Rate: population has no spikes!'
        return np.nan, np.nan

    if time_range != []:
        idx = (spikes[:, 1] >= time_range[0]) & (spikes[:, 1] <= time_range[1])
        spikes = spikes[idx, :]
    if time_range == []:
        total_time = self.pars['T_total']
    else:
        total_time = time_range[1] - time_range[0]

    ids = np.unique(spikes[:, 0])[:nmax]
    if len(ids) == 0:
        return np.nan, np.nan

    dtype = [('id', float), ('rate', float)]
    for id_nr in ids:
        idx = spikes[:, 0] == id_nr
        spikes2 = spikes[idx, 1]
        rate_ind = (len(spikes2) / total_time) * 1000
        rate_ind = (id_nr, rate_ind)
        Rate_tot.append(rate_ind)
    Rate_tot = np.array(Rate_tot, dtype=dtype)
    sort_rate = np.sort(Rate_tot, order='rate')[::-1]
    sort_rate = sort_rate[:int(neuron_nr / 10)]
    Rate_mean = st.nanmean(sort_rate['rate'])
    return Rate_mean, sort_rate
def standardize_col(dat, meanonly=False):
    '''
    Mean impute each column of an array.
    '''
    colmean = st.nanmean(dat)
    if not meanonly:
        colstd = st.nanstd(dat)
    else:
        colstd = None
    ncol = dat.shape[1]
    nmissing = sp.zeros((ncol))
    datimp = sp.empty_like(dat)
    datimp[:] = dat
    for c in sp.arange(0, ncol):
        datimp[sp.isnan(datimp[:, c]), c] = colmean[c]
        datimp[:, c] = datimp[:, c] - colmean[c]
        if not meanonly:
            if colstd[c] > 1e-6:
                datimp[:, c] = datimp[:, c] / colstd[c]
            else:
                print "warning: colstd=" + str(colstd[c]) + " during normalization"
        nmissing[c] = float(sp.isnan(dat[:, c]).sum())
    fracmissing = nmissing / dat.shape[0]
    return datimp, fracmissing
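# Hedged usage sketch (added): mean-impute, center, and scale one small
# matrix; np.nanmean/np.nanstd replace the removed scipy.stats versions
# (scipy.stats.nanstd defaulted to the unbiased ddof=1).
import numpy as np

dat = np.array([[1.0, np.nan],
                [3.0, 4.0],
                [5.0, 6.0]])
colmean = np.nanmean(dat, axis=0)                     # -> [3., 5.]
imp = np.where(np.isnan(dat), colmean, dat) - colmean
imp /= np.nanstd(dat, axis=0, ddof=1)                 # z-scores; imputed cell -> 0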
def plot_autocorrelation(self):
    """Plot the autocorrelation as a function of height of the
    mean front relative frame.
    """
    U = stats.nanmean(self.uf, axis=1)
    # correlate two 1d arrays:
    # np.correlate(U, U, mode='full')[len(U) - 1:]
    # but we want to autocorrelate a 2d array over a given axis,
    # so use the FFT
    N = U.shape[1]
    pad_N = N * 2 - 1
    s = np.fft.fft(U, n=pad_N, axis=1)
    acf = np.real(np.fft.ifft(s * s.conjugate(), axis=1))[:, :N]
    # normalisation
    acf0 = np.expand_dims(acf[:, 0], 1)
    acf = acf / acf0

    fig, ax = plt.subplots(nrows=2)
    c0 = ax[0].contourf(U, self.levels)
    c1 = ax[1].contourf(acf, 100)
    fig.colorbar(c0, ax=ax[0], use_gridspec=True)
    fig.colorbar(c1, ax=ax[1], use_gridspec=True)
    ax[0].set_title(r'$\overline{u_x}(z, t)$')
    ax[0].set_xlabel('time')
    ax[0].set_ylabel('z')
    ax[1].set_title('autocorrelation')
    ax[1].set_xlabel('lag')
    ax[1].set_ylabel('z')
    fig.tight_layout()
    return fig
def make_daily_mean(ncfile, nmu):
    g = 9.81
    #tx1=int(t1)-30
    #tx2=int(t2)-30
    #ty1=int(t1)+30
    #ty2=int(t2)+30
    #dirname=os.path.dirname(ncfile)
    print('reading')
    lat, lev, lon, time, ta, ua, va, wapa, dens, psurf, ls = read_nc(ncfile, nmu)
    p = lev[:] * 100
    nlat = len(lat[:])
    nlev = len(lev[:])
    nlon = len(lon[:])
    ntime = len(time[:])

    print('densa')
    densa = np.zeros((ntime, nlev, nlat, nlon))
    densa = np.reshape(p, (1, nlev, 1, 1)) / (R_spec * ta[:, :, :, :])
    #wa = - densa[:,:,:,:] * g * wapa[:,:,:,:]
    wa = wapa[:, :, :, :] * 100
    alphaa = 1 / densa[:, :, :, :]
    #betaa = ~np.isnan(ta[:,:,:,:])

    # NaN padding blocks before (x) and after (y) the data in time; all the
    # names below are read-only aliases of a single all-NaN array
    tx = np.zeros((ntime, nlev, nlat, nlon))
    tx[:] = np.NaN
    ux = tx
    vx = tx
    wapx = tx
    densx = tx
    wx = tx
    #psurfx = np.zeros((ntime,nlat,nlon))
    alphax = tx
    betax = tx

    ty = np.zeros((ntime, nlev, nlat, nlon))
    ty[:] = np.NaN
    uy = ty
    vy = ty
    wapy = ty
    densy = ty
    wy = ty
    #psurfy = np.zeros((ntime,nlat,nlon))
    alphay = ty
    betay = ty

    print('z')
    tz = np.concatenate((tx[:, :, :, :], ta[:, :, :, :], ty[:, :, :, :]), axis=0)
    uz = np.concatenate((ux[:, :, :, :], ua[:, :, :, :], uy[:, :, :, :]), axis=0)
    vz = np.concatenate((vx[:, :, :, :], va[:, :, :, :], vy[:, :, :, :]), axis=0)
    wapz = np.concatenate((wapx[:, :, :, :], wapa[:, :, :, :], wapy[:, :, :, :]), axis=0)
    densz = np.concatenate((densx[:, :, :, :], densa[:, :, :, :], densy[:, :, :, :]), axis=0)
    #psurfz=np.concatenate((psurfx[:,:,:],psurfa[:,:,:],psurfy[:,:,:]),axis=0)
    wz = np.concatenate((wx[:, :, :, :], wa[:, :, :, :], wy[:, :, :, :]), axis=0)
    alphaz = np.concatenate((alphax[:, :, :, :], alphaa[:, :, :, :], alphay[:, :, :, :]), axis=0)
    nlat2 = len(lat[:])
    nlev2 = len(lev[:])
    nlon2 = len(lon[:])
    ntime2 = 3 * len(time[:])
    #weightz = make_weight(psurfz,p,ntime2,nlev2,nlat2,nlon2)
    #betaz=np.concatenate((betax[:,:,:,:],betaa[:,:,:,:],betay[:,:,:,:]),axis=0)#*np.nan_to_num(weightz)

    print('out')
    #ntimes = ntime - 6 * int(nofy) - 6 * int(nofx)
    t = np.zeros((ntime, nlev, nlat, nlon))
    u = np.zeros((ntime, nlev, nlat, nlon))
    v = np.zeros((ntime, nlev, nlat, nlon))
    wap = np.zeros((ntime, nlev, nlat, nlon))
    dens = np.zeros((ntime, nlev, nlat, nlon))
    psurf = 0  # np.zeros((ntime,nlat,nlon))
    wout = np.zeros((ntime, nlev, nlat, nlon))
    alphaout = np.zeros((ntime, nlev, nlat, nlon))
    #betaout = np.zeros((ntime,nlev,nlat,nlon))

    print('time_mean')
    # centered 6-step running mean in time; the NaN padding keeps the window
    # valid at the edges, and nanmean ignores the padded values
    for i in range(ntime):
        ti1 = ntime + i - 3
        ti2 = ntime + i + 3
        t[i, :, :, :] = stats.nanmean(tz[ti1:ti2, :, :, :], axis=0)
        u[i, :, :, :] = stats.nanmean(uz[ti1:ti2, :, :, :], axis=0)
        v[i, :, :, :] = stats.nanmean(vz[ti1:ti2, :, :, :], axis=0)
        wap[i, :, :, :] = stats.nanmean(wapz[ti1:ti2, :, :, :], axis=0)
        dens[i, :, :, :] = stats.nanmean(densz[ti1:ti2, :, :, :], axis=0)
        #psurf[i,:,:] = stats.nanmean(psurfz[ti1:ti2,:,:],axis=0)
        #psurf=0
        wout[i, :, :, :] = stats.nanmean(wz[ti1:ti2, :, :], axis=0)
        alphaout[i, :, :, :] = stats.nanmean(alphaz[ti1:ti2, :, :], axis=0)
        #betaout[i,:,:,:] = stats.nanmean(betaz[ti1:ti2,:,:],axis=0)  # is not nan

    #weightx = make_weight(psurf,p,ntime,nlev,nlat,nlon)
    #betaout=betaout#*np.nan_to_num(weightx)
    print('del')
    del (tz, tx, ta, ty)
    del (uz, ux, ua, uy)
    del (vz, vx, va, vy)
    del (wapz, wapx, wapa, wapy)
    del (densz, densx, densa, densy)
    del (wz, wx, wa, wy)

    return lat, lev, lon, time, t, u, v, wap, dens, psurf, ls, wout, alphaout
dlat[0] = dlat_np
dlat[nlat - 1] = dlat_sp
for i in range(1, nlat - 1):
    dlat[i] = (lat[i - 1] + lat[i]) / 2.0 - (lat[i] + lat[i + 1]) / 2.0
if dlat[3] < 0:
    dlat[0] = -dlat[0]
    dlat[nlat - 1] = -dlat[nlat - 1]
dlat_rad = dlat / 180.0 * np.pi
print(dlat_rad)
print(np.sum(dlat))

dlam = lon[0] - lon[1]
dlam_rad = dlam / 180.0 * np.pi
dlam_rad_int = np.absolute(dlam_rad)

# zonal means and deviations from the zonal mean
vzmR = stats.nanmean(v[:, :, :, :], axis=3)
uzmR = stats.nanmean(u[:, :, :, :], axis=3)
wzmR = stats.nanmean(w[:, :, :, :], axis=3)
uvsq = vzmR[:, :, :] ** 2 + uzmR[:, :, :] ** 2
vstar = v[:, :, :, :] - np.reshape(vzmR[:, :, :], (ntime, nlev, nlat, 1))
ustar = u[:, :, :, :] - np.reshape(uzmR[:, :, :], (ntime, nlev, nlat, 1))
wstar = w[:, :, :, :] - np.reshape(wzmR[:, :, :], (ntime, nlev, nlat, 1))
uvstarsq = vstar[:, :, :, :] ** 2 + ustar[:, :, :, :] ** 2
uvstarsqzm = stats.nanmean(uvstarsq[:, :, :, :], axis=3)
ieq = nlat / 2

#####################################################
def meanstd(x, axis=None):
    return stats.nanmean(x, axis), stats.nanstd(x, axis)
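# Hedged compatibility note (added): scipy.stats.nanmean/nanstd/nanmedian,
# used throughout these snippets, were deprecated in SciPy 0.15 and removed
# in 0.18. np.nanmean/np.nanstd/np.nanmedian are the replacements, with two
# gotchas: the scipy versions defaulted to axis=0 (numpy's default is a
# flattened axis=None), and scipy.stats.nanstd defaulted to the unbiased
# estimator (numpy needs ddof=1 to match).
import numpy as np

def meanstd_np(x, axis=None):
    return np.nanmean(x, axis=axis), np.nanstd(x, axis=axis, ddof=1)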