def add_to_results(model, name):
    df = getattr(model, name)
    model.results['param'].append(name)
    model.results['bias'].append(df['abs_err'].mean())
    model.results['mae'].append(pl.median(pl.absolute(df['abs_err'].dropna())))
    model.results['mare'].append(pl.median(pl.absolute(df['rel_err'].dropna())))
    model.results['pc'].append(df['covered?'].mean())
def trace_fibers(flatim, params):
    trace_im1 = pf.getdata(flatim)*0
    imdat, imhead = pf.getdata(flatim), pf.getheader(flatim)

    ### separating fibers in first column and assigning fiber ids
    print '\n\tSEARCHING FOR FIBERS BETWEEN x=0 & y=['+str(params.LO_BUFFER)+':'+str(len(imdat[:,0])-params.HI_BUFFER)+']'
    fiber_peaks_pix = pl.find(imdat[:,0] > pl.median(imdat[:,0]))
    fiber_peaks_pix, fiber_peaks_flx = sep_peaks(fiber_peaks_pix, imdat[:,0])
    if pl.median(fiber_peaks_pix[0]) <= params.LO_BUFFER:
        fiber_peaks_pix = fiber_peaks_pix[1:]
        fiber_peaks_flx = fiber_peaks_flx[1:]
    if (len(imdat[:,0]) - pl.median(fiber_peaks_pix[-1])) <= params.HI_BUFFER:
        fiber_peaks_pix = fiber_peaks_pix[:-1]
        fiber_peaks_flx = fiber_peaks_flx[:-1]
    print '\t --> FOUND ', len(fiber_peaks_pix), ' FIBER PEAKS'

    ### creating array for fibers
    fibers0 = []
    id_cnt = 1
    for f in range(len(fiber_peaks_pix)):
        while params.FIBERS_EXCLUDE.tolist().count(id_cnt) == 1:
            id_cnt += 1
        fibx, fiby = fiber_peaks_pix[f], fiber_peaks_flx[f]
        peakx = fibx[fiby.tolist().index(max(fiby))]
        yrange = pl.arange(peakx-params.FIBER_WIDTH/2, peakx+params.FIBER_WIDTH/2+1)
        fibers0.append(fiber(id_cnt, 0, yrange))
        id_cnt += 1

    ## TRACING FIBERS ALONG X-AXIS INDIVIDUALLY
    for fib in fibers0:
        for x in range(1, len(imdat)):
            ## FIRST, TAKE THE FLUXES IN THE PIXELS AT x
            ## THAT ENCOMPASSED THE PEAK AT x-1
            fluxes = imdat[fib.xy[-1][1], x]
            ## NEXT, FIND THE VERTICAL SHIFT TO CENTER ON
            ## THE PEAK FLUX AT x. MAXIMUM SHIFT IS DETERMINED
            ## FROM THE FIBER_WIDTH PARAMETER.
            deltay = range(-len(fluxes)/2+1, len(fluxes)/2+1)[fluxes.tolist().index(max(fluxes))]
            ## RECORD THE NEW Y-PIXELS THAT ARE CENTERED ON
            ## THE FIBER AT x.
            fib.xy.append([x, fib.xy[-1][1]+deltay])
            ## FLAG PIXELS FOR FIBER IN FIRST-PASS TRACE IMAGE
            trace_im1[fib.xy[-1][1], x] = fib.id

    trc0 = 'trace_pass1.fits'
    print '\n\tWRITING INITIAL TRACING TO ', trc0
    try:
        pf.writeto(trc0, trace_im1, header=imhead)
    except:
        os.remove(trc0)
        pf.writeto(trc0, trace_im1, header=imhead)

    return fibers0
def combine_output(J, T, model, dir, reps, save=False):
    """
    Combine output on absolute error, relative error, csmf_accuracy, and coverage
    from multiple runs of validate_once. Either saves the output to disk, or
    returns arrays for each.
    """
    cause = pl.zeros(J*T, dtype='f').view(pl.recarray)
    time = pl.zeros(J*T, dtype='f').view(pl.recarray)
    abs_err = pl.zeros(J*T, dtype='f').view(pl.recarray)
    rel_err = pl.zeros(J*T, dtype='f').view(pl.recarray)
    coverage = pl.zeros(J*T, dtype='f').view(pl.recarray)
    csmf_accuracy = pl.zeros(J*T, dtype='f').view(pl.recarray)

    for i in range(reps):
        metrics = pl.csv2rec('%s/metrics_%s_%i.csv' % (dir, model, i))
        cause = pl.vstack((cause, metrics.cause))
        time = pl.vstack((time, metrics.time))
        abs_err = pl.vstack((abs_err, metrics.abs_err))
        rel_err = pl.vstack((rel_err, metrics.rel_err))
        coverage = pl.vstack((coverage, metrics.coverage))
        csmf_accuracy = pl.vstack((csmf_accuracy, metrics.csmf_accuracy))

    # drop the initial row of zeros
    cause = cause[1:,]
    time = time[1:,]
    abs_err = abs_err[1:,]
    rel_err = rel_err[1:,]
    coverage = coverage[1:,]
    csmf_accuracy = csmf_accuracy[1:,]

    mean_abs_err = abs_err.mean(0)
    median_abs_err = pl.median(abs_err, 0)
    mean_rel_err = rel_err.mean(0)
    median_rel_err = pl.median(rel_err, 0)
    mean_csmf_accuracy = csmf_accuracy.mean(0)
    median_csmf_accuracy = pl.median(csmf_accuracy, 0)
    mean_coverage_bycause = coverage.mean(0)
    mean_coverage = coverage.reshape(reps, T, J).mean(0).mean(1)
    percent_total_coverage = (coverage.reshape(reps, T, J).sum(2) == 3).mean(0)
    mean_coverage = pl.array([[i for j in range(J)] for i in mean_coverage]).ravel()
    percent_total_coverage = pl.array([[i for j in range(J)] for i in percent_total_coverage]).ravel()

    models = pl.array([[model for j in range(J)] for i in range(T)]).ravel()
    true_cf = metrics.true_cf
    true_std = metrics.true_std
    std_bias = metrics.std_bias

    all = pl.np.core.records.fromarrays(
        [models, cause[0], time[0], true_cf, true_std, std_bias,
         mean_abs_err, median_abs_err, mean_rel_err, median_rel_err,
         mean_csmf_accuracy, median_csmf_accuracy, mean_coverage_bycause,
         mean_coverage, percent_total_coverage],
        names=['model', 'cause', 'time', 'true_cf', 'true_std', 'std_bias',
               'mean_abs_err', 'median_abs_err', 'mean_rel_err', 'median_rel_err',
               'mean_csmf_accuracy', 'median_csmf_accuracy', 'mean_covearge_bycause',
               'mean_coverage', 'percent_total_coverage'])

    if save:
        pl.rec2csv(all, '%s/%s_summary.csv' % (dir, model))
    else:
        return all
def epsetstmax(path):
    dico = load_spatial_means(path)
    time = dico["t"]
    eps = dico["epsK_tot"] + dico["epsA_tot"]
    E = dico["EK"] + dico["EA"]

    # if 'noise' in path:
    if eps.max() > 2 * eps[-1]:

        def f(x, amptan, ttan):
            return amptan * pl.tanh(2 * (x / ttan)**4)

        guesses = np.array([pl.median(eps), time[eps == eps.max()][0]])
    else:
        # def f(x, amptan, ttan, amplog, sigma):
        def f(x, amptan, ttan, amplog, tlog, sigma):
            return amptan * pl.tanh(
                2 * (x / ttan)**4) + amplog * stats.lognorm.pdf(
                    x, scale=pl.exp(tlog), s=sigma)

        guesses = np.array(
            (
                # amptan
                pl.median(eps),
                # ttan
                time[eps == eps.max()],
                # amplog
                eps.max(),
                # tlog
                time[eps == eps.max()],
                # sigma
                eps.std(),
            ),
            dtype=float,
        )
        # guesses = pl.array(list(guessesd.values()), dtype=float)

    try:
        popt, pcov = curve_fit(f, time, eps, guesses, maxfev=3000)
    except RuntimeError:
        print("Error while curve fitting data from path =", path)
        raise

    eps_fit = f(time, *popt)
    eps_stat = float(eps_fit[-1])
    try:
        # idx = _index_flat(eps_fit, time)
        idx = locate_knee(time, eps_fit, eps_stat)
        time_stat = time[idx]
    except ValueError:
        raise ValueError("While calculating curvature in {}".format(path))
        # warn("While calculating curvature in {}".format(path))
        # time_stat = popt[1] + 6 * popt[3]

    E_stat = E[idx:].mean()
    return eps_stat, E_stat, time_stat, time[-1]
def loadFile(objectFileName):
    oimg = pyfits.open(objectFileName)

    # Load the IFU data -- Row-stacked spectra
    odata = oimg[1].data
    oError = oimg[2].data
    odata_dim = odata.shape
    wcs = astWCS.WCS(objectFileName, extensionName=1)
    owavelengthStartEnd = wcs.getImageMinMaxWCSCoords()[0:2]
    fiberNumber = wcs.getImageMinMaxWCSCoords()[2:4]
    owavelengthStep = oimg[1].header['CDELT1']

    owavelengthRange = [owavelengthStartEnd[0] + i * owavelengthStep
                        for i in range(odata_dim[1])]

    # Check to make sure we got it right
    if not owavelengthRange[-1] == owavelengthStartEnd[-1]:
        print 'The ending wavelengths do not match... Exiting'
        sys.exit(1)
    else:
        # make median sky
        specs = pyl.array([flux for flux in odata])
        skySpec = pyl.median(specs, axis=0)

    RSS = []
    for i in range(int(fiberNumber[1])):
        #oflux = odata[i] - oskyflux
        oflux = odata[i] - skySpec
        oflux[pyl.isnan(oflux)] = 0.0
        oErrorFlux = oError[i]
        #oflux = odata[i]

        # Mask out extreme values in spectrum
        # Just because edges dodgy in efosc
        med = pyl.median(oflux)
        oflux[pyl.greater(abs(oflux), 10.0 * med)] = 0.0001

        objSED = astSED.SED(wavelength=owavelengthRange, flux=oflux)
        #skySED = astSED.SED(wavelength=owavelengthRange, flux=oskyflux)
        skySED = astSED.SED(wavelength=owavelengthRange, flux=skySpec)
        errSED = astSED.SED(wavelength=owavelengthRange, flux=oErrorFlux)

        # make it > 0 everywhere
        objSED.flux = objSED.flux - objSED.flux.min()
        objSED.flux = objSED.flux / objSED.flux.max()
        errSED.flux = errSED.flux - errSED.flux.min()
        errSED.flux = errSED.flux / errSED.flux.max()
        skySED.flux = skySED.flux - skySED.flux.min()
        skySED.flux = skySED.flux / skySED.flux.max()

        RSS.append({'object': objSED, 'sky': skySED, 'error': errSED})

    return RSS
def one_ci(v, ci, bootstraps):
    v = pylab.array(v)
    v = pylab.ma.masked_array(v, pylab.isnan(v)).compressed()
    if v.size == 0:
        return pylab.nan, 0, 0  #Nothing to compute

    r = pylab.randint(v.size, size=(v.size, bootstraps))
    booted_samp = pylab.array([pylab.median(v[r[:, n]]) for n in xrange(bootstraps)])
    booted_samp.sort()

    med = pylab.median(booted_samp)
    idx_lo = int(bootstraps * ci/2.0)
    idx_hi = int(bootstraps * (1.0 - ci/2))

    return med, med - booted_samp[idx_lo], booted_samp[idx_hi] - med
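# Minimal usage sketch for one_ci above (hypothetical data; assumes pylab is
# imported, as the function requires).  ci is the two-sided tail fraction, so
# ci=0.05 yields an approximate 95% bootstrap interval on the median.
import pylab

samples = pylab.randn(200) + 3.0          # fake measurements centred near 3
med, lo_err, hi_err = one_ci(samples, ci=0.05, bootstraps=2000)
print('median = %.3f (-%.3f / +%.3f)' % (med, lo_err, hi_err))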
def binit(x, y, n):  #bin arrays x, y into n bins, returning xbin, ybin
    nx = len(x)
    y = N.take(y, N.argsort(x))  #sort y according to x rankings
    x = N.take(x, N.argsort(x))
    xbin = N.zeros(n, 'f')
    ybin = N.zeros(n, 'f')
    for i in range(n):
        nmin = i * int(float(nx) / float(n))
        nmax = (i + 1) * int(float(nx) / float(n))
        xbin[i] = pylab.median(x[nmin:nmax])
        ybin[i] = pylab.median(y[nmin:nmax])
        #xbin[i]=N.average(x[nmin:nmax])
        #ybin[i]=N.average(y[nmin:nmax])
        #ybinerr[i]=scipy.stats.std(y[nmin:nmax])
    return xbin, ybin  #, ybinerr
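# Minimal usage sketch for binit above (synthetic data; assumes numpy is
# imported as N and pylab as pylab, matching the function).  Each bin's
# representative point is the median of x and of y inside that bin.
import numpy as N
import pylab

x = pylab.rand(500) * 10.0
y = 2.0 * x + pylab.randn(500)            # noisy linear relation
xbin, ybin = binit(x, y, 10)              # 10 running-median bins
pylab.plot(x, y, 'k.', xbin, ybin, 'ro-')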
def computeMAD(self, a, c=0.6745, axis=0):
    a = np.array(a)
    if a.ndim == 1:
        d = pl.median(a)
        m = pl.median(np.fabs(a - d) / c)
    elif (a.ndim > 1):
        d = pl.median(a, axis=axis)
        if axis > 0:
            aswp = swapaxes(a, 0, axis)
        else:
            aswp = a
        m = pl.median(np.fabs(aswp - d) / c, axis=0)
    else:
        m = 0

    return m
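# Minimal usage sketch for computeMAD above (synthetic data; `detector` is a
# hypothetical instance of whatever class defines the method).  With the
# default c=0.6745 the MAD is scaled to estimate the standard deviation of
# normally distributed data, so the result stays near 1.0 despite outliers.
import numpy as np

data = np.concatenate([np.random.randn(1000), [50.0, -40.0]])  # two outliers
robust_sigma = detector.computeMAD(data)
print(robust_sigma)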
def remove_discontinuity(value, xgap=10, ygap=200):
    """
    Remove discontinuity (sudden jump) in a series of values.
    Written by Denis, developed for LLC Fringe Counts data.

    value : list or numpy.array
    xgap  : "width" of index of the list/array to adjust steps
    ygap  : threshold value to detect discontinuity
    """
    difflist = pl.diff(value)
    discont_index = pl.find(abs(difflist) > ygap)

    if len(discont_index) == 0:
        return value
    else:
        discont_index = pl.append(discont_index, len(difflist))

        # find indices at discontinuities
        discont = {"start": [], "end": []}
        qstart = discont_index[0]
        for i in range(len(discont_index) - 1):
            if discont_index[i + 1] - discont_index[i] > xgap:
                qend = discont_index[i]
                discont["start"].append(qstart - xgap)
                discont["end"].append(qend + xgap)
                qstart = discont_index[i + 1]

        # add offsets at discontinuities
        result = pl.array(value)
        for i in range(len(discont["end"])):
            result[0:discont["start"][i]] += \
                result[discont["end"][i]] - result[discont["start"][i]]

        # remove the median
        result = result - pl.median(result)
        return result
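# Minimal usage sketch for remove_discontinuity above (synthetic data; assumes
# pylab is imported as pl, matching the function).  A +500 step is injected at
# index 300, offset away, and the result is re-centred on its median.
import pylab as pl

t = pl.arange(600, dtype=float)
series = pl.sin(t / 50.0) * 20.0
series[300:] += 500.0                      # artificial jump larger than ygap
cleaned = remove_discontinuity(series, xgap=10, ygap=200)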
def read_align(self, readfile):
    self.df = pd.read_csv(readfile, sep='\t', header=None)
    self.df.columns = ['ref', 'start', 'end', 'dummy', 'quality', 'strand']
    from pylab import median
    self.read_length = round(median(self.df['end'] - self.df['start']))
    self.chromosomes = self.df['ref'].unique()
def scatter_times(name, sheets):
    means = []
    medians = []
    delays = []
    mean_points = []
    med_points = []
    for sheet, delay in sheets:
        delays.append(delay)
        times = get_times(sheet)
        mean = pylab.mean(times)
        median = pylab.median(times)
        means.append(mean)
        medians.append(median)
        mean_points.append((mean, sheet))
        med_points.append((median, sheet))

    print "----------mean points-----------"
    for mean, sheet in sorted(mean_points):
        print mean, sheet
    print "----------median points-----------"
    for median, sheet in sorted(med_points):
        print median, sheet

    pylab.scatter(delays, means, color='r')
    pylab.scatter(delays, medians, color='b')
    print "show"
    pylab.show()
def makePlots(self, ax, query, fNum, fColor, fMarker, feedstock):
    query.getQuery()
    if query.queryString.startswith('No'):
        pass
    elif query.queryString.startswith('FR'):
        data = [1, 1]
        ax.plot([fNum] * 2, [1, 1], fColor, marker=fMarker, markersize=2)
    else:
        data = self.db.output(query.queryString, self.db.schema)
        medVal = median(data)
        maxVal = max(data)
        minVal = min(data)
        ax.plot([fNum], medVal, fColor, marker='_', markersize=7)
        #Plot the max/min values
        ax.plot([fNum] * 2, [maxVal, minVal], fColor, marker=fMarker, markersize=2)
        self.writeResults(feedstock, str(maxVal[0]), str(medVal), str(minVal[0]))
def binitbinsfix(binsleft, binsright, x, y):
    #give bins for binning
    nbin = len(binsleft)
    xbin = N.zeros(len(binsleft), 'f')
    ybin = N.zeros(len(binsleft), 'f')
    ybinerr = N.zeros(len(binsleft), 'f')
    y = N.take(y, N.argsort(x))  #sort y according to x rankings
    x = N.take(x, N.argsort(x))
    j = -1
    for i in range(len(xbin)):
        xmin = binsleft[i]
        xmax = binsright[i]
        yb = []
        for j in range(len(x)):
            if x[j] > xmin:
                yb.append(y[j])
            if x[j] > xmax:
                yb = N.array(yb, 'f')
                xbin[i] = 0.5 * (xmin + xmax)
                try:
                    ybin[i] = pylab.median(yb)
                    ybinerr[i] = pylab.std(yb) / N.sqrt(1. * len(yb))
                except ZeroDivisionError:
                    print "warning: ZeroDivision error in binitbinsfix"
                    ybin[i] = 0.
                    ybinerr[i] = 0.
                break
    return xbin, ybin, ybinerr
def smooth(x, window_len=11, window='flat', rms=0):
    if window == 'median':
        movmed, movrms = [], []
        for i in range(len(x)):
            low = max(0, i - window_len/2)
            high = min(len(x), i + window_len/2)
            movmed.append(median(x[low:high]))
            movrms.append(numpy.std(x[low:high]))
        if rms == 0:
            return array(movmed)
        else:
            return array(movmed), array(movrms)
    else:
        if x.ndim != 1:
            raise ValueError, "smooth only accepts 1 dimension arrays."
        if x.size < window_len:
            raise ValueError, "Input vector needs to be bigger than window size."
        if window_len < 3:
            return x
        if not window in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
            raise ValueError, "Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"

        s = numpy.r_[2*x[0] - x[window_len-1::-1], x, 2*x[-1] - x[-1:-window_len:-1]]
        #print(len(s))
        if window == 'flat':  #moving average
            w = numpy.ones(window_len, 'd')
        else:
            w = eval('numpy.' + window + '(window_len)')

        y = numpy.convolve(w/w.sum(), s, mode='same')
        return array(y[window_len:-window_len+1])
def plotter(key, sims, ax, label='', ylabel='', low_q=0.05, high_q=0.95, startday=None):

    color = cv.get_colors()[key.split('_')[1]]

    ys = []
    for s in sims:
        ys.append(s.results[key].values)
    yarr = np.array(ys)

    best = pl.median(yarr, axis=0)
    low = pl.quantile(yarr, q=low_q, axis=0)
    high = pl.quantile(yarr, q=high_q, axis=0)

    sim = sims[0]
    tvec = np.arange(len(best))

    fill_label = None
    pl.fill_between(tvec, low, high, facecolor=color, alpha=0.2, label=fill_label)
    pl.plot(tvec, best, c=color, label=label, lw=4, alpha=1.0)
    sc.setylim()

    datemarks = pl.array([sim.day('2020-03-01'), sim.day('2020-05-01'),
                          sim.day('2020-07-01'), sim.day('2020-09-01')])
    ax.set_xticks(datemarks)
    pl.ylabel(ylabel)

    return
def fit_tri_gaussian(wav, flux, flux_err, name_line, wav_line, rline=None,
                     isplot=False, silent=True):
    p0 = [pl.median(flux),
          wav_line[0], 2., 10.,
          wav_line[1], 2., 10.,
          wav_line[2], 2., 10.]
    parinfo = [{'value': 0., 'fixed': 0, 'limited': [0, 0],
                'limits': [0., 0.], 'tied': '', 'parname': 'name'}]
    parinfo[0]['value'] = p0[0]
    parinfo[0]['parname'] = 'constant'
    parinfo.extend(generate_parinfo(p0=p0[1:4], pname=['wav_center1', 'width1', 'area1']))
    parinfo.extend(generate_parinfo(p0=p0[4:7], pname=['wav_center2', 'width2', 'area2']))
    parinfo.extend(generate_parinfo(p0=p0[7:10], pname=['wav_center3', 'width3', 'area3']))

    if rline is not None:
        if rline[0] is not None:
            parinfo[3]['tied'] = str(rline[0]) + ' * p[9]'
        if rline[1] is not None:
            parinfo[6]['tied'] = str(rline[1]) + ' * p[9]'

    fdata = {'x': wav, 'y': flux, 'err': flux_err, 'ngaussian': 3}
    res = mpfit(myfunct_gaussian, p0, parinfo=parinfo, functkw=fdata, quiet=silent)

    if (res.status < 0) or (res.perror is None):
        print('error message = ', res.errmsg)
        return emptyline(name_line, wav_line)

    line1 = LineProfile(name_line[0], res.params[1], res.params[3], res.perror[3],
                        res.params[2], res.perror[2], res.params[0])
    line2 = LineProfile(name_line[1], res.params[4], res.params[6], res.perror[6],
                        res.params[5], res.perror[5], res.params[0])
    line3 = LineProfile(name_line[2], res.params[7], res.params[9], res.perror[9],
                        res.params[8], res.perror[8], res.params[0])

    if isplot:
        line1.spec = [wav, flux]
        line2.spec = [wav, flux]
        line3.spec = [wav, flux]

    return [line1, line2, line3]
def flow_rate_hist(sheets):
    ant_rates = []
    weights = []
    for sheet in sheets:
        ants, seconds, weight = flow_rate(sheet)
        ant_rate = seconds / ants
        #ant_rate = ants / seconds
        ant_rates.append(ant_rate)
        weights.append(float(weight))
        #weights.append(seconds)

    weights = pylab.array(weights)
    weights /= sum(weights)

    #print "ants per second"
    print "seconds per ant"
    mu = pylab.mean(ant_rates)
    print "mean", pylab.mean(ant_rates)
    wmean = pylab.average(ant_rates, weights=weights)
    print "weighted mean", wmean
    print "median", pylab.median(ant_rates)
    print "std", pylab.std(ant_rates, ddof=1)
    ant_rates = pylab.array(ant_rates)
    werror = (ant_rates - mu) * weights
    print "weighted std", ((sum(werror ** 2))) ** 0.5
    print "weighted std 2", (pylab.average((ant_rates - mu)**2, weights=weights)) ** 0.5

    pylab.figure()
    pylab.hist(ant_rates)
    pylab.savefig('ant_flow_rates.pdf', format='pdf')
    pylab.close()
def pitch_estimate(dw):
    step = 8
    wsize = 2048
    wfun = pl.ones
    wa = 3
    lo, hi = 50, 700
    hist_params = dict(bins=800, lw=0, range=[lo, hi], rwidth=1.0, normed=True, log=True)
    subplots = wplot.Subplots(6, 1, yticks=[0, .1, .25, .5, 1],
                              xlim=(120, 240), autoscalex_on=False)
    for wfun in [pl.hanning, pl.hamming, pl.blackman, pl.bartlett, pl.ones]:
        cc = chunker.Chunker(dw, window=wfun(wsize), step=step)
        acs = [cc.ac for c in cc.chunks()]
        pp = [chunker.find_peak(a, lo, hi, wa=wa) for a in acs]
        mm = pl.median(pp)
        subplots(title='window: %s(%d) step=%s range=%s wa=%d'
                       % (wfun.func_name, wsize, step, [lo, hi], wa),
                 xticks=[mm] + range(lo, hi+50, 50))
        subplots.next()
        freq, bins, patches = pl.hist(pp, **hist_params)
    print 'Ok!'
def makePlots(self, ax, x, fNum, fColor, fMarker, feedstock):
    x.getQuery()
    if x.queryString.startswith('No'):
        pass
    elif x.queryString.startswith('FR'):
        data = [1, 1]
        ax.plot([fNum]*2, [1, 1], fColor, marker=fMarker, markersize=2)
    else:
        cur = self.conn.cursor()
        print x.queryString
        cur.execute(x.queryString)
        #[all data]
        data = cur.fetchall()
        cur.close()
        medVal = median(data)
        maxVal = max(data)
        minVal = min(data)
        ax.plot([fNum], medVal, fColor, marker='_', markersize=7)
        #Plot the max/min values
        ax.plot([fNum]*2, [maxVal, minVal], fColor, marker=fMarker, markersize=2)
        self.writeResults(feedstock, str(maxVal[0]), str(medVal), str(minVal[0]))
def get_age_sex(is_crew=False, min_age=18, max_age=99, crew_age=35, crew_std=5,
                guest_age=68, guest_std=8):
    '''
    Define age-sex distributions. Passenger age distribution based on:

        https://www.nytimes.com/reuters/2020/02/12/world/asia/12reuters-china-health-japan.html

    "About 80% of the passengers were aged 60 or over [=2130], with 215 in their 80s
    and 11 in the 90s, the English-language Japan Times newspaper reported."
    '''

    # Define female (0) or male (1) -- evenly distributed
    sex = pl.randint(2)

    # Define age distribution for the crew and guests
    if is_crew:
        age = pl.normal(crew_age, crew_std)
    else:
        age = pl.normal(guest_age, guest_std)

    # Normalize
    age = pl.median([min_age, age, max_age])

    return age, sex
def mare(model, data_type):
    try:
        pred = model.vars[data_type]['p_pred'].trace().mean(0)
    except:
        pred = 0
    obs = model.get_data(data_type)['value']
    mare = pl.median((abs(pred - obs)/obs)*100)
    return mare
def store_results(dm, area, sex, year):
    types_to_plot = 'p i r rr'.split()

    graphics.plot_convergence_diag(dm.vars)
    pl.clf()
    for i, t in enumerate(types_to_plot):
        pl.subplot(len(types_to_plot), 1, i + 1)
        graphics.plot_data_bars(dm.model.get_data(t))
        pl.plot(range(101), dm.emp_priors[t, 'mu'], linestyle='dashed',
                color='grey', label='Emp. Prior', linewidth=3)
        pl.plot(range(101), dm.true[t], 'b-', label='Truth', linewidth=3)
        pl.plot(range(101), dm.posteriors[t].mean(0), 'r-', label='Estimate', linewidth=3)
        pl.errorbar(range(101), dm.posteriors[t].mean(0),
                    yerr=1.96 * dm.posteriors[t].std(0),
                    fmt='r-', linewidth=1, capsize=0)
        pl.ylabel(t)
        graphics.expand_axis()
    pl.legend(loc=(0., -.95), fancybox=True, shadow=True)
    pl.subplots_adjust(hspace=0, left=.1, right=.95, bottom=.2, top=.95)
    pl.xlabel('Age (Years)')
    pl.show()

    model = dm
    model.mu = pandas.DataFrame()
    for t in types_to_plot:
        model.mu = model.mu.append(pandas.DataFrame(dict(true=dm.true[t],
                                                         mu_pred=dm.posteriors[t].mean(0),
                                                         sigma_pred=dm.posteriors[t].std(0))),
                                   ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.mu['abs_err'].mean(),
        pl.median(pl.absolute(model.mu['rel_err'].dropna())),
        model.mu['covered?'].mean())
    print

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'mu')
    data_simulation.finalize_results(model)
    print model.results

    return model
def one_ci(pc, nsamp, ci, bootstraps):
    booted_p = boot_p(pc, nsamp, bootstraps)
    booted_p.sort()

    p = pylab.median(booted_p)
    idx_lo = int(bootstraps * ci / 2.0)
    idx_hi = int(bootstraps * (1.0 - ci / 2))

    return p, p - booted_p[idx_lo], booted_p[idx_hi] - p
def plot_histogram(histogram, html_writer, title='', max_pathway_length=8, xmin=None,
                   xlim=20, error_bars=True, min_to_show=20, legend_loc='upper left'):
    fig = pylab.figure()

    pylab.hold(True)

    reps = 1000

    y_offset = 0
    offset_step = 0.007
    colors = {1: 'r', 2: 'orange', 3: 'green', 4: 'cyan', 5: 'blue',
              'Rest': 'violet', 'Not first': 'k--',
              'No known regulation': 'grey',
              'Activated': 'green', 'Inhibited': 'r',
              'Mixed regulation': 'blue'}
    for key, value in histogram.iteritems():
        if len(value) >= min_to_show:
            m = stats.cmedian(value)

            sample_std = None

            if error_bars:
                sample_vals = []
                i = 0
                while i < reps:
                    samples = []
                    while len(samples) < len(value):
                        samples.append(random.choice(value))
                    sample_vals.append(pylab.median(samples))
                    i += 1

                sample_std = pylab.std(sample_vals)

            plotting.cdf(value, label='%s (med=%.1f, N=%d)' % (key, m, len(value)),
                         style=colors.get(key, 'grey'), std=sample_std,
                         y_offset=y_offset)
            y_offset += offset_step

    xmin = -1 * xlim if xmin == None else xmin
    pylab.xlim(xmin, xlim)
    pylab.xlabel('Irreversibility')
    #pylab.xlabel('deltaG')
    pylab.ylabel('Cumulative distribution')
    legendfont = matplotlib.font_manager.FontProperties(size=11)
    pylab.legend(loc=legend_loc, prop=legendfont)
    pylab.title(title)
    pylab.hold(False)

    if 'Not first' in histogram:
        print '%s, first vs. non-first ranksum test: ' % title + \
              '(%f, %f)' % stats.ranksums(histogram[1], histogram['Not first'])
    if 'Inhibited' in histogram:
        print '%s, inhibited vs. non-regulated ranksum test: ' % title + \
              '(%f, %f)' % stats.ranksums(histogram['Inhibited'],
                                          histogram['No known regulation'])

    #for k1, h1 in histogram.iteritems():
    #    for k2, h2 in histogram.iteritems():
    #        print k1, k2, stats.ranksums(h1, h2)

    return fig
def get_stats(typeid, date):
    global db
    cur = db.cursor()
    cur.execute("SELECT AVG(price),SUM(volremain),SUM(volenter) - SUM(volremain),bid FROM archive_market WHERE typeid = %s AND (reportedtime) :: date = %s GROUP BY orderid,bid", [typeid, date])
    a = cur.fetchall()
    avg_b = array(zeros(len(a)), dtype=float)
    vol_b = array(zeros(len(a)), dtype=float)
    move_b = array(zeros(len(a)), dtype=float)
    avg_s = array(zeros(len(a)), dtype=float)
    vol_s = array(zeros(len(a)), dtype=float)
    move_s = array(zeros(len(a)), dtype=float)
    x_s = 0
    x_b = 0
    for r in a:
        if r[3]:
            avg_b[x_b] = r[0]
            vol_b[x_b] = r[1]
            move_b[x_b] = r[2]
            x_b += 1
        else:
            avg_s[x_s] = r[0]
            vol_s[x_s] = r[1]
            move_s[x_s] = r[2]
            x_s += 1
    avg_b.resize(x_b)
    avg_s.resize(x_s)
    vol_b.resize(x_b)
    vol_s.resize(x_s)
    move_b.resize(x_b)
    move_s.resize(x_s)
    b = (None, None, None)
    s = (None, None, None)
    try:
        b = (pylab.median(avg_b), pylab.mean(vol_b), pylab.mean(move_b))
        s = (pylab.median(avg_s), pylab.mean(vol_s), pylab.mean(move_s))
    except:
        return (b, b, b)
    ret = (((b[0]+s[0])/2, (b[1]+s[1])/2, (b[2]+s[2])/2), b, s)
    print ret
    return ret
def DFA(data, npoints=None, degree=1, use_median=False):
    """
    computes the detrended fluctuation analysis
    returns the fluctuation F and the corresponding window length L

    :args:
        data (n-by-1 array): the data from which to compute the DFA
        npoints (int): the number of points to evaluate; if omitted the log(n)
            will be used
        degree (int): degree of the polynomial to use for detrending
        use_median (bool): use median instead of mean fluctuation

    :returns:
        F, L: the fluctuation F as function of the window length L
    """
    # max window length: n/4

    #0th: compute integral
    integral = cumsum(data - mean(data))

    #1st: compute different window lengths
    n_samples = npoints if npoints is not None else int(log(len(data)))
    lengths = sort(array(list(set(
        logspace(2, log(len(data)/4.), n_samples, base=exp(1)).astype(int)
    ))))

    #print lengths
    all_flucs = []
    used_lengths = []
    for wlen in lengths:
        # compute the fluctuation of residuals from a linear fit
        # according to Kantz&Schreiber, ddof must be the degree of polynomial,
        # i.e. 1 (or 2, if mean also counts? -> see in book)
        curr_fluc = []
        # rrt = 0
        for startIdx in arange(0, len(integral), wlen):
            pt = integral[startIdx:startIdx + wlen]
            if len(pt) > 3*(degree+1):
                resids = pt - polyval(polyfit(arange(len(pt)), pt, degree),
                                      arange(len(pt)))
                # if abs(wlen - lengths[0]) < -1:
                #     print resids[:20]
                # elif rrt == 0:
                #     print "wlen", wlen, "l0", lengths[0]
                #     rrt += 1
                curr_fluc.append(std(resids, ddof=degree+1))
        if len(curr_fluc) > 0:
            if use_median:
                all_flucs.append(median(curr_fluc))
            else:
                all_flucs.append(mean(curr_fluc))
            used_lengths.append(wlen)
    return array(all_flucs), array(used_lengths)
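# Minimal usage sketch for DFA above (synthetic data; assumes a star-import
# from pylab so cumsum, polyfit, logspace, etc. are in scope, as the function
# expects).  For uncorrelated white noise the log-log slope of F(L) should be
# close to 0.5.
from pylab import randn, log, polyfit

noise = randn(10000)
F, L = DFA(noise, npoints=20, degree=1)
alpha = polyfit(log(L), log(F), 1)[0]     # DFA scaling exponent
print("estimated scaling exponent: %.2f" % alpha)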
def simulateGame(numGames, tilePairs, gameType):
    runTimeList = []
    for i in range(numGames):
        gameTime = gameType(tilePairs)
        runTimeList.append(gameTime)
    medTime = pylab.median(runTimeList)
    meanTime = pylab.mean(runTimeList)
    pylab.hist(runTimeList, [x*2 for x in range(400)])
    print 'meanTime: ' + str(meanTime)
    print 'medianTime: ' + str(medTime)
    return meanTime, medTime
def createMountain(x, y):
    "create the mountain at locations x and y"
    nx = len(x)
    ny = len(y)
    h0 = py.zeros([nx, ny])

    # mountain parameters
    xc = py.median(x)        # mountain centre
    yc = py.median(y)
    rm = 1.5e6               # mountain radius
    h0max = 500.             # mountain peak height
    #h0max = 0.

    # set the mountain for all i,j locations
    for i in xrange(0, nx):
        for j in xrange(0, ny):
            dist = py.sqrt((x[i] - xc)**2 + (y[j] - yc)**2)
            if dist < rm:
                h0[i, j] = h0max*(1 - dist/rm)

    return h0
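# Minimal usage sketch for createMountain above (synthetic grid; assumes pylab
# is imported as py, matching the function).  The cone is centred on the
# median of each coordinate axis.
import pylab as py

x = py.linspace(0.0, 1.0e7, 101)
y = py.linspace(0.0, 1.0e7, 101)
h0 = createMountain(x, y)
py.contourf(x, y, h0.T)       # transpose so h0[i, j] maps to (x[i], y[j])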
def truncation_hist(df, outdir=FIGS_DIR):
    df2 = df[df['neuron type'].isin(['axon', 'truncated axon'])]
    df2 = df2.drop_duplicates(['neuron name', 'neuron type'])
    type_alphas = defaultdict(list)
    for neuron_name, group in df2.groupby('neuron name'):
        if len(group['neuron type']) < 2:
            continue
        for neuron_type, group2 in group.groupby('neuron type'):
            type_alphas[neuron_type] += list(group2['alpha'])

    alphas = []
    weights = []
    labels = []
    for neuron_type in type_alphas:
        alpha = type_alphas[neuron_type]
        alphas.append(alpha)
        weights.append(pylab.ones_like(alpha) / float(len(alpha)))
        labels.append(neuron_type)

    pylab.figure()
    sns.set()
    pylab.hist(alphas, range=(0, 1), weights=weights, label=labels)
    leg = pylab.legend(frameon=True)
    pylab.setp(leg.get_texts(), fontsize=20)
    leg_frame = leg.get_frame()
    leg_frame.set_linewidth(5)
    leg_frame.set_edgecolor('k')
    curr_ax = pylab.gca()
    curr_ax.set_ylim((0, 1))
    pylab.xlabel('alpha', size=30)
    pylab.ylabel('proportion', size=30)
    pylab.tight_layout()
    name = 'truncation_hist'
    outname = '%s/%s.pdf' % (outdir, name)
    outname = outname.replace(' ', '_')
    pylab.savefig('%s/%s.pdf' % (outdir, name), format='pdf')
    pylab.close()

    axons = pylab.array(type_alphas['axon'])
    truncated_axons = pylab.array(type_alphas['truncated axon'])
    differences = axons - truncated_axons
    print '-------------------------'
    print "Truncation test"
    print min(differences), pylab.median(differences), max(differences)
    print pylab.mean(differences), "+/-", pylab.std(differences, ddof=1)
    print wilcoxon(axons, truncated_axons)
    print ttest_rel(axons, truncated_axons)
def create_graph(name, sequences, cutoff=None):
    """Creates a graph based on the n-gram similarity of the
    sequences in the given list of sequences"""
    nodes = [s.name() for s in sequences]
    edges = [(s1.name(), s2.name(), s1.distance(s2))
             for s1 in sequences for s2 in sequences]
    cutoff = cutoff if cutoff else pyl.median([d for (n1, n2, d) in edges])
    print "edge cutoff: %.2f" % cutoff
    edges = filter(lambda (n1, n2, d): d < cutoff, edges)
    G = nx.Graph(name=name)
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)
    return G
def __init__(self, data, time):
    # data format: multidimensional numpy array
    # Each inner array is an array of OD values ordered by time.
    # This is important for determining the median
    self.dataReps = data  # OD data values (replicates implied)
    self.dataMed = py.median(self.dataReps, axis=0)
    self.time = time  # time values
    self.asymptote = self.__calcAsymptote()
    self.maxGrowthRate, self.mgrTime = self.__calcMGR()
    self.dataLogistic, self.lag = self.__calcLag()
    self.growthLevel = self.__calcGrowth()
def validate_age_group(model, replicate):
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    N = 30
    delta_true = 5.0
    pi_true = true_rate_function
    m = simulate_age_group_data(N=N, delta_true=delta_true, pi_true=pi_true)

    if model == "midpoint_covariate":
        fit_midpoint_covariate_model(m)
    elif model == "alt_midpoint_covariate":
        fit_alt_midpoint_covariate_model(m)
    elif model == "age_standardizing":
        fit_age_standardizing_model(m)
    elif model == "age_integrating":
        fit_age_integrating_model(m)
    elif model == "midpoint_model":
        fit_midpoint_model(m)
    elif model == "disaggregation_model":
        fit_disaggregation_model(m)
    else:
        raise TypeError, 'Unknown model type: "%s"' % model

    # compare estimate to ground truth
    import data_simulation

    m.mu = pandas.DataFrame(
        dict(
            true=[pi_true(a) for a in range(101)],
            mu_pred=m.vars["mu_age"].stats()["mean"],
            sigma_pred=m.vars["mu_age"].stats()["standard deviation"],
        )
    )
    data_simulation.add_quality_metrics(m.mu)

    print "\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f" % (
        m.mu["abs_err"].mean(),
        pl.median(pl.absolute(m.mu["rel_err"].dropna())),
        m.mu["covered?"].mean(),
    )
    print

    data_simulation.add_quality_metrics(m.mu)

    data_simulation.initialize_results(m)
    data_simulation.add_to_results(m, "mu")
    data_simulation.finalize_results(m)

    return m
def get_uniform_data(*args):
    # internally define variables
    dist_old = args[0]
    elev_old = args[1]
    func_class_old = args[2]
    wav_lst_old = args[3]
    nom_dist_window = args[4]

    window_cnt = mp.ceil(max(dist_old) / nom_dist_window)
    act_dist_window = max(dist_old) / window_cnt

    dist_new = mp.linspace(0.0, dist_old[-1], window_cnt + 1)
    elev_new = np.asarray([-1.0] * len(dist_new))
    func_class_new = np.zeros(len(dist_new)) - 1.0
    wav_lst_new = np.zeros(len(dist_new)) - 1.0

    for i in range(len(dist_new)):
        logical1 = dist_old >= (dist_new[i] - act_dist_window / 2.0)
        logical2 = dist_old <= (dist_new[i] + act_dist_window / 2.0)
        ind = mp.find(np.bitwise_and(logical1, logical2))
        if len(ind) != 0:
            y0 = elev_old[ind]
            elev_new[i] = mp.median(y0)
            func_class_mode, func_class_mode_cnt = stats.mode(func_class_old[ind])
            func_class_new[i] = np.copy(func_class_mode)
            wav_mode, wav_mode_cnt = stats.mode(wav_lst_old[ind])
            wav_lst_new[i] = np.copy(wav_mode)

    elev_new[0] = 1.0 * elev_old[0]
    elev_new[-1] = 1.0 * elev_old[-1]

    ind = mp.find(elev_new != -1.0)
    if len(ind) > 1:
        elev_new_func = interp1d(dist_new[ind], elev_new[ind], kind=1)
        elev_new = elev_new_func(dist_new)

    ind = mp.find(func_class_new != -1.0)
    if len(ind) > 1:
        fc_new_func = interp1d(dist_new[ind], func_class_new[ind], kind=0)
        func_class_new = fc_new_func(dist_new)

    ind = mp.find(wav_lst_new != -1.0)
    if len(ind) > 1:
        wav_new_func = interp1d(dist_new[ind], wav_lst_new[ind], kind=0)
        wav_lst_new = wav_new_func(dist_new)

    return dist_new, elev_new, func_class_new, wav_lst_new
def readDatDirectory(key, directory):
    global stats
    #Don't read data in if it's already read
    if not key in DATA["mean"]:
        data = defaultdict(array)

        #Process the dat files
        for datfile in glob.glob(directory + "/*.dat"):
            fileHandle = open(datfile, 'rb')
            keys, dataDict = csvExtractAllCols(fileHandle)
            stats = union(stats, keys)
            for aKey in keys:
                if not aKey in data:
                    data[aKey] = reshape(array(dataDict[aKey]),
                                         (1, len(dataDict[aKey])))
                else:
                    data[aKey] = append(data[aKey],
                                        reshape(array(dataDict[aKey]),
                                                (1, len(dataDict[aKey]))),
                                        axis=0)

        #Process the div files
        for datfile in glob.glob(directory + "/*.div"):
            fileHandle = open(datfile, 'rb')
            keys, dataDict = csvExtractAllCols(fileHandle)
            stats = union(stats, keys)
            for aKey in keys:
                if not aKey in data:
                    data[aKey] = reshape(array(dataDict[aKey]),
                                         (1, len(dataDict[aKey])))
                else:
                    data[aKey] = append(data[aKey],
                                        reshape(array(dataDict[aKey]),
                                                (1, len(dataDict[aKey]))),
                                        axis=0)

        #Iterate through the stats and calculate mean/standard deviation
        for aKey in stats:
            if aKey in data:
                DATA["mean"][key][aKey] = mean(data[aKey], axis=0)
                DATA["median"][key][aKey] = median(data[aKey], axis=0)
                DATA["std"][key][aKey] = std(data[aKey], axis=0)
                DATA["ste"][key][aKey] = std(data[aKey], axis=0) / sqrt(len(data[aKey]))
                DATA["min"][key][aKey] = mean(data[aKey], axis=0) - amin(data[aKey], axis=0)
                DATA["max"][key][aKey] = amax(data[aKey], axis=0) - mean(data[aKey], axis=0)
                DATA["actual"][key][aKey] = data[aKey]
def Create(cls, Image):
    '''Returns an object with the mean, median, std. dev of an image.
       This object is attached to the image object and only calculated once'''
    if '__IrtoolsImageStats__' in Image.__dict__:
        return Image.__dict__["__IrtoolsImageStats__"]

    istats = ImageStats()
    istats._median = median(Image)
    istats._mean = mean(Image)
    istats._stddev = std(Image)

    Image.__dict__["__IrtoolsImageStats__"] = istats
    return istats
def check_page(image):
    if len(image.shape) == 3:
        return "input image is color image %s" % (image.shape, )
    if mean(image) < median(image):
        return "image may be inverted"
    h, w = image.shape
    if h < 600:
        return "image not tall enough for a page image %s" % (image.shape, )
    if h > 10000:
        return "image too tall for a page image %s" % (image.shape, )
    if w < 600:
        return "image too narrow for a page image %s" % (image.shape, )
    if w > 10000:
        return "image too wide for a page image %s" % (image.shape, )
    return None
def fit_single_gaussian(wav, flux, flux_err, name_line, wav_line,
                        isplot=False, silent=True):
    p0 = [pl.median(flux), wav_line, 2., 10.]
    parinfo = [{'value': 0., 'fixed': 0, 'limited': [0, 0],
                'limits': [0., 0.], 'parname': 'name'}]
    parinfo[0]['value'] = p0[0]
    parinfo[0]['parname'] = 'constant'
    parinfo.extend(generate_parinfo(p0=p0[1:]))

    fdata = {'x': wav, 'y': flux, 'err': flux_err, 'ngaussian': 1}
    res = mpfit(myfunct_gaussian, p0, parinfo=parinfo, functkw=fdata, quiet=silent)

    if (res.status < 0) or (res.perror is None):
        print('error message = ', res.errmsg)
        return emptyline(name_line, wav_line)

    line = LineProfile(name_line, res.params[1], res.params[3], res.perror[3],
                       res.params[2], res.perror[2], res.params[0])

    if isplot:
        line.spec = [wav, flux]

    return line
def plot_output_distribution(out, title):
    from splikes.utils import paramtext

    out = out.ravel()
    out_full = out

    result = py.hist(out, 200)

    paramtext(1.2, 0.95,
              'min %f' % min(out_full),
              'max %f' % max(out_full),
              'mean %f' % py.mean(out_full),
              'median %f' % py.median(out_full),
              'std %f' % py.std(out_full),
              )

    py.title(title)
def histogram_and_fit(distribution_name, points, bins=10, units="", **fit_kwargs):
    histogram = pylab.hist(points, bins)
    bins = histogram[1]
    bin_step = pylab.median(pylab.diff(bins))
    distribution = _get_distribution(distribution_name)
    fit = distribution.fit(points, **fit_kwargs)
    xs = pylab.linspace(min(bins), max(bins), 1000)
    ys = distribution.pdf(xs, *fit)
    label = _get_label(distribution_name, fit, units)
    pylab.plot(xs, ys * len(points) * bin_step, 'r', label=label)
    pylab.legend()
    return fit
def fit(model):
    emp_priors = model.emp_priors

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all',
                                            None, emp_priors['p', 'mu'],
                                            emp_priors['p', 'sigma'])
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=5000,
                                                     burn=2000, thin=25,
                                                     tune_interval=100)
    #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=101, burn=0, thin=1, tune_interval=100)

    #graphics.plot_one_ppc(model.vars['p'], 'p')
    #graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], emp_priors, 'p')
    pl.plot(model.a, model.pi_age_true, 'b--', linewidth=3, alpha=.5, label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')
    pl.title('Heterogeneity %s' % model.parameters['p']['heterogeneity'])
    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[model.delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(dict(true=model.pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.finalize_results(model)

    print model.results
def boot_curvefit(x, y, fit, p0, ci=.05, bootstraps=2000):
    """use of bootstrapping to perform curve fitting.

    Inputs:
      x - x values
      y - corresponding y values
      fit - a packaged fitting function
      p0 - initial parameter list that fit will use

      fit should be a function of the form
        p1 = fit(x, y, p0)
      with p1 being the optimized parameter vector

    Outputs:
      ci - 3xn array (n = number of parameters: median, low_ci, high_ci)
      booted_p - a bxn array of parameter values (b = number of bootstraps)

    An example fit function is:

    def fit(x, y, p0):
      func = lambda p, t: p[0]*pylab.exp(-t/abs(p[1])) + p[2]
      errfunc = lambda p, t, y: func(p, t) - y
      p1, success = optimize.leastsq(errfunc, p0, args=(x, y))
      return p1
    """
    p0 = pylab.array(p0)  #Make it an array in case it isn't one
    if bootstraps > 1:
        idx = pylab.randint(x.size, size=(x.size, bootstraps))
    else:
        idx = pylab.zeros((x.size, 1), dtype=int)
        idx[:, 0] = pylab.arange(x.size)
    booted_p = pylab.zeros((p0.size, bootstraps))
    for n in xrange(bootstraps):
        booted_p[:, n] = fit(x[idx[:, n]], y[idx[:, n]], p0)

    p_ci = pylab.zeros((3, p0.size))
    for p in xrange(p0.size):
        booted_samp = pylab.sort(booted_p[p])
        med = pylab.median(booted_samp)
        idx_lo = int(bootstraps * ci / 2.0)
        idx_hi = int(bootstraps * (1.0 - ci / 2))
        p_ci[:, p] = [med, med - booted_samp[idx_lo], booted_samp[idx_hi] - med]

    return p_ci, booted_p
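# Minimal usage sketch for boot_curvefit above (synthetic data; assumes pylab
# and scipy.optimize are available, matching the function).  The fit function
# follows the exponential-decay example from the docstring.
import pylab
from scipy import optimize

def exp_fit(x, y, p0):
    func = lambda p, t: p[0] * pylab.exp(-t / abs(p[1])) + p[2]
    errfunc = lambda p, t, y: func(p, t) - y
    p1, success = optimize.leastsq(errfunc, p0, args=(x, y))
    return p1

x = pylab.linspace(0, 10, 100)
y = 3.0 * pylab.exp(-x / 2.0) + 0.5 + 0.05 * pylab.randn(x.size)
p_ci, booted_p = boot_curvefit(x, y, exp_fit, p0=[1.0, 1.0, 0.0], ci=.05, bootstraps=500)
print(p_ci[0])    # bootstrap medians of the three fitted parameters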
def mare(pred, obs):
    '''
    model median absolute relative error

    Parameters
    ----------
    pred : df
      df of observations from model.vars[data_type]['p_pred'].stats()['mean']
    obs : df
      df of observations from model.vars[data_type]['p_obs'].value

    Results
    -------
    mare : float
      median absolute relative error, as a percent
    '''
    pred = pl.array(pred['mean'])
    obs = pl.array(obs['value'])
    mare = pl.median((abs(pred - obs)/obs)*100)
    return mare
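# Minimal usage sketch for mare above (hypothetical data; assumes pylab as pl
# and pandas are imported).  pred must carry a 'mean' column and obs a 'value'
# column, matching the docstring.
import pandas
import pylab as pl

pred = pandas.DataFrame({'mean': [0.11, 0.22, 0.28]})
obs = pandas.DataFrame({'value': [0.10, 0.20, 0.30]})
print(mare(pred, obs))   # median of |pred-obs|/obs in percent -> 10.0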
def validate_age_group(model, replicate):
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    N = 30
    delta_true = 5.
    pi_true = true_rate_function
    m = simulate_age_group_data(N=N, delta_true=delta_true, pi_true=pi_true)

    if model == 'midpoint_covariate':
        fit_midpoint_covariate_model(m)
    elif model == 'age_standardizing':
        fit_age_standardizing_model(m)
    elif model == 'age_integrating':
        fit_age_integrating_model(m)
    elif model == 'midpoint_model':
        fit_midpoint_model(m)
    elif model == 'disaggregation_model':
        fit_disaggregation_model(m)
    else:
        raise TypeError, 'Unknown model type: "%s"' % model

    # compare estimate to ground truth
    import data_simulation
    m.mu = pandas.DataFrame(dict(true=[pi_true(a) for a in range(101)],
                                 mu_pred=m.vars['mu_age'].stats()['mean'],
                                 lb_pred=m.vars['mu_age'].stats()['95% HPD interval'][:, 0],
                                 ub_pred=m.vars['mu_age'].stats()['95% HPD interval'][:, 1]))
    data_simulation.add_quality_metrics(m.mu)

    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        m.mu['abs_err'].mean(),
        pl.median(pl.absolute(m.mu['rel_err'].dropna())),
        m.mu['covered?'].mean())
    print

    data_simulation.add_quality_metrics(m.mu)

    data_simulation.initialize_results(m)
    data_simulation.add_to_results(m, 'mu')
    data_simulation.finalize_results(m)

    return m
def try_kegg_api():
    db = SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter('../res/dG0_test.html')
    G = GroupContribution(db, html_writer=html_writer)
    G.init()

    wsdl = 'http://soap.genome.jp/KEGG.wsdl'
    serv = WSDL.Proxy(wsdl)

    rid_file = open('../res/eco_rids.txt', 'w')
    rids = set()
    for x in serv.list_pathways('eco'):
        pathway_id = x['entry_id']
        for reaction_id in serv.get_reactions_by_pathway(pathway_id):
            rid = int(reaction_id[4:])
            if rid not in rids:
                rids.add(rid)
                rid_file.write('%d\n' % rid)
    rid_file.close()

    c_mid = 1e-3
    pH, pMg, I, T = (7.0, 3.0, 0.1, 298.15)

    rid2reversibility = {}
    misses = 0
    for rid in sorted(rids):
        try:
            reaction = G.kegg.rid2reaction(rid)
            r = CalculateReversability(reaction, G, c_mid, pH, pMg, I, T)
            rid2reversibility[rid] = r
        except thermodynamics.MissingCompoundFormationEnergy:
            misses += 1
            continue

    print 'hits = %d, misses = %d' % (len(rid2reversibility), misses)
    median = pylab.median(rid2reversibility.values())
    print 'median = %.1f' % median

    pylab.figure()
    pylab.hold(True)
    plotting.cdf(rid2reversibility.values(), 'all reactions', 'r', show_median=True)
    pylab.show()
def stats(results):
    """
    Compute and print statistics for record to stdout.

    In:
        results : list of dicts, processing results
    """
    means = {'beta': 0.0, 'std': 0.0, 'cov': 0.0, 'mean': 0.0}
    mins = means.copy()
    maxs = means.copy()
    medians = means.copy()
    stdevs = means.copy()

    for param in means.keys():
        mins[param] = min([v[param] for v in results])
        maxs[param] = max([v[param] for v in results])
        means[param] = pl.mean([v[param] for v in results])
        medians[param] = pl.median([v[param] for v in results])
        stdevs[param] = pl.std([v[param] for v in results])

    print "Min:\t\t%(beta)0.2f\t%(std)d\t%(cov)0.2f%%\t%(mean)d" % mins
    print "Max:\t\t%(beta)0.2f\t%(std)d\t%(cov)0.2f%%\t%(mean)d" % maxs
    print "Mean:\t\t%(beta)0.2f\t%(std)d\t%(cov)0.2f%%\t%(mean)d" % means
    print "Median:\t\t%(beta)0.2f\t%(std)d\t%(cov)0.2f%%\t%(mean)d" % medians
    print "Stdev:\t\t%(beta)0.2f\t%(std)d\t%(cov)0.2f%%\t%(mean)d" % stdevs
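# Minimal usage sketch for stats above (hypothetical records; each dict must
# carry the 'beta', 'std', 'cov' and 'mean' keys the function expects).
results = [
    {'beta': 0.8, 'std': 12, 'cov': 95.0, 'mean': 140},
    {'beta': 1.1, 'std': 15, 'cov': 92.5, 'mean': 150},
    {'beta': 0.9, 'std': 10, 'cov': 97.0, 'mean': 145},
]
stats(results)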
fwhm = []
for i in range(61, 395):
    print i
    # open spectrum and calculate continuum level near Ha line then write to cursor file
    data = pf.getdata('fec2117_%04d.fits' % i)
    head = pf.getheader('fec2117_%04d.fits' % i)
    start = head['CRVAL1']
    step = head['CDELT1']
    length = head['NAXIS1']
    x = start + pl.arange(0, length)*step
    hi = x > 4640
    low = x < 4730
    xx = hi*low
    med = pl.median(data[xx])
    print med
    cursor = open('cursor', 'w')
    cursor.write('4640 %s 1 k\n4730 %s 1 k\n' % (med, med))
    cursor.close()
    iraf.splot(images='fec2117_%04d.fits' % i,
               cursor='cursor',
               save_file='splot.log')
    # read splot.log and extract the results
    myfile = open('splot.log', 'r')
    lines = myfile.readlines()
    try:
        # the first log written to empty file has header
        temp = string.split(string.strip(lines[3]))