Example #1
def add_to_results(model, name):
    df = getattr(model, name)
    model.results['param'].append(name)
    model.results['bias'].append(df['abs_err'].mean())
    model.results['mae'].append((pl.median(pl.absolute(df['abs_err'].dropna()))))
    model.results['mare'].append(pl.median(pl.absolute(df['rel_err'].dropna())))
    model.results['pc'].append(df['covered?'].mean())
Example #2
def trace_fibers(flatim, params):

    trace_im1 = pf.getdata(flatim)*0

    imdat, imhead = pf.getdata(flatim), pf.getheader(flatim)

    ###  separating fibers in first column and assigning fibers ids
    print '\n\tSEARCHING FOR FIBERS BETWEEN x=0 & y=[' +str(params.LO_BUFFER) +':'+str(len(imdat[:,0])-params.HI_BUFFER)+']'
    fiber_peaks_pix = pl.find( imdat[:,0] > pl.median(imdat[:,0]) )
    fiber_peaks_pix,fiber_peaks_flx = sep_peaks( fiber_peaks_pix, imdat[:,0] )
    if pl.median(fiber_peaks_pix[0])<=params.LO_BUFFER:
        fiber_peaks_pix = fiber_peaks_pix[1:]
        fiber_peaks_flx = fiber_peaks_flx[1:]
    if (len(imdat[:,0])-pl.median(fiber_peaks_pix[-1]))<=params.HI_BUFFER:
        fiber_peaks_pix = fiber_peaks_pix[:-1]
        fiber_peaks_flx = fiber_peaks_flx[:-1]
    print '\t  --> FOUND ', len(fiber_peaks_pix), ' FIBER PEAKS'

    ###  creating array for fibers
    fibers0 = []
    id_cnt = 1
    for f in range(len(fiber_peaks_pix)):
        while params.FIBERS_EXCLUDE.tolist().count(id_cnt)==1: id_cnt+=1

        fibx,fiby = fiber_peaks_pix[f],fiber_peaks_flx[f]
        peakx = fibx[ fiby.tolist().index(max(fiby)) ]
        yrange = pl.arange(  peakx-params.FIBER_WIDTH/2  ,  peakx+params.FIBER_WIDTH/2+1  )

        fibers0.append( fiber(id_cnt, 0,     yrange     ))
        id_cnt+=1

##  TRACING FIBERS ALONG X-AXIS INDIVIDUALLY
    for fib in fibers0:
        for x in range(1,len(imdat)):
##  FIRST, TAKE THE FLUXES IN THE PIXELS AT x
##  THAT ENCOMPASSED THE PEAK AT x-1
            fluxes = imdat[ fib.xy[-1][1] , x ]
##  NEXT, FIND THE VERTICAL SHIFT TO CENTER ON
##  THE PEAK FLUX AT x. MAXIMUM SHIFT IS DETERMINED
##  FROM THE FIBER_WIDTH PARAMETER.
            deltay = range( -len(fluxes)/2+1 , len(fluxes)/2+1 )[ fluxes.tolist().index(max(fluxes)) ]

##  RECORD THE NEW Y-PIXELS THAT ARE CENTERED ON
##  THE FIBER AT x.
            fib.xy.append( [ x, fib.xy[-1][1]+deltay ] )

##  FLAG PIXELS FOR FIBER IN FIRST-PASS TRACE IMAGE
            trace_im1[fib.xy[-1][1],x] = fib.id



    trc0 = 'trace_pass1.fits'
    print '\n\tWRITING INITIAL TRACING TO ', trc0
    try: pf.writeto(trc0, trace_im1, header=imhead)
    except:
        os.remove(trc0)
        pf.writeto(trc0, trace_im1, header=imhead)
        
    return fibers0
Example #3
def add_to_results(model, name):
    df = getattr(model, name)
    model.results['param'].append(name)
    model.results['bias'].append(df['abs_err'].mean())
    model.results['mae'].append(
        (pl.median(pl.absolute(df['abs_err'].dropna()))))
    model.results['mare'].append(pl.median(pl.absolute(
        df['rel_err'].dropna())))
    model.results['pc'].append(df['covered?'].mean())
Example #4
def combine_output(J, T, model, dir, reps, save=False):
    """
    Combine output on absolute error, relative error, csmf_accuracy, and coverage from
    multiple runs of validate_once. Either saves the output to disk or returns arrays
    for each.
    """

    cause = pl.zeros(J*T, dtype='f').view(pl.recarray)
    time = pl.zeros(J*T, dtype='f').view(pl.recarray)
    abs_err = pl.zeros(J*T, dtype='f').view(pl.recarray) 
    rel_err = pl.zeros(J*T, dtype='f').view(pl.recarray)
    coverage = pl.zeros(J*T, dtype='f').view(pl.recarray)
    csmf_accuracy = pl.zeros(J*T, dtype='f').view(pl.recarray)

    for i in range(reps): 
        metrics = pl.csv2rec('%s/metrics_%s_%i.csv' % (dir, model, i))
        cause = pl.vstack((cause, metrics.cause))
        time = pl.vstack((time, metrics.time))
        abs_err = pl.vstack((abs_err, metrics.abs_err))
        rel_err = pl.vstack((rel_err, metrics.rel_err))
        coverage = pl.vstack((coverage, metrics.coverage))
        csmf_accuracy = pl.vstack((csmf_accuracy, metrics.csmf_accuracy))

    cause = cause[1:,]
    time = time[1:,]    
    abs_err = abs_err[1:,]
    rel_err = rel_err[1:,]
    coverage = coverage[1:,]
    csmf_accuracy = csmf_accuracy[1:,]

    mean_abs_err = abs_err.mean(0)
    median_abs_err =  pl.median(abs_err, 0)
    mean_rel_err = rel_err.mean(0)
    median_rel_err = pl.median(rel_err, 0)
    mean_csmf_accuracy = csmf_accuracy.mean(0)
    median_csmf_accuracy = pl.median(csmf_accuracy, 0)
    mean_coverage_bycause = coverage.mean(0)
    mean_coverage = coverage.reshape(reps, T, J).mean(0).mean(1)
    percent_total_coverage = (coverage.reshape(reps, T, J).sum(2)==3).mean(0)
    mean_coverage = pl.array([[i for j in range(J)] for i in mean_coverage]).ravel()
    percent_total_coverage = pl.array([[i for j in range(J)] for i in percent_total_coverage]).ravel()

    models = pl.array([[model for j in range(J)] for i in range(T)]).ravel()
    true_cf = metrics.true_cf
    true_std = metrics.true_std
    std_bias = metrics.std_bias

    all = pl.np.core.records.fromarrays([models, cause[0], time[0], true_cf, true_std, std_bias, mean_abs_err, median_abs_err, mean_rel_err, median_rel_err, 
                                         mean_csmf_accuracy, median_csmf_accuracy, mean_coverage_bycause, mean_coverage, percent_total_coverage], 
                                        names=['model', 'cause', 'time', 'true_cf', 'true_std', 'std_bias', 'mean_abs_err', 'median_abs_err', 
                                         'mean_rel_err', 'median_rel_err', 'mean_csmf_accuracy', 'median_csmf_accuracy', 
                                         'mean_coverage_bycause', 'mean_coverage', 'percent_total_coverage'])
    
    if save: 
        pl.rec2csv(all, '%s/%s_summary.csv' % (dir, model)) 
    else: 
        return all
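
The seed-row pattern above (allocate one dummy row of zeros, vstack each replicate onto it, then slice the dummy row off) can be written without the throwaway row. A minimal sketch of the equivalent accumulation, assuming only numpy; the names rows and abs_err are illustrative:

import numpy as np

# Collect per-replicate rows in a list and stack once: no dummy row to strip.
rows = []
for i in range(3):                               # stands in for the reps loop
    rows.append(np.arange(4, dtype=float) + i)   # stands in for metrics.abs_err
abs_err = np.vstack(rows)                        # shape (reps, J*T)
print(abs_err.mean(0))                           # per-column means, as in mean_abs_err
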
Example #5
def epsetstmax(path):
    dico = load_spatial_means(path)
    time = dico["t"]
    eps = dico["epsK_tot"] + dico["epsA_tot"]
    E = dico["EK"] + dico["EA"]

    # if 'noise' in path:
    if eps.max() > 2 * eps[-1]:

        def f(x, amptan, ttan):
            return amptan * pl.tanh(2 * (x / ttan)**4)

        guesses = np.array([pl.median(eps), time[eps == eps.max()][0]])
    else:
        # def f(x, amptan, ttan, amplog, sigma):
        def f(x, amptan, ttan, amplog, tlog, sigma):
            return amptan * pl.tanh(
                2 * (x / ttan)**4) + amplog * stats.lognorm.pdf(
                    x, scale=pl.exp(tlog), s=sigma)

        guesses = np.array(
            (
                # amptan
                pl.median(eps),
                # ttan
                time[eps == eps.max()][0],
                # amplog
                eps.max(),
                # tlog
                time[eps == eps.max()][0],
                # sigma
                eps.std(),
            ),
            dtype=float,
        )
        # guesses = pl.array(list(guessesd.values()), dtype=float)

    try:
        popt, pcov = curve_fit(f, time, eps, guesses, maxfev=3000)
    except RuntimeError:
        print("Error while curve fitting data from path =", path)
        raise

    eps_fit = f(time, *popt)
    eps_stat = float(eps_fit[-1])
    try:
        # idx = _index_flat(eps_fit, time)
        idx = locate_knee(time, eps_fit, eps_stat)
        time_stat = time[idx]
    except ValueError:
        raise ValueError("While calculating curvature in {}".format(path))
        # warn("While calculating curvature in {}".format(path))
        # time_stat = popt[1] + 6 * popt[3]

    E_stat = E[idx:].mean()
    return eps_stat, E_stat, time_stat, time[-1]
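
A self-contained sketch of the saturating-tanh fit used above, assuming only numpy and scipy (the non-noise branch additionally adds a lognormal bump); the synthetic data here is illustrative:

import numpy as np
from scipy.optimize import curve_fit

def f(x, amptan, ttan):
    # saturating ramp: grows like (x/ttan)**4, flattens at amptan
    return amptan * np.tanh(2 * (x / ttan)**4)

time = np.linspace(0.1, 10.0, 200)
eps = f(time, 1.5, 3.0) + 0.05 * np.random.randn(time.size)

guesses = np.array([np.median(eps), time[eps.argmax()]])
popt, pcov = curve_fit(f, time, eps, p0=guesses, maxfev=3000)
print(popt)   # should land near (1.5, 3.0)
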
Example #6
def loadFile(objectFileName):
    oimg = pyfits.open(objectFileName)

    # Load the IFU data -- Row-stacked spectra
    odata = oimg[1].data
    oError = oimg[2].data
    odata_dim = odata.shape
    wcs = astWCS.WCS(objectFileName, extensionName=1)
    owavelengthStartEnd = wcs.getImageMinMaxWCSCoords()[0:2]
    fiberNumber = wcs.getImageMinMaxWCSCoords()[2:4]
    owavelengthStep = oimg[1].header['CDELT1']

    owavelengthRange = [owavelengthStartEnd[0] + i * owavelengthStep
                        for i in range(odata_dim[1])]

    # Check to make sure we got it right
    if owavelengthRange[-1] != owavelengthStartEnd[-1]:
        print 'The ending wavelengths do not match... Exiting'
        sys.exit(1)
    else:
        # make median sky
        specs = pyl.array([flux for flux in odata])
        skySpec = pyl.median(specs, axis=0)

    RSS = []
    for i in range(int(fiberNumber[1])):
        #oflux = odata[i] - oskyflux
        oflux = odata[i] - skySpec
        oflux[pyl.isnan(oflux)] = 0.0
        oErrorFlux = oError[i]
        #oflux = odata[i]

        # Mask out extreme values in spectrum
        # Just because edges dodgy in efosc
        med = pyl.median(oflux)
        oflux[pyl.greater(abs(oflux), 10.0 * med)] = 0.0001

        objSED = astSED.SED(wavelength=owavelengthRange, flux=oflux)
        #skySED = astSED.SED(wavelength=owavelengthRange, flux=oskyflux)
        skySED = astSED.SED(wavelength=owavelengthRange, flux=skySpec)
        errSED = astSED.SED(wavelength=owavelengthRange, flux=oErrorFlux)

        #  make it > 0 everywhere
        objSED.flux = objSED.flux - objSED.flux.min()
        objSED.flux = objSED.flux / objSED.flux.max()
        errSED.flux = errSED.flux - errSED.flux.min()
        errSED.flux = errSED.flux / errSED.flux.max()
        skySED.flux = skySED.flux - skySED.flux.min()
        skySED.flux = skySED.flux / skySED.flux.max()

        RSS.append({'object': objSED, 'sky': skySED, 'error': errSED})

    return RSS
Example #7
  def one_ci(v, ci, bootstraps):
    v = pylab.array(v)
    v = pylab.ma.masked_array(v,pylab.isnan(v)).compressed()
    if v.size == 0:
      return pylab.nan, 0, 0 #Nothing to compute

    r = pylab.randint(v.size, size=(v.size, bootstraps))
    booted_samp = pylab.array([pylab.median(v[r[:,n]]) for n in xrange(bootstraps)])
    booted_samp.sort()

    med = pylab.median(booted_samp)
    idx_lo = int(bootstraps * ci/2.0)
    idx_hi = int(bootstraps * (1.0-ci/2))

    return med, med-booted_samp[idx_lo], booted_samp[idx_hi]-med
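
The same bootstrap-median confidence interval as a self-contained numpy sketch (np.random.randint standing in for the pylab alias; median_ci is an illustrative name):

import numpy as np

def median_ci(v, ci=0.05, bootstraps=2000):
    v = np.asarray(v, dtype=float)
    v = v[~np.isnan(v)]                           # drop NaNs, as the masked array does
    if v.size == 0:
        return np.nan, 0.0, 0.0                   # nothing to compute
    r = np.random.randint(v.size, size=(v.size, bootstraps))
    booted = np.sort(np.median(v[r], axis=0))     # median of each resample
    med = np.median(booted)
    lo = booted[int(bootstraps * ci / 2.0)]
    hi = booted[int(bootstraps * (1.0 - ci / 2.0))]
    return med, med - lo, hi - med

print(median_ci([1.0, 2.0, np.nan, 3.0, 4.0]))
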
Example #8
def binit(x, y, n):  #bin arrays x, y into n bins, returning xbin,ybin
    nx = len(x)
    y = N.take(y, N.argsort(x))  #sort y according to x rankings
    x = N.take(x, N.argsort(x))
    xbin = N.zeros(n, 'f')
    ybin = N.zeros(n, 'f')
    for i in range(n):
        nmin = i * int(float(nx) / float(n))
        nmax = (i + 1) * int(float(nx) / float(n))
        xbin[i] = pylab.median(x[nmin:nmax])
        ybin[i] = pylab.median(y[nmin:nmax])
        #xbin[i]=N.average(x[nmin:nmax])
        #ybin[i]=N.average(y[nmin:nmax])
        #ybinerr[i]=scipy.stats.std(y[nmin:nmax])
    return xbin, ybin  #, ybinerr
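
A usage sketch, assuming binit as defined above together with the numpy-as-N and pylab imports its module uses:

import numpy as N
import pylab

x = N.random.uniform(0.0, 10.0, 500)
y = 2.0 * x + N.random.randn(500)
xbin, ybin = binit(x, y, 10)       # 10 equal-count bins; per-bin medians
pylab.plot(x, y, 'k.', alpha=0.2)
pylab.plot(xbin, ybin, 'ro')
pylab.show()
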
Example #9
 def computeMAD(self, a, c=0.6745, axis=0):
     a = np.array(a)
     if a.ndim == 1:
         d = pl.median(a)
         m = pl.median(np.fabs(a - d) / c)
     elif (a.ndim > 1):
         d = pl.median(a, axis=axis)
         if axis > 0:
             aswp = swapaxes(a, 0, axis)
         else:
             aswp = a
         m = pl.median(np.fabs(aswp - d) / c, axis=0)
     else:
         m = 0
     return m
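
With the default c = 0.6745 the statistic above is the normal-consistent MAD, i.e. it estimates the standard deviation of Gaussian data. A quick standalone check of the 1-D case, assuming only numpy:

import numpy as np

def mad(a, c=0.6745):
    # scaled median absolute deviation (the a.ndim == 1 branch of computeMAD)
    a = np.asarray(a)
    return np.median(np.fabs(a - np.median(a))) / c

x = np.random.randn(100000)
print(mad(x))     # ~1.0
print(x.std())    # ~1.0
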
Example #10
def remove_discontinuity(value, xgap=10, ygap=200):
    """
    Remove discontinuity (sudden jump) in a series of values.
    Written by Denis, developed for LLC Fringe Counts data.
    value : list or numpy.array
    xgap  : "width" of index of the list/array to adjust steps
    ygap  : threshold value to detect discontinuity
    """
    difflist = pl.diff(value)
    discont_index = pl.find(abs(difflist) > ygap)

    if len(discont_index) == 0:
        return value
    else:
        discont_index = pl.append(discont_index, len(difflist))

    # find indice at discontinuities
    discont = {"start": [], "end": []}
    qstart = discont_index[0]
    for i in range(len(discont_index) - 1):
        if discont_index[i + 1] - discont_index[i] > xgap:
            qend = discont_index[i]
            discont["start"].append(qstart - xgap)
            discont["end"].append(qend + xgap)
            qstart = discont_index[i + 1]

    # add offsets at discontinuities
    result = pl.array(value)
    for i in range(len(discont["end"])):
        result[0 : discont["start"][i]] += result[discont["end"][i]] - result[discont["start"][i]]

    # remove the median
    result = result - pl.median(result)
    return result
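
A usage sketch for remove_discontinuity on a synthetic step. Note that pl.find has been removed from recent matplotlib; on new installs np.flatnonzero is the drop-in replacement inside the function:

import numpy as np
import pylab as pl

t = np.arange(1000, dtype=float)
signal = np.sin(t / 50.0)
signal[400:] += 500.0                  # sudden jump well above ygap=200

fixed = remove_discontinuity(signal)   # assumes the function defined above
pl.plot(t, signal, label='raw')
pl.plot(t, fixed, label='adjusted')
pl.legend()
pl.show()
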
Example #11
    def read_align(self, readfile):

        self.df = pd.read_csv(readfile, sep='\t', header=None)
        self.df.columns = ['ref', 'start', 'end', 'dummy', 'quality', 'strand']
        from pylab import median
        self.read_length = round(median(self.df['end'] - self.df['start']))
        self.chromosomes = self.df['ref'].unique()
Example #12
def scatter_times(name, sheets):
    means = []
    medians = []
    delays = []
    mean_points = []
    med_points = []
    for sheet, delay in sheets:
        delays.append(delay)
        times = get_times(sheet)
        mean = pylab.mean(times)
        median = pylab.median(times)
        means.append(mean)
        medians.append(median)
        mean_points.append((mean, sheet))
        med_points.append((median, sheet)) 
    
    print "----------mean points-----------"    
    for mean, sheet in sorted(mean_points):
        print mean, sheet
    print "----------median points-----------"
    for median, sheet in sorted(med_points):
        print median, sheet
          
    pylab.scatter(delays, means, color='r')
    pylab.scatter(delays, medians, color='b')
    print "show"
    pylab.show()
Example #13
    def makePlots(self, ax, query, fNum, fColor, fMarker, feedstock):

        query.getQuery()

        if query.queryString.startswith('No'):
            pass

        elif query.queryString.startswith('FR'):
            data = [1, 1]
            ax.plot([fNum] * 2, [1, 1], fColor, marker=fMarker, markersize=2)

        else:
            data = self.db.output(query.queryString, self.db.schema)
            medVal = median(data)
            maxVal = max(data)
            minVal = min(data)

            ax.plot([fNum], medVal, fColor, marker='_', markersize=7)

            #Plot the max/min values
            ax.plot([fNum] * 2, [maxVal, minVal],
                    fColor,
                    marker=fMarker,
                    markersize=2)

            self.writeResults(feedstock, str(maxVal[0]), str(medVal),
                              str(minVal[0]))
Example #14
def binitbinsfix(binsleft, binsright, x, y):  #give bins for binning
    nbin = len(binsleft)
    xbin = N.zeros(len(binsleft), 'f')
    ybin = N.zeros(len(binsleft), 'f')
    ybinerr = N.zeros(len(binsleft), 'f')
    y = N.take(y, N.argsort(x))  #sort y according to x rankings
    x = N.take(x, N.argsort(x))

    j = -1
    for i in range(len(xbin)):
        xmin = binsleft[i]
        xmax = binsright[i]
        yb = []
        for j in range(len(x)):
            if x[j] > xmin:
                yb.append(y[j])
            if x[j] > xmax:
                yb = N.array(yb, 'f')
                xbin[i] = 0.5 * (xmin + xmax)
                try:
                    ybin[i] = pylab.median(yb)
                    ybinerr[i] = pylab.std(yb) / N.sqrt(1. * len(yb))
                except ZeroDivisionError:
                    print "warning: ZeroDivision error in binitbinsfix"
                    ybin[i] = 0.
                    ybinerr[i] = 0.

                break

    return xbin, ybin, ybinerr
Example #15
def smooth(x, window_len=11, window='flat', rms=0):
    if window == 'median':
        movmed, movrms = [], []
        for i in range(len(x)):
            low = max(0, i - window_len // 2)
            high = min(len(x), i + window_len // 2)
            movmed.append(median(x[low:high]))
            movrms.append(numpy.std(x[low:high]))
        if rms == 0:
            return array(movmed)
        else:
            return array(movmed), array(movrms)

    else:
       if x.ndim != 1:
           raise ValueError, "smooth only accepts 1 dimension arrays."
       if x.size < window_len:
           raise ValueError, "Input vector needs to be bigger than window size."
       if window_len<3:
           return x
       if window not in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
           raise ValueError, "Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"
       s=numpy.r_[2*x[0]-x[window_len-1::-1],x,2*x[-1]-x[-1:-window_len:-1]]
       #print(len(s))
       if window == 'flat': #moving average
           w=numpy.ones(window_len,'d')
       else:
           w = getattr(numpy, window)(window_len)
       y = numpy.convolve(w/w.sum(),s,mode='same')
       return array(y[window_len:-window_len+1])
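
A usage sketch, assuming smooth as above plus the unqualified names (array, median, numpy) it expects at module scope; both branches return an array the same length as the input:

import numpy
from numpy import array, median   # names smooth() uses unqualified

x = numpy.sin(numpy.linspace(0, 6, 200)) + 0.3 * numpy.random.randn(200)
y_med = smooth(x, window_len=11, window='median')
y_avg = smooth(x, window_len=11, window='hanning')
assert len(y_med) == len(x) and len(y_avg) == len(x)
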
Example #16
    def one_ci(v, ci, bootstraps):
        v = pylab.array(v)
        v = pylab.ma.masked_array(v, pylab.isnan(v)).compressed()
        if v.size == 0:
            return pylab.nan, 0, 0  #Nothing to compute

        r = pylab.randint(v.size, size=(v.size, bootstraps))
        booted_samp = pylab.array(
            [pylab.median(v[r[:, n]]) for n in xrange(bootstraps)])
        booted_samp.sort()

        med = pylab.median(booted_samp)
        idx_lo = int(bootstraps * ci / 2.0)
        idx_hi = int(bootstraps * (1.0 - ci / 2))

        return med, med - booted_samp[idx_lo], booted_samp[idx_hi] - med
Example #17
def plotter(key, sims, ax, label='', ylabel='', low_q=0.05, high_q=0.95, startday=None):

    color = cv.get_colors()[key.split('_')[1]]

    ys = []
    for s in sims:
        ys.append(s.results[key].values)
    yarr = np.array(ys)

    best = pl.median(yarr, axis=0)
    low  = pl.quantile(yarr, q=low_q, axis=0)
    high = pl.quantile(yarr, q=high_q, axis=0)

    sim = sims[0]

    tvec = np.arange(len(best))

    fill_label = None
    pl.fill_between(tvec, low, high, facecolor=color, alpha=0.2, label=fill_label)
    pl.plot(tvec, best, c=color, label=label, lw=4, alpha=1.0)

    sc.setylim()

    datemarks = pl.array([sim.day('2020-03-01'),sim.day('2020-05-01'),sim.day('2020-07-01'),
                          sim.day('2020-09-01')])
    ax.set_xticks(datemarks)
    pl.ylabel(ylabel)

    return
Example #18
def fit_tri_gaussian(wav, flux, flux_err, name_line, wav_line, rline=None, isplot=False, silent=True):
    p0 = [pl.median(flux), wav_line[0], 2., 10., wav_line[1], 2., 10., wav_line[2], 2., 10.]
    parinfo = [{'value': 0., 'fixed': 0, 'limited': [0, 0], 'limits': [0., 0.], 'tied': '', 'parname': 'name'}]
    parinfo[0]['value'] = p0[0]
    parinfo[0]['parname'] = 'constant'
    parinfo.extend(generate_parinfo(p0=p0[1:4], pname=['wav_center1', 'width1', 'area1']))
    parinfo.extend(generate_parinfo(p0=p0[4:7], pname=['wav_center2', 'width2', 'area2']))
    parinfo.extend(generate_parinfo(p0=p0[7:10], pname=['wav_center3', 'width3', 'area3']))
    if rline is not None:
        if rline[0] is not None:
            parinfo[3]['tied'] = str(rline[0]) + ' * p[9]'
        if rline[1] is not None:
            parinfo[6]['tied'] = str(rline[1]) + ' * p[9]'
    fdata = {'x': wav, 'y': flux, 'err': flux_err, 'ngaussian': 3}
    res = mpfit(myfunct_gaussian, p0, parinfo=parinfo, functkw=fdata, quiet=silent)
    if (res.status < 0) or (res.perror is None):
        print('error message = ', res.errmsg)
        return emptyline(name_line, wav_line)
    line1 = LineProfile(name_line[0], res.params[1], res.params[3], res.perror[3], res.params[2], res.perror[2], res.params[0])
    line2 = LineProfile(name_line[1], res.params[4], res.params[6], res.perror[6], res.params[5], res.perror[5], res.params[0])
    line3 = LineProfile(name_line[2], res.params[7], res.params[9], res.perror[9], res.params[8], res.perror[8], res.params[0])
    if isplot:
        line1.spec = [wav, flux]
        line2.spec = [wav, flux]
        line3.spec = [wav, flux]
    return [line1, line2, line3]
Example #19
def flow_rate_hist(sheets):
    ant_rates = []
    weights = []
    for sheet in sheets:
        ants, seconds, weight = flow_rate(sheet)
        ant_rate = seconds / ants
        #ant_rate = ants / seconds
        ant_rates.append(ant_rate)
        weights.append(float(weight))
        #weights.append(seconds)

    weights = pylab.array(weights)
    weights /= sum(weights)

    #print "ants per second"
    print "seconds per ant"
    mu = pylab.mean(ant_rates)
    print "mean", pylab.mean(ant_rates)
    wmean = pylab.average(ant_rates, weights=weights)
    print "weighted mean", wmean
    print "median", pylab.median(ant_rates)
    print "std", pylab.std(ant_rates, ddof=1)
    ant_rates = pylab.array(ant_rates)
    werror = (ant_rates - mu) * weights
    print "weighted std", ((sum(werror ** 2))) ** 0.5
    print "weighted std 2", (pylab.average((ant_rates - mu)**2, weights=weights)) ** 0.5
    pylab.figure()
    pylab.hist(ant_rates)
    pylab.savefig('ant_flow_rates.pdf', format='pdf')
    pylab.close()
Example #20
def pitch_estimate(dw):
    step = 8
    wsize = 2048
    wfun = pl.ones
    wa = 3
    lo, hi = 50, 700
    hist_params = dict(bins=800, lw=0, range=[lo,hi], rwidth=1.0,
        normed=True, log=True)

    subplots = wplot.Subplots(6, 1,
        yticks=[0,.1,.25,.5,1],
        xlim=(120,240),
        autoscalex_on=False)

    for wfun in [pl.hanning, pl.hamming, pl.blackman, pl.bartlett, pl.ones]:
        cc = chunker.Chunker(dw, window=wfun(wsize), step=step)
        acs = [cc.ac for c in cc.chunks()]
        pp = [chunker.find_peak(a, lo, hi, wa=wa) for a in acs]
        mm = pl.median(pp)
        subplots(
            title='window: %s(%d) step=%s range=%s wa=%d' % (wfun.func_name, wsize, step, [lo,hi], wa),
            xticks=[mm]+range(lo,hi+50,50))
        subplots.next()
        freq, bins, patches = pl.hist(pp, **hist_params)

    print 'Ok!'
Example #21
 def makePlots(self, ax, x, fNum, fColor, fMarker, feedstock):
     
     x.getQuery()
     
     if x.queryString.startswith('No'):
         pass    
     
     elif x.queryString.startswith('FR'):
         data = [1,1]
         ax.plot([fNum]*2,[1,1],fColor,marker=fMarker,markersize=2)
         
     else:
         cur = self.conn.cursor()
         print x.queryString
         cur.execute(x.queryString)
         #[all data]
         data = cur.fetchall()
         cur.close()
         medVal = median(data)
         maxVal = max(data)
         minVal = min(data)
         
         ax.plot([fNum],medVal,fColor,marker='_', markersize=7)
 
         #Plot the max/min values
         ax.plot([fNum]*2,[maxVal, minVal],fColor,marker=fMarker, markersize=2)    
         
         self.writeResults(feedstock, str(maxVal[0]), str(medVal), str(minVal[0]))
Example #22
def get_age_sex(is_crew=False,
                min_age=18,
                max_age=99,
                crew_age=35,
                crew_std=5,
                guest_age=68,
                guest_std=8):
    '''
    Define age-sex distributions. Passenger age distribution based on:
        https://www.nytimes.com/reuters/2020/02/12/world/asia/12reuters-china-health-japan.html

        "About 80% of the passengers were aged 60 or over [=2130], with 215 in their 80s and 11 in the 90s,
        the English-language Japan Times newspaper reported."
    '''

    # Define female (0) or male (1) -- evenly distributed
    sex = pl.randint(2)

    # Define age distribution for the crew and guests
    if is_crew:
        age = pl.normal(crew_age, crew_std)
    else:
        age = pl.normal(guest_age, guest_std)

    # Normalize
    age = pl.median([min_age, age, max_age])

    return age, sex
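
The pl.median([min_age, age, max_age]) line is a clamp trick: the median of three numbers is the middle one, so any draw outside [min_age, max_age] is pulled back to the nearer bound. For example:

import pylab as pl

print(pl.median([18, 120.0, 99]))   # 99.0  -> out-of-range draw clamped to max_age
print(pl.median([18, 42.5, 99]))    # 42.5  -> in-range draw passes through
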
Example #23
def remove_discontinuity(value, xgap=10, ygap=200):
    """
    Remove discontinuity (sudden jump) in a series of values.
    Written by Denis, developed for LLC Fringe Counts data.
    value : list or numpy.array
    xgap  : "width" of index of the list/array to adjust steps
    ygap  : threshold value to detect discontinuity
    """
    difflist = pl.diff(value)
    discont_index = pl.find(abs(difflist) > ygap)

    if len(discont_index) == 0:
        return value
    else:
        discont_index = pl.append(discont_index, len(difflist))

    # find indice at discontinuities
    discont = {'start': [], 'end': []}
    qstart = discont_index[0]
    for i in range(len(discont_index)-1):
        if discont_index[i+1]-discont_index[i] > xgap:
            qend = discont_index[i]
            discont['start'].append(qstart-xgap)
            discont['end'].append(qend+xgap)
            qstart = discont_index[i+1]

    # add offsets at discontinuities
    result = pl.array(value)
    for i in range(len(discont['end'])):
        result[0:discont['start'][i]] += \
            result[discont['end'][i]] - result[discont['start'][i]]

    #remove the median
    result=result-pl.median(result)
    return result
Example #24
def mare(model, data_type):
    try:
        pred = model.vars[data_type]['p_pred'].trace().mean(0)
    except:
        pred = 0    
    obs = model.get_data(data_type)['value']
    mare = pl.median((abs(pred - obs)/obs)*100)
    return mare
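
A worked numeric check of the metric, assuming pylab:

import pylab as pl

pred = pl.array([1.0, 2.0, 3.0])
obs = pl.array([1.0, 4.0, 3.3])
rel_err_pct = (abs(pred - obs) / obs) * 100   # [0.0, 50.0, ~9.09]
print(pl.median(rel_err_pct))                 # ~9.09
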
Example #25
def mare(model, data_type):
    try:
        pred = model.vars[data_type]['p_pred'].trace().mean(0)
    except:
        pred = 0
    obs = model.get_data(data_type)['value']
    mare = pl.median((abs(pred - obs) / obs) * 100)
    return mare
Example #26
def store_results(dm, area, sex, year):
    types_to_plot = 'p i r rr'.split()

    graphics.plot_convergence_diag(dm.vars)
    pl.clf()
    for i, t in enumerate(types_to_plot):
        pl.subplot(len(types_to_plot), 1, i + 1)
        graphics.plot_data_bars(dm.model.get_data(t))
        pl.plot(range(101),
                dm.emp_priors[t, 'mu'],
                linestyle='dashed',
                color='grey',
                label='Emp. Prior',
                linewidth=3)
        pl.plot(range(101), dm.true[t], 'b-', label='Truth', linewidth=3)
        pl.plot(range(101),
                dm.posteriors[t].mean(0),
                'r-',
                label='Estimate',
                linewidth=3)

        pl.errorbar(range(101),
                    dm.posteriors[t].mean(0),
                    yerr=1.96 * dm.posteriors[t].std(0),
                    fmt='r-',
                    linewidth=1,
                    capsize=0)

        pl.ylabel(t)
        graphics.expand_axis()

    pl.legend(loc=(0., -.95), fancybox=True, shadow=True)
    pl.subplots_adjust(hspace=0, left=.1, right=.95, bottom=.2, top=.95)
    pl.xlabel('Age (Years)')
    pl.show()

    model = dm
    model.mu = pandas.DataFrame()
    for t in types_to_plot:
        model.mu = model.mu.append(pandas.DataFrame(
            dict(true=dm.true[t],
                 mu_pred=dm.posteriors[t].mean(0),
                 sigma_pred=dm.posteriors[t].std(0))),
                                   ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.mu['abs_err'].mean(),
        pl.median(pl.absolute(
            model.mu['rel_err'].dropna())), model.mu['covered?'].mean())
    print

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'mu')
    data_simulation.finalize_results(model)

    print model.results

    return model
Example #27
    def one_ci(pc, nsamp, ci, bootstraps):
        booted_p = boot_p(pc, nsamp, bootstraps)
        booted_p.sort()

        p = pylab.median(booted_p)
        idx_lo = int(bootstraps * ci / 2.0)
        idx_hi = int(bootstraps * (1.0 - ci / 2))

        return p, p - booted_p[idx_lo], booted_p[idx_hi] - p
Example #28
  def one_ci(pc, nsamp, ci, bootstraps):
    booted_p = boot_p(pc, nsamp, bootstraps)
    booted_p.sort()

    p = pylab.median(booted_p)
    idx_lo = int(bootstraps * ci/2.0)
    idx_hi = int(bootstraps * (1.0-ci/2))

    return p, p-booted_p[idx_lo], booted_p[idx_hi]-p
Example #29
def plot_histogram(histogram, html_writer, title='', max_pathway_length=8, xmin=None, xlim=20, error_bars=True, min_to_show=20, legend_loc='upper left'):
    fig = pylab.figure()

    pylab.hold(True)

    reps = 1000
    
    y_offset = 0
    offset_step = 0.007
    colors = {1:'r', 2:'orange', 3:'green', 4:'cyan', 5:'blue', 'Rest':'violet', 'Not first':'k--', 'No known regulation':'grey', 'Activated':'green', 'Inhibited':'r', 'Mixed regulation':'blue'}
    for key, value in histogram.iteritems():
        if len(value) >= min_to_show:
            m = stats.cmedian(value)
            
            sample_std = None
            
            if error_bars:
                sample_vals = []
                i = 0
                while i < reps:
                    samples = []
                    while len(samples) < len(value):
                        samples.append(random.choice(value))
                    sample_vals.append(pylab.median(samples))
                    i += 1
                
                sample_std = pylab.std(sample_vals)
                        
            plotting.cdf(value, label='%s (med=%.1f, N=%d)' % \
                (key, m, len(value)),
                style=colors.get(key, 'grey'), std=sample_std, y_offset=y_offset)
            y_offset += offset_step
            

    xmin = -1 * xlim if xmin is None else xmin
    pylab.xlim(xmin, xlim)
    pylab.xlabel('Irreversibility')
    #pylab.xlabel('deltaG')
    pylab.ylabel('Cumulative distribution')
    legendfont = matplotlib.font_manager.FontProperties(size=11)
    pylab.legend(loc=legend_loc, prop=legendfont)
    pylab.title(title)
    pylab.hold(False)
    
    if 'Not first' in histogram:
        print '%s, first vs. non-first ranksum test: ' % title + '(%f, %f)' % stats.ranksums(histogram[1], histogram['Not first'])
    
    if 'Inhibited' in histogram:
        print '%s, inhibited vs. non-regulated ranksum test: ' % title + '(%f, %f)' % stats.ranksums(histogram['Inhibited'], histogram['No known regulation'])
         
    
    #for k1, h1 in histogram.iteritems():
    #    for k2, h2 in histogram.iteritems():
    #        print k1, k2, stats.ranksums(h1, h2)
    
    return fig
Example #30
def get_stats(typeid,date):
    global db
    cur = db.cursor()
    cur.execute("SELECT AVG(price),SUM(volremain),SUM(volenter) - SUM(volremain),bid FROM archive_market WHERE typeid = %s AND (reportedtime) :: date = %s GROUP BY orderid,bid", [typeid, date])
    a = cur.fetchall()
    avg_b = array(zeros(len(a)),dtype=float)
    vol_b = array(zeros(len(a)),dtype=float)
    move_b = array(zeros(len(a)),dtype=float)
    avg_s = array(zeros(len(a)),dtype=float)
    vol_s = array(zeros(len(a)),dtype=float)
    move_s = array(zeros(len(a)),dtype=float)

    x_s = 0
    x_b = 0
    for r in a:
        if r[3]:
            avg_b[x_b] = r[0]

            vol_b[x_b] = r[1]
            move_b[x_b] = r[2]
            x_b += 1
        else:
            avg_s[x_s] = r[0]
            vol_s[x_s] = r[1]
            move_s[x_s] = r[2]
            x_s += 1
    avg_b.resize(x_b)
    avg_s.resize(x_s)
    vol_b.resize(x_b)
    vol_s.resize(x_s)
    move_b.resize(x_b)
    move_s.resize(x_s)
    b = (None,None,None)
    s = (None,None,None)
    try:
        b = (pylab.median(avg_b), pylab.mean(vol_b), pylab.mean(move_b))
        s = (pylab.median(avg_s), pylab.mean(vol_s), pylab.mean(move_s))
    except:
        return (b,b,b)

    ret = ( ((b[0]+s[0])/2, (b[1]+s[1])/2, (b[2]+s[2])/2), b, s)
    print ret
    return ret
Example #31
def DFA(data, npoints=None, degree=1, use_median=False):
    """
    computes the detrended fluctuation analysis
    returns the fluctuation F and the corresponding window length L

    :args:
        data (n-by-1 array): the data from which to compute the DFA
        npoints (int): the number of points to evaluate; if omitted the log(n)
            will be used
        degree (int): degree of the polynomial to use for detrending
        use_median (bool): use median instead of mean fluctuation

    :returns:
        F, L: the fluctuation F as function of the window length L

    """
    # max window length: n/4

    #0th: compute integral
    integral = cumsum(data - mean(data))

    #1st: compute different window lengths
    n_samples = npoints if npoints is not None else int(log(len(data)))
    lengths = sort(array(list(set(
            logspace(2,log(len(data)/4.),n_samples,base=exp(1)).astype(int)
             ))))

    #print lengths
    all_flucs = []
    used_lengths = []
    for wlen in lengths:
        # compute the fluctuation of residuals from a linear fit
        # according to Kantz&Schreiber, ddof must be the degree of polynomial,
        # i.e. 1 (or 2, if mean also counts? -> see in book)
        curr_fluc = []
#        rrt = 0
        for startIdx in arange(0,len(integral),wlen):
            pt = integral[startIdx:startIdx+wlen]
            if len(pt) > 3*(degree+1):
                resids = pt - polyval(polyfit(arange(len(pt)),pt,degree),
                                  arange(len(pt)))
#                if abs(wlen - lengths[0]) < -1:
#                    print resids[:20]
#                elif rrt == 0:
#                    print "wlen", wlen, "l0", lengths[0]
#                    rrt += 1
                curr_fluc.append(std(resids, ddof=degree+1))
        if len(curr_fluc) > 0:
            if use_median:
                all_flucs.append(median(curr_fluc))
            else:
                all_flucs.append(mean(curr_fluc))
            used_lengths.append(wlen)
    return array(all_flucs), array(used_lengths)
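
A usage sketch, assuming DFA as above with its pylab-star names in scope; for uncorrelated noise the fitted log-log slope should come out near 0.5:

from pylab import *   # DFA above relies on unqualified pylab names

data = randn(4096)
F, L = DFA(data, npoints=12)
alpha = polyfit(log(L), log(F), 1)[0]
print(alpha)          # ~0.5 for white noise
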
Example #32
def simulateGame(numGames, tilePairs, gameType):
    runTimeList = []
    for i in range(numGames):
        gameTime = gameType(tilePairs)
        runTimeList.append(gameTime)
    medTime = pylab.median(runTimeList)
    meanTime = pylab.mean(runTimeList)
    pylab.hist(runTimeList,[x*2 for x in range(400)])
    print 'meanTime: ' + str(meanTime)
    print 'medianTime: ' + str(medTime)
    return meanTime, medTime
Example #33
def createMountain(x, y):
    "create the mountain at locations x and y"
    nx = len(x)
    ny = len(y)
    h0 = py.zeros([nx,ny])
    
    # mountain parameters
    xc = py.median(x)         # mountain centre
    yc = py.median(y)
    rm = 1.5e6                  # mountain radius
    h0max = 500.              # mountain peak height
    #h0max = 0.
    
    # set the mountain for all i,j locations
    for i in xrange(0,nx):
        for j in xrange(0,ny):
            dist = py.sqrt((x[i] - xc)**2 + (y[j] - yc)**2)
            if dist < rm:
                h0[i,j] = h0max*(1-dist/rm)

    return h0
Example #34
def truncation_hist(df, outdir=FIGS_DIR):
    df2 = df[df['neuron type'].isin(['axon', 'truncated axon'])]
    df2 = df2.drop_duplicates(['neuron name', 'neuron type'])

    type_alphas = defaultdict(list)
    for neuron_name, group in df2.groupby('neuron name'):
        if len(group['neuron type']) < 2:
            continue
        for neuron_type, group2 in group.groupby('neuron type'):
            type_alphas[neuron_type] += list(group2['alpha'])

    alphas = []
    weights = []
    labels = []
    for neuron_type in type_alphas:
        alpha = type_alphas[neuron_type]
        alphas.append(alpha)
        weights.append(pylab.ones_like(alpha) / float(len(alpha)))
        labels.append(neuron_type)

    pylab.figure()
    sns.set()
    
    pylab.hist(alphas, range=(0, 1), weights=weights, label=labels)
    leg = pylab.legend(frameon=True)
    pylab.setp(leg.get_texts(), fontsize=20)
    leg_frame = leg.get_frame()
    leg_frame.set_linewidth(5)
    leg_frame.set_edgecolor('k')
    
    curr_ax = pylab.gca()
    curr_ax.set_ylim((0, 1))
    
    pylab.xlabel('alpha', size=30)
    pylab.ylabel('proportion', size=30)
    pylab.tight_layout()
    
    name = 'truncation_hist'
    outname = '%s/%s.pdf' % (outdir, name)
    outname = outname.replace(' ', '_')
    pylab.savefig('%s/%s.pdf' % (outdir, name), format='pdf')
    pylab.close()

    axons = pylab.array(type_alphas['axon'])
    truncated_axons = pylab.array(type_alphas['truncated axon'])
    differences = axons - truncated_axons
    print '-------------------------'
    print "Truncation test"
    print min(differences), pylab.median(differences), max(differences)
    print pylab.mean(differences), "+/-", pylab.std(differences, ddof=1)
    print wilcoxon(axons, truncated_axons)
    print ttest_rel(axons, truncated_axons)
Example #35
def create_graph(name, sequences, cutoff=None):
    """Creates a graph based on the n-gram similarity of the sequences in the
  given list of sequences"""
    nodes = [s.name() for s in sequences]
    edges = [(s1.name(), s2.name(), s1.distance(s2)) for s1 in sequences
             for s2 in sequences]
    cutoff = cutoff if cutoff else pyl.median([d for (n1, n2, d) in edges])
    print "edge cutoff: %.2f" % cutoff
    edges = filter(lambda (n1, n2, d): d < cutoff, edges)
    G = nx.Graph(name=name)
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)
    return G
Example #36
    def __init__(self, data, time):
        # data format: multidimensional numpy array
        #              Each inner array is an array of OD values
        #              ordered by time.
        #              This is important for determining the median

        self.dataReps = data  # OD data values (replicates implied)
        self.dataMed = py.median(self.dataReps, axis=0)
        self.time = time  # time values
        self.asymptote = self.__calcAsymptote()
        self.maxGrowthRate, self.mgrTime = self.__calcMGR()
        self.dataLogistic, self.lag = self.__calcLag()
        self.growthLevel = self.__calcGrowth()
Example #37
def validate_age_group(model, replicate):
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    N = 30
    delta_true = 5.0
    pi_true = true_rate_function
    m = simulate_age_group_data(N=N, delta_true=delta_true, pi_true=pi_true)

    if model == "midpoint_covariate":
        fit_midpoint_covariate_model(m)
    if model == "alt_midpoint_covariate":
        fit_alt_midpoint_covariate_model(m)
    elif model == "age_standardizing":
        fit_age_standardizing_model(m)
    elif model == "age_integrating":
        fit_age_integrating_model(m)
    elif model == "midpoint_model":
        fit_midpoint_model(m)
    elif model == "disaggregation_model":
        fit_disaggregation_model(m)
    else:
        raise TypeError, 'Unknown model type: "%s"' % model

    # compare estimate to ground truth
    import data_simulation

    m.mu = pandas.DataFrame(
        dict(
            true=[pi_true(a) for a in range(101)],
            mu_pred=m.vars["mu_age"].stats()["mean"],
            sigma_pred=m.vars["mu_age"].stats()["standard deviation"],
        )
    )
    data_simulation.add_quality_metrics(m.mu)
    print "\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f" % (
        m.mu["abs_err"].mean(),
        pl.median(pl.absolute(m.mu["rel_err"].dropna())),
        m.mu["covered?"].mean(),
    )
    print

    data_simulation.add_quality_metrics(m.mu)

    data_simulation.initialize_results(m)
    data_simulation.add_to_results(m, "mu")
    data_simulation.finalize_results(m)

    return m
Example #38
def get_uniform_data(*args):

    # internally define variables
    dist_old = args[0]
    elev_old = args[1]
    func_class_old = args[2]
    wav_lst_old = args[3]
    nom_dist_window = args[4]

    window_cnt = mp.ceil(max(dist_old) / nom_dist_window)
    act_dist_window = max(dist_old) / window_cnt

    dist_new = mp.linspace(0.0, dist_old[-1], window_cnt + 1)
    elev_new = np.asarray([-1.0] * len(dist_new))
    func_class_new = np.zeros(len(dist_new)) - 1.0
    wav_lst_new = np.zeros(len(dist_new)) - 1.0

    for i in range(len(dist_new)):
        logical1 = dist_old >= (dist_new[i] - act_dist_window / 2.0)
        logical2 = dist_old <= (dist_new[i] + act_dist_window / 2.0)
        ind = mp.find(np.bitwise_and(logical1, logical2))
        if len(ind) != 0:
            y0 = elev_old[ind]
            elev_new[i] = mp.median(y0)
            func_class_mode, func_class_mode_cnt = stats.mode(
                func_class_old[ind])
            func_class_new[i] = np.copy(func_class_mode)
            wav_mode, wav_mode_cnt = stats.mode(wav_lst_old[ind])
            wav_lst_new[i] = np.copy(wav_mode)

    elev_new[0] = 1.0 * elev_old[0]
    elev_new[-1] = 1.0 * elev_old[-1]

    ind = mp.find(elev_new != -1.0)
    if len(ind) > 1:
        elev_new_func = interp1d(dist_new[ind], elev_new[ind], kind=1)
        elev_new = elev_new_func(dist_new)

    ind = mp.find(func_class_new != -1.0)
    if len(ind) > 1:
        fc_new_func = interp1d(dist_new[ind], func_class_new[ind], kind=0)
        func_class_new = fc_new_func(dist_new)

    ind = mp.find(wav_lst_new != -1.0)
    if len(ind) > 1:
        wav_new_func = interp1d(dist_new[ind], wav_lst_new[ind], kind=0)
        wav_lst_new = wav_new_func(dist_new)

    return dist_new, elev_new, func_class_new, wav_lst_new
Example #39
def readDatDirectory(key, directory):
    global stats
    #Don't read data in if it's already read
    if not key in DATA["mean"]:
        data = defaultdict(array)

        #Process the dat files
        for datfile in glob.glob(directory + "/*.dat"):
            fileHandle = open(datfile, 'rb')
            keys, dataDict = csvExtractAllCols(fileHandle)
            stats = union(stats, keys)
            for aKey in keys:
                if not aKey in data:
                    data[aKey] = reshape(array(dataDict[aKey]),
                                         (1, len(dataDict[aKey])))
                else:
                    data[aKey] = append(data[aKey],
                                        reshape(array(dataDict[aKey]),
                                                (1, len(dataDict[aKey]))),
                                        axis=0)

        #Process the div files'
        for datfile in glob.glob(directory + "/*.div"):
            fileHandle = open(datfile, 'rb')
            keys, dataDict = csvExtractAllCols(fileHandle)
            stats = union(stats, keys)
            for aKey in keys:
                if not aKey in data:
                    data[aKey] = reshape(array(dataDict[aKey]),
                                         (1, len(dataDict[aKey])))
                else:
                    data[aKey] = append(data[aKey],
                                        reshape(array(dataDict[aKey]),
                                                (1, len(dataDict[aKey]))),
                                        axis=0)

        #Iterate through the stats and calculate mean/standard deviation
        for aKey in stats:
            if aKey in data:
                DATA["mean"][key][aKey] = mean(data[aKey], axis=0)
                DATA["median"][key][aKey] = median(data[aKey], axis=0)
                DATA["std"][key][aKey] = std(data[aKey], axis=0)
                DATA["ste"][key][aKey] = std(data[aKey], axis=0) / sqrt(
                    len(data[aKey]))
                DATA["min"][key][aKey] = mean(data[aKey], axis=0) - amin(
                    data[aKey], axis=0)
                DATA["max"][key][aKey] = amax(data[aKey], axis=0) - mean(
                    data[aKey], axis=0)
                DATA["actual"][key][aKey] = data[aKey]
Example #40
    def Create(cls, Image):
        '''Returns an object with the mean,median,std.dev of an image,
           this object is attached to the image object and only calculated once'''

        if '__IrtoolsImageStats__' in Image.__dict__:
            return Image.__dict__["__IrtoolsImageStats__"]

        istats = ImageStats()

        istats._median = median(Image)
        istats._mean = mean(Image)
        istats._stddev = std(Image)

        Image.__dict__["__IrtoolsImageStats__"] = istats
        return istats
Example #41
def check_page(image):
    if len(image.shape) == 3:
        return "input image is color image %s" % (image.shape, )
    if mean(image) < median(image):
        return "image may be inverted"
    h, w = image.shape
    if h < 600:
        return "image not tall enough for a page image %s" % (image.shape, )
    if h > 10000:
        return "image too tall for a page image %s" % (image.shape, )
    if w < 600:
        return "image too narrow for a page image %s" % (image.shape, )
    if w > 10000:
        return "line too wide for a page image %s" % (image.shape, )
    return None
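
The mean-versus-median comparison above is a skew test: the median sits at the dominant background level while sparse foreground pixels shift only the mean. A minimal demonstration of that skew, assuming only numpy (the array here is illustrative):

import numpy as np

page = np.full((100, 100), 255.0)
page[40:60, 10:90] = 0.0    # sparse dark ink on a light background

print(np.mean(page) < np.median(page))   # True: the dark tail drags the mean down
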
Example #42
def fit_single_gaussian(wav, flux, flux_err, name_line, wav_line, isplot=False, silent=True):
    p0 = [pl.median(flux), wav_line, 2., 10.]
    parinfo = [{'value': 0., 'fixed': 0, 'limited': [0, 0], 'limits': [0., 0.], 'parname': 'name'}]
    parinfo[0]['value'] = p0[0]
    parinfo[0]['parname'] = 'constant'
    parinfo.extend(generate_parinfo(p0=p0[1:]))
    fdata = {'x': wav, 'y': flux, 'err': flux_err, 'ngaussian': 1}
    res = mpfit(myfunct_gaussian, p0, parinfo=parinfo, functkw=fdata, quiet=silent)
    if (res.status < 0) or (res.perror is None):
        print('error message = ', res.errmsg)
        return emptyline(name_line, wav_line)
    line = LineProfile(name_line, res.params[1], res.params[3], res.perror[3], res.params[2], res.perror[2], res.params[0])
    if isplot:
        line.spec = [wav, flux]
    return line
Example #43
def plot_output_distribution(out,title):
    from splikes.utils import paramtext

    out=out.ravel()
    out_full=out

    result=py.hist(out,200)
    paramtext(1.2,0.95,
              'min %f' % min(out_full),
              'max %f' % max(out_full),
              'mean %f' % py.mean(out_full),
              'median %f' % py.median(out_full),
              'std %f' % py.std(out_full),
              )
    py.title(title)
Example #44
def histogram_and_fit(distribution_name,
                      points,
                      bins=10,
                      units="",
                      **fit_kwargs):
    histogram = pylab.hist(points, bins)
    bins = histogram[1]
    bin_step = pylab.median(pylab.diff(bins))
    distribution = _get_distribution(distribution_name)
    fit = distribution.fit(points, **fit_kwargs)
    xs = pylab.linspace(min(bins), max(bins), 1000)
    ys = distribution.pdf(xs, *fit)
    label = _get_label(distribution_name, fit, units)
    pylab.plot(xs, ys * len(points) * bin_step, 'r', label=label)
    pylab.legend()
    return fit
Example #45
def plot_output_distribution(out, title):
    from splikes.utils import paramtext

    out = out.ravel()
    out_full = out

    result = py.hist(out, 200)
    paramtext(
        1.2,
        0.95,
        'min %f' % min(out_full),
        'max %f' % max(out_full),
        'mean %f' % py.mean(out_full),
        'median %f' % py.median(out_full),
        'std %f' % py.std(out_full),
    )
    py.title(title)
Example #46
def fit(model):
    emp_priors = model.emp_priors

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, emp_priors['p', 'mu'], emp_priors['p', 'sigma'])
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=5000, burn=2000, thin=25, tune_interval=100)
    #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=101, burn=0, thin=1, tune_interval=100)

    #graphics.plot_one_ppc(model.vars['p'], 'p')
    #graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], emp_priors, 'p')
    pl.plot(model.a, model.pi_age_true, 'b--', linewidth=3, alpha=.5, label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')
    pl.title('Heterogeneity %s'%model.parameters['p']['heterogeneity'])

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[model.delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(dict(true=model.pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.finalize_results(model)

    print model.results
Example #47
def boot_curvefit(x, y, fit, p0, ci=.05, bootstraps=2000):
    """use of bootstrapping to perform curve fitting.
  Inputs:
    x - x values
    y - corresponding y values
    fit - a packaged fitting function
    p0 - initial parameter list that fit will use

    fit should be a function of the form
      p1 = fit(x, y, p0)
    with p1 being the optimized parameter vector

  Outputs:
    ci - 3xn array (n = number of parameters; rows: median, low_ci, high_ci)
    booted_p - an n-by-b array of parameter values (b = number of bootstraps)

  An example fit function is:

  def fit(x, y, p0):
    func = lambda p, t: p[0]*pylab.exp(-t/abs(p[1])) + p[2]
    errfunc = lambda p, t, y: func(p, t) - y
    p1, success = optimize.leastsq(errfunc, p0, args=(x, y))
    return p1

  """

    p0 = pylab.array(p0)  #Make it an array in case it isn't one
    if bootstraps > 1:
        idx = pylab.randint(x.size, size=(x.size, bootstraps))
    else:
        idx = pylab.zeros((x.size, 1), dtype=int)
        idx[:, 0] = pylab.arange(x.size)
    booted_p = pylab.zeros((p0.size, bootstraps))
    for n in xrange(bootstraps):
        booted_p[:, n] = fit(x[idx[:, n]], y[idx[:, n]], p0)

    p_ci = pylab.zeros((3, p0.size))
    for p in xrange(p0.size):
        booted_samp = pylab.sort(booted_p[p])
        med = pylab.median(booted_samp)
        idx_lo = int(bootstraps * ci / 2.0)
        idx_hi = int(bootstraps * (1.0 - ci / 2))
        p_ci[:,
             p] = [med, med - booted_samp[idx_lo], booted_samp[idx_hi] - med]

    return p_ci, booted_p
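
A usage sketch for boot_curvefit, wiring in the exponential-decay fit function from the docstring (with args=(x, y) to match the fit signature); assumes scipy, pylab, and the function above:

import pylab
from scipy import optimize

def fit(x, y, p0):
    func = lambda p, t: p[0] * pylab.exp(-t / abs(p[1])) + p[2]
    errfunc = lambda p, t, y: func(p, t) - y
    p1, success = optimize.leastsq(errfunc, p0, args=(x, y))
    return p1

x = pylab.linspace(0.0, 5.0, 100)
y = 3.0 * pylab.exp(-x / 1.5) + 0.5 + 0.05 * pylab.randn(100)
p_ci, booted_p = boot_curvefit(x, y, fit, [3.0, 1.0, 0.0], bootstraps=200)
print(p_ci[0])   # median estimates of the three parameters
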
Example #48
def mare(pred, obs):
    ''' model median absolute relative error
    Parameters
    ----------
    pred : df
      df of observations from model.vars[data_type]['p_pred'].stats()['mean']
    obs : df
      df of observations from model.vars[data_type]['p_obs'].value
    Results
    -------
    mare : float
      median absolute relative error, as a percent
    '''
    pred = pl.array(pred['mean'])
    obs = pl.array(obs['value']) 
    mare = pl.median((abs(pred - obs)/obs)*100)
    return mare
Example #49
def readDatDirectory(key, directory):
    global stats
    #Don't read data in if it's already read
    if not key in DATA["mean"]:
        data = defaultdict(array)

        #Process the dat files
        for datfile in glob.glob(directory + "/*.dat"):
            fileHandle = open(datfile, 'rb')
            keys, dataDict = csvExtractAllCols(fileHandle)
            stats = union(stats, keys)
            for aKey in keys:
                if not aKey in data:
                    data[aKey] = reshape(array(dataDict[aKey]),
                                         (1, len(dataDict[aKey])))
                else:
                    data[aKey] = append(data[aKey],
                                        reshape(array(dataDict[aKey]),
                                                (1, len(dataDict[aKey]))),
                                        axis=0)

        #Process the div files'
        for datfile in glob.glob(directory + "/*.div"):
            fileHandle = open(datfile, 'rb')
            keys, dataDict = csvExtractAllCols(fileHandle)
            stats = union(stats, keys)
            for aKey in keys:
                if not aKey in data:
                    data[aKey] = reshape(array(dataDict[aKey]),
                                         (1, len(dataDict[aKey])))
                else:
                    data[aKey] = append(data[aKey],
                                        reshape(array(dataDict[aKey]),
                                                (1, len(dataDict[aKey]))),
                                        axis=0)

        #Iterate through the stats and calculate mean/standard deviation
        for aKey in stats:
            if aKey in data:
                DATA["mean"][key][aKey] = mean(data[aKey], axis=0)
                DATA["median"][key][aKey] = median(data[aKey], axis=0)
                DATA["std"][key][aKey] = std(data[aKey], axis=0)
                DATA["ste"][key][aKey] = std(data[aKey], axis=0)/ sqrt(len(data[aKey]))
                DATA["min"][key][aKey] = mean(data[aKey], axis=0)-amin(data[aKey], axis=0)
                DATA["max"][key][aKey] = amax(data[aKey], axis=0)-mean(data[aKey], axis=0)
                DATA["actual"][key][aKey] = data[aKey]
Example #50
def boot_curvefit(x,y,fit, p0, ci = .05, bootstraps=2000):
  """use of bootstrapping to perform curve fitting.
  Inputs:
    x - x values
    y - corresponding y values
    fit - a packaged fitting function
    p0 - initial parameter list that fit will use

    fit should be a function of the form
      p1 = fit(x, y, p0)
    with p1 being the optimized parameter vector

  Outputs:
    ci - 3xn array (n = number of parameters; rows: median, low_ci, high_ci)
    booted_p - an n-by-b array of parameter values (b = number of bootstraps)

  An example fit function is:

  def fit(x, y, p0):
    func = lambda p, t: p[0]*pylab.exp(-t/abs(p[1])) + p[2]
    errfunc = lambda p, t, y: func(p, t) - y
    p1, success = optimize.leastsq(errfunc, p0, args=(x, y))
    return p1

  """

  p0 = pylab.array(p0) #Make it an array in case it isn't one
  if bootstraps > 1:
    idx = pylab.randint(x.size, size=(x.size, bootstraps))
  else:
    idx = pylab.zeros((x.size,1),dtype=int)
    idx[:,0] = pylab.arange(x.size)
  booted_p = pylab.zeros((p0.size, bootstraps))
  for n in xrange(bootstraps):
    booted_p[:,n] = fit(x[idx[:,n]], y[idx[:,n]], p0)

  p_ci = pylab.zeros((3, p0.size))
  for p in xrange(p0.size):
    booted_samp = pylab.sort(booted_p[p])
    med = pylab.median(booted_samp)
    idx_lo = int(bootstraps * ci/2.0)
    idx_hi = int(bootstraps * (1.0-ci/2))
    p_ci[:,p] = [med, med-booted_samp[idx_lo], booted_samp[idx_hi]-med]

  return p_ci, booted_p
Example #51
def validate_age_group(model, replicate):
    # set random seed for reproducibility
    mc.np.random.seed(1234567+replicate)

    N = 30
    delta_true = 5.
    pi_true = true_rate_function
    m = simulate_age_group_data(N=N, delta_true=delta_true, pi_true=pi_true)
    
    if model == 'midpoint_covariate':
        fit_midpoint_covariate_model(m)
    elif model == 'age_standardizing':
        fit_age_standardizing_model(m)
    elif model == 'age_integrating':
        fit_age_integrating_model(m)
    elif model == 'midpoint_model':
        fit_midpoint_model(m)
    elif model == 'disaggregation_model':
        fit_disaggregation_model(m)
    else:
        raise TypeError, 'Unknown model type: "%s"' % model


    # compare estimate to ground truth
    import data_simulation
    m.mu = pandas.DataFrame(dict(true=[pi_true(a) for a in range(101)],
                                 mu_pred=m.vars['mu_age'].stats()['mean'],
                                 lb_pred=m.vars['mu_age'].stats()['95% HPD interval'][:,0],
                                 ub_pred=m.vars['mu_age'].stats()['95% HPD interval'][:,1]))
    data_simulation.add_quality_metrics(m.mu)
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (m.mu['abs_err'].mean(),
                                                                         pl.median(pl.absolute(m.mu['rel_err'].dropna())),
                                                                         m.mu['covered?'].mean())
    print


    data_simulation.add_quality_metrics(m.mu)

    data_simulation.initialize_results(m)
    data_simulation.add_to_results(m, 'mu')
    data_simulation.finalize_results(m)

    return m
Example #52
def validate_age_group(model, replicate):
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    N = 30
    delta_true = 5.
    pi_true = true_rate_function
    m = simulate_age_group_data(N=N, delta_true=delta_true, pi_true=pi_true)

    if model == 'midpoint_covariate':
        fit_midpoint_covariate_model(m)
    elif model == 'alt_midpoint_covariate':
        fit_alt_midpoint_covariate_model(m)
    elif model == 'age_standardizing':
        fit_age_standardizing_model(m)
    elif model == 'age_integrating':
        fit_age_integrating_model(m)
    elif model == 'midpoint_model':
        fit_midpoint_model(m)
    elif model == 'disaggregation_model':
        fit_disaggregation_model(m)
    else:
        raise TypeError, 'Unknown model type: "%s"' % model

    # compare estimate to ground truth
    import data_simulation
    m.mu = pandas.DataFrame(
        dict(true=[pi_true(a) for a in range(101)],
             mu_pred=m.vars['mu_age'].stats()['mean'],
             sigma_pred=m.vars['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(m.mu)
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        m.mu['abs_err'].mean(), pl.median(pl.absolute(
            m.mu['rel_err'].dropna())), m.mu['covered?'].mean())
    print

    data_simulation.initialize_results(m)
    data_simulation.add_to_results(m, 'mu')
    data_simulation.finalize_results(m)

    return m
Example #53
def store_results(dm, area, sex, year):
    types_to_plot = 'p i r rr'.split()

    graphics.plot_convergence_diag(dm.vars)
    pl.clf()
    for i, t in enumerate(types_to_plot):
        pl.subplot(len(types_to_plot), 1, i+1)
        graphics.plot_data_bars(dm.model.get_data(t))
        pl.plot(range(101), dm.emp_priors[t, 'mu'], linestyle='dashed', color='grey', label='Emp. Prior', linewidth=3)
        pl.plot(range(101), dm.true[t], 'b-', label='Truth', linewidth=3)
        pl.plot(range(101), dm.posteriors[t].mean(0), 'r-', label='Estimate', linewidth=3)

        # approximate 95% interval from the posterior standard deviation
        pl.errorbar(range(101), dm.posteriors[t].mean(0), yerr=1.96*dm.posteriors[t].std(0), fmt='r-', linewidth=1, capsize=0)

        pl.ylabel(t)
        graphics.expand_axis()
    
    pl.legend(loc=(0.,-.95), fancybox=True, shadow=True)
    pl.subplots_adjust(hspace=0, left=.1, right=.95, bottom=.2, top=.95)
    pl.xlabel('Age (Years)')
    pl.show()

    model = dm
    model.mu = pandas.DataFrame()
    for t in types_to_plot:
        model.mu = model.mu.append(pandas.DataFrame(dict(true=dm.true[t],
                                                         mu_pred=dm.posteriors[t].mean(0),
                                                         sigma_pred=dm.posteriors[t].std(0))),
                                   ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.mu['abs_err'].mean(),
                                                                         pl.median(pl.absolute(model.mu['rel_err'].dropna())),
                                                                         model.mu['covered?'].mean())
    print

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'mu')
    data_simulation.finalize_results(model)

    print model.results

    return model
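
Purely as an illustration of the summaries built above (with made-up sample data standing in for dm.posteriors[t], which is a draws-by-ages array):

import pylab as pl

samples = 1. + 0.1*pl.randn(1000, 101)  # stand-in for dm.posteriors[t]
mu_pred = samples.mean(0)               # posterior mean at each age
sigma_pred = samples.std(0)             # spread; plotted above as a 1.96*sigma band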
Example #54
def try_kegg_api():
    db = SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter('../res/dG0_test.html')
    G = GroupContribution(db, html_writer=html_writer)
    G.init()
    
    wsdl = 'http://soap.genome.jp/KEGG.wsdl'
    serv = WSDL.Proxy(wsdl)
    
    rid_file = open('../res/eco_rids.txt', 'w')
    rids = set()
    for x in serv.list_pathways('eco'):
        pathway_id = x['entry_id']
        for reaction_id in serv.get_reactions_by_pathway(pathway_id):
            rid = int(reaction_id[4:])
            if rid not in rids:
                rids.add(rid)
                rid_file.write('%d\n' % rid)
    rid_file.close()
            
    c_mid = 1e-3
    pH, pMg, I, T = (7.0, 3.0, 0.1, 298.15)
    
    rid2reversibility = {}
    misses = 0
    for rid in sorted(rids):
        try:
            reaction = G.kegg.rid2reaction(rid)
            r = CalculateReversability(reaction, G, c_mid, pH, pMg, I, T)
            rid2reversibility[rid] = r
        except thermodynamics.MissingCompoundFormationEnergy:
            misses += 1
            continue
    
    print 'hits = %d, misses = %d' % (len(rid2reversibility), misses)
    median = pylab.median(rid2reversibility.values())
    print 'median = %.1f' % median

    pylab.figure()
    pylab.hold(True)
    plotting.cdf(rid2reversibility.values(), 'all reactions', 'r', show_median=True)
    pylab.show()
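
plotting.cdf is a project-local helper whose source is not included here; a rough stand-in, assuming it draws an empirical CDF with an optional median marker, might look like:

import pylab

def cdf(values, label, color, show_median=False):
    # empirical CDF: sorted sample values against their cumulative fraction
    v = pylab.sort(pylab.array(values))
    f = pylab.arange(1, len(v)+1) / float(len(v))
    pylab.plot(v, f, color, label=label)
    if show_median:
        pylab.axvline(pylab.median(v), color=color, linestyle='--')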
Example #55
def stats(results):
    """
    Compute and print statistics for record to stdout.
    In:
        results : list of dicts, processing results
    """
    means = {'beta':0.0,'std':0.0, 'cov':0.0, 'mean':0.0}
    mins = means.copy()
    maxs = means.copy()
    medians = means.copy()
    stdevs = means.copy()
    for param in means.keys():
        mins[param] = min([v[param] for v in results])
        maxs[param] = max([v[param] for v in results])
        means[param] = pl.mean([v[param] for v in results])
        medians[param] = pl.median([v[param] for v in results])
        stdevs[param] = pl.std([v[param] for v in results])
    print "Min:\t\t%(beta)0.2f\t%(std)d\t%(cov)0.2f%%\t%(mean)d" % mins
    print "Max:\t\t%(beta)0.2f\t%(std)d\t%(cov)0.2f%%\t%(mean)d" % maxs
    print "Mean:\t\t%(beta)0.2f\t%(std)d\t%(cov)0.2f%%\t%(mean)d" % means
    print "Median:\t\t%(beta)0.2f\t%(std)d\t%(cov)0.2f%%\t%(mean)d" % medians
    print "Stdev:\t\t%(beta)0.2f\t%(std)d\t%(cov)0.2f%%\t%(mean)d" % stdevs
Example #56
def makePlots(self, ax, query, fNum, fColor, fMarker, feedstock):

    query.getQuery()

    if query.queryString.startswith('No'):
        pass

    elif query.queryString.startswith('FR'):
        ax.plot([fNum]*2, [1, 1], fColor, marker=fMarker, markersize=2)

    else:
        data = self.db.output(query.queryString, self.db.schema)
        medVal = median(data)
        maxVal = max(data)
        minVal = min(data)

        ax.plot([fNum], medVal, fColor, marker='_', markersize=7)

        # plot the max/min values
        ax.plot([fNum]*2, [maxVal, minVal], fColor, marker=fMarker, markersize=2)

        self.writeResults(feedstock, str(maxVal[0]), str(medVal), str(minVal[0]))
Example #57
# imports assumed by this excerpt
import string
import pyfits as pf
import pylab as pl
from pyraf import iraf

fwhm = []

for i in range(61,395):
    print i
    
    # open spectrum and calculate continuum level near Ha line then write to cursor file
    data = pf.getdata('fec2117_%04d.fits'%i)
    head = pf.getheader('fec2117_%04d.fits'%i)
    start = head['CRVAL1']
    step = head['CDELT1']
    length = head['NAXIS1']
    x = start + pl.arange(0,length)*step
    # boolean mask for the continuum window between 4640 and 4730 Angstroms
    hi = x > 4640
    low = x < 4730
    xx = hi*low
    print med
    cursor = open('cursor','w')
    cursor.write('4640 %s 1 k\n4730 %s 1 k\n'%(med,med))
    cursor.close()
    
    iraf.splot(images='fec2117_%04d.fits'%i,\
    cursor='cursor',\
    save_file='splot.log')
    
    # read splot.log and extract the results
    myfile = open('splot.log','r')
    lines = myfile.readlines()
    try:
        # the first log written to empty file has header
        temp = string.split(string.strip(lines[3]))