Example #1
def get_lognormal_effluent():
    df = pd.read_excel('./model_WWT/SSD_effluent.xlsx',
                       parse_dates=['Date'],
                       index_col='Date')
    df = df.dropna()
    df = df[(df.iloc[:, 0] < 115)]
    mask = (df != 0).any(axis=1)
    df = df.loc[mask]
    df_min, df_max = min(df.iloc[:, 0]), max(df.iloc[:, 0])
    df['month'] = df.index.month
    # df['year'] = df.index.year
    # eff_np = np.zeros((12,2020-1990))
    # for i in range (2020-1990):
    #     for j in range(12):
    #         eff_np[j,i] = df[(df.year==1990+i) & (df.month==1+j)].iloc[:,0].mean()

    sigma = []
    mu = []
    for i in range(12):
        data = df[(df.month == i + 1)].iloc[:, 0]
        parm = lognorm.fit(data, floc=0)
        sigma.append(parm[0])
        mu.append(np.log(parm[2]))
#
#        mean = np.exp(mu + 1/2*(sigma**2))
#        mean_data = data.mean()
#        median = np.exp(mu)
#        cv = np.sqrt(np.exp(sigma**2) - 1)
#        sd = mean*np.sqrt(np.exp(sigma**2) - 1)
    data = df.iloc[:, 0]
    parm = lognorm.fit(data, floc=0)
    sigma.append(parm[0])
    mu.append(np.log(parm[2]))

    return mu, sigma
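
The function above returns twelve monthly (mu, sigma) pairs plus a pooled fit in the final slot. A minimal sketch of drawing synthetic monthly effluent values from those parameters (sample_effluent is a hypothetical helper; it assumes the lists returned by get_lognormal_effluent):

import numpy as np
from scipy.stats import lognorm

def sample_effluent(mu, sigma, month, n=100, seed=0):
    # With floc=0 the scipy fit gives s = sigma and scale = exp(mu),
    # so the fitted parameters plug straight back into lognorm.rvs.
    rng = np.random.default_rng(seed)
    return lognorm.rvs(s=sigma[month - 1], scale=np.exp(mu[month - 1]),
                       size=n, random_state=rng)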
Example #2
class TestQDM(NumpyTestCase.NumpyTestCase):
    badinput = 0.5
    nanarray = np.array([1, 2, 3, 4, np.nan])

    obsdist = lognorm.rvs(0.57, size=100)
    obsp = lognorm.fit(obsdist)
    refdist = lognorm.rvs(0.45, size=100)
    refp = lognorm.fit(refdist)
    futdist = lognorm.rvs(0.55, size=100)
    futp = lognorm.fit(futdist)
    x = np.linspace(0, 1, 101)
    qobs = np.quantile(obsdist, x)
    qref = np.quantile(refdist, x)
    qfut = np.quantile(futdist, x)

    def testQDMInput(self):
        """Test input is array-like"""
        self.assertRaises(TypeError, qdm, 0.5, 0.5, 0.5)

    def testQDMNanInput(self):
        """Test input array has no nan values"""
        self.assertRaises(ValueError, qdm, self.nanarray, self.nanarray,
                          self.nanarray)

    def testRefInput(self):
        """Test using reference data as future returns obs dist params"""
        testqfut = qdm(self.obsdist, self.refdist, self.refdist)
        testp = lognorm.fit(testqfut)
        self.assertAlmostEqual(self.obsp[0], testp[0], places=2)
        self.assertAlmostEqual(self.obsp[1], testp[1], places=2)
        self.assertAlmostEqual(self.obsp[2], testp[2], places=2)
Example #3
    def dataHistogram(self):
        #Code for plotting the histogram of number of pickups
        plt.hist(self.dftaxi.num_pickups, density=True, bins=5)
        plt.ylabel('Frequency')
        plt.title("Unscaled - Number of Pickups")
        plt.show()
        # define the figure with 2 subplots
        fig,ax = plt.subplots(1,2,figsize = (15,4))
        print("the unscaled graph is not representative and hence we go for scaling ")
        #if data is skewed negative binomial will perform better than poisson
        # histogram of the number of pickups
        self.dftaxi.num_pickups.hist(bins=30,ax=ax[0])
        ax[0].set_xlabel('Num of Pickups')
        ax[0].set_ylabel('Count')
        ax[0].set_yscale('log')
        ax[0].set_title('Histogram of Pickups - Normal Scale')

        # create a vector to hold num of pickups
        v = self.dftaxi.num_pickups 

        # plot the histogram with 30 bins
        v[~((v-v.median()).abs()>3*v.std())].hist(bins=30,ax=ax[1]) 
        ax[1].set_xlabel('Num of pickups')
        ax[1].set_ylabel('Count')
        ax[1].set_title('Histogram of Num of pickups - Scaled')
        print("A scaled graph is being plotted instead...!")
        print("\n")
        # apply a lognormal fit, using the mean number of pickups as the scale parameter
        shape, loc, scale = lognorm.fit(self.dftaxi.num_pickups.values, scale=self.dftaxi.num_pickups.mean(), loc=0)
        pdf_fitted = lognorm.pdf(np.arange(0, 12, .1), shape, loc, scale)
        ax[1].plot(np.arange(0,12,.1),600000*pdf_fitted,'r') 
        ax[1].legend(['data','lognormal fit'])
        plt.show()
Example #4
 def testRefInput(self):
     """Test using reference data as future returns obs dist params"""
     testqfut = qdm(self.obsdist, self.refdist, self.refdist)
     testp = lognorm.fit(testqfut)
     self.assertAlmostEqual(self.obsp[0], testp[0], places=2)
     self.assertAlmostEqual(self.obsp[1], testp[1], places=2)
     self.assertAlmostEqual(self.obsp[2], testp[2], places=2)
Example #5
    def test_fa(self):
        T = 10
        q = generic.fa(self.da, T, 'lognorm')

        p0 = lognorm.fit(self.da.values[:, 0, 0])
        q0 = lognorm.ppf(1 - 1. / T, *p0)
        np.testing.assert_array_equal(q[0, 0, 0], q0)
Example #6
def logdistplot(d, **kwargs):
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    from scipy.stats import lognorm
    ax = sns.distplot(d, fit=lognorm, **kwargs)
    shape, loc, scale = lognorm.fit(d)
    ax.set_title('Fit mode: {}'.format(loc + np.exp(np.log(scale) - shape**2)))
    plt.show()
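
A hypothetical call on synthetic data (note that distplot is deprecated in recent seaborn releases in favor of histplot/displot):

import numpy as np
from scipy.stats import lognorm

data = lognorm.rvs(0.5, scale=np.exp(1.0), size=500, random_state=42)
logdistplot(data, bins=40)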
Example #7
    def returnDistData(cls, self):
        gammaParam = gamma.fit(10**(self.data / 10))
        gammaDist = gamma.pdf(self.data, *gammaParam)

        rayleighParam = rayleigh.fit(self.data)
        rayleighDist = rayleigh.pdf(self.data, *rayleighParam)

        normParam = norm.fit(self.data)
        normDist = norm.pdf(self.data, *normParam)

        logNormParam = lognorm.fit(self.data)
        lognormDist = lognorm.pdf(self.data, *logNormParam)

        nakagamiParam = nakagami.fit(self.data)
        nakagamiDist = nakagami.pdf(self.data, *nakagamiParam)

        exponParam = expon.fit(self.data)
        exponDist = expon.pdf(self.data, *exponParam)

        exponweibParam = exponweib.fit(self.data)
        weibDist = exponweib.pdf(self.data, *exponweibParam)

        distDF = pd.DataFrame(np.column_stack([
            gammaDist, rayleighDist, normDist, lognormDist, nakagamiDist,
            exponDist, weibDist
        ]),
                              columns=[
                                  'gammaDist', 'rayleighDist', 'normDist',
                                  'lognormDist', 'nakagamiDist', 'exponDist',
                                  'weibDist'
                              ])
        self.distDF = distDF
Example #8
def distfit(n,dists,title,width,height,fwhm,dm,samples=1000):
    import numpy as np
    from scipy import ndimage
    from scipy.stats import lognorm

    bins_h = int(height * 60. / 8.)
    bins_w = int(width * 60. / 8.)
    sig = ((bins_w/width)*fwhm)/2.355
    valsLP = []
    for i in range(samples) :
        random_ra = width*np.random.random_sample((n,))
        random_dec = height*np.random.random_sample((n,))
        random_xy = zip(random_ra,random_dec)
        grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins_h,bins_w], range=[[0,height],[0,width]])
        hist_points_r = zip(xedges_r,yedges_r)
        grid_gaus_r = ndimage.gaussian_filter(grid_r, sig, mode='constant', cval=0)
        S_r = np.array(grid_gaus_r*0)

        grid_mean_r = np.mean(grid_gaus_r)
        grid_sigma_r = np.std(grid_gaus_r)
        S_r = (grid_gaus_r-grid_mean_r)/grid_sigma_r

        x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),grid_gaus_r.shape)
        valsLP.append(S_r[x_cent_r][y_cent_r])

    x = np.linspace(2, 22, 4000)

    bins, edges = np.histogram(valsLP, bins=400, range=[2, 22], density=True)
    centers = (edges[:-1] + edges[1:])/2.

    al, loc, beta = lognorm.fit(valsLP)
    pct = 100.0 * lognorm.cdf(dists, al, loc=loc, scale=beta)
    print('Significance of detection: {0:6.3f}%'.format(pct))
Example #9
 def test_fa(self):
     T = 10
     q = generic.fa(self.da, T, "lognorm")
     assert "return_period" in q.coords
     p0 = lognorm.fit(self.da.values[:, 0, 0])
     q0 = lognorm.ppf(1 - 1.0 / T, *p0)
     np.testing.assert_array_equal(q[0, 0, 0], q0)
Example #10
def logNormal(dset, a, b, c, d, e, f, g):
    global shape,loc,scale,distro,pdf
    try:
        plt.xscale(f)
    except ValueError:
        print "Specify the type of scale for the x axis"
        return
    
    if g == "linspace":
        distro = np.linspace(a,b,c)
    elif g == "logspace":
        distro = np.logspace(a,b,c)
        for x in range(0, len(distro)):
            #distro[x] = distro[x] + 40
            #adding 40 really does not matter for the overall distribution, it literally just shifts it right
            print("")
    else:
        print "That didn't work."
        return

    shape,loc,scale = lognorm.fit(dset)
    
    
    pdf = lognorm.pdf(distro, shape, loc, scale)
    
    plt.plot(distro, pdf,color=e) #formerly ax.plot
    plt.title(d + " PDF with data")
Example #11
def response_time_dist(filename, column):
    """
    Returns the lognormal distribution fit of travel times.
    """
    dt = pandas.read_csv(filename)
    response = lognorm.fit(dt[column])
    click.echo(response)
    return response
Example #12
def response_time_dist(filename, column):
    """
    Returns the lognormal distribution fit of travel times.
    """
    dt = pandas.read_csv(filename)
    response = lognorm.fit(dt[column])
    click.echo(response)
    return response
Example #13
 def fit(self, data, s=1, loc=1, scale=1):
     (s, loc, scale) = lognorm.fit(data)
     self.s, self.loc, self.scale = (s, loc, scale)
     self.mu = np.log(self.scale)
     self.sigma = self.s
     self.params = {'s': self.s, 'loc': self.loc, 'scale': self.scale}
     self.setParams(self.params)
     return (self.s, self.loc, self.scale)
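
The wrapper above stores scipy's (s, loc, scale) triple and derives the log-space parameters as mu = log(scale) and sigma = s; that reading is exact when loc is 0. A quick round-trip check of the mapping (a sketch using only numpy and scipy):

import numpy as np
from scipy.stats import lognorm

mu_true, sigma_true = 1.5, 0.4
x = lognorm.rvs(s=sigma_true, scale=np.exp(mu_true), size=5000, random_state=0)
s, loc, scale = lognorm.fit(x, floc=0)
print(np.log(scale), s)  # should come back close to 1.5 and 0.4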
Example #14
def fn_photonflux_hist(file_name, folder, mean_photons_per_sec):
    """
    Plots a histogram of the mean photon flux per molecule and fits a lognormal
    distribution. Inputs: the data plus a file name and folder, which should be
    defined in the calling script.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import lognorm
    from pylab import text

    n_molecules = len(mean_photons_per_sec)

    #Plot photon flux
    figure_name = file_name + '_photonsPerSecond'
    ax = plt.subplot(111)
    num_bins = np.linspace(int(min(mean_photons_per_sec)),
                           int(max(mean_photons_per_sec)),
                           int(np.sqrt(len(mean_photons_per_sec)) * 4))
    ax.hist(mean_photons_per_sec,
            bins=num_bins,
            density=True,
            color='darkorange',
            edgecolor='black')

    #Fit curve
    sigma, loc, mean = lognorm.fit(mean_photons_per_sec, floc=0)
    pdf = lognorm.pdf(num_bins, sigma, loc,
                      mean)  #sigma=shape, mu=np.log(scale)
    ax.plot(num_bins, pdf, 'k', linestyle='--')

    #Edit plot
    plt.xlabel('Photon flux ($s^{-1}$)', fontname='Arial', fontsize=12)
    plt.ylabel('Probability density', fontname='Arial', fontsize=12)
    plt.xticks(fontname='Arial', fontsize=12)
    plt.yticks(fontname='Arial', fontsize=12)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    text(0.75,
         0.95,
         'μ=' + str(round(mean, 2)) + ' photons $s^{-1}$',
         horizontalalignment='center',
         verticalalignment='center',
         transform=ax.transAxes,
         fontname='Arial',
         fontsize=12)
    text(0.40,
         0.95,
         'N=' + str(n_molecules),
         horizontalalignment='center',
         verticalalignment='center',
         transform=ax.transAxes,
         fontname='Arial',
         fontsize=12)
    plt.savefig(folder + '/Figures/PDFs' + '/' + figure_name + '.pdf', dpi=500)
    plt.savefig(folder + '/Figures/PNGs' + '/' + figure_name + '.png', dpi=500)

    return (plt.show())
Example #15
def createHisto(histoData1, histoData2, imageName1, imageName2):
    """
    Creates a diagram showing two histograms.
    """
    
    fig = plt.figure()
    plt.subplot(111)
    data1 = histoData1['data']
    data2 = histoData2['data']
    n, bins, patches = plt.hist(data1, _NUMBER_OF_HISTO_BARS, range=(0, data1.max()), density=False, \
            weights=np.zeros_like(data1)+1./data1.size, facecolor=_COLOR_FIRST_DATA[0], alpha=0.4, label=imageName1)
    n2, bins2, patches = plt.hist(data2, _NUMBER_OF_HISTO_BARS, range=(0, data2.max()), density=False, \
            weights=np.zeros_like(data2)+1./data2.size, facecolor=_COLOR_SECOND_DATA[0], alpha=0.4, label=imageName2)
    
    # 'best fit' line
    shape, loc, scale = lognorm.fit(data1, floc=0) # Fit a curve to the variates
    maximum = data1.max() if data1.max()>data2.max() else data2.max()
    x = np.linspace(0, 1.2 * maximum, num=500)
    # scaling
    binlength = bins[1] - bins[0]
    alpha = factorize(n, binlength)
    
    shape2, loc2, scale2 = lognorm.fit(data2, floc=0) # Fit a curve to the variates
    # scaling
    binlength2 = bins2[1] - bins2[0]
    alpha2 = factorize(n2, binlength2)
    
    # plot functions
    simplefilter("ignore", RuntimeWarning)  # avoid warning in this method
#    plt.plot(bins[1:], n, 'b^', alpha=0.5)
    plt.plot(x, alpha * (lognorm.pdf(x, shape, loc=0, scale=scale)), _COLOR_FIRST_DATA[1]+'--')
#    plt.plot(bins2[1:], n2, 'g^', alpha=0.5)
    plt.plot(x, alpha2 * (lognorm.pdf(x, shape2, loc=0, scale=scale2)), _COLOR_SECOND_DATA[1]+'--')
    axe = plt.axis()
    newaxe =(axe[0], 1.2 * maximum, axe[2], axe[3])
    plt.axis(newaxe)
    plt.title(histoData1['title'])
    plt.ylabel(u'Relative frequency ' + r'$\left[\mathrm{\mathsf{ \frac{N}{\Sigma N} }}\right]$')
    plt.xlabel(histoData1['xlabel'])
    simplefilter("default", RuntimeWarning)
    
    # position the legend
    plt.legend(loc=0, frameon=0)
    plt.minorticks_on()
    return fig
Example #16
 def __init__(self, mode=0, elem=None, sample=None):
     if mode == 0:
         self.s = elem[0]
         self.mu = elem[1]
         self.sigma = elem[2]
     else:
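         # NB: scipy's fit returns (s, loc, scale); the names mu and sigma below actually hold loc and scale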
         self.s, self.mu, self.sigma = lognorm.fit(sample)
     self.math_average = lognorm.mean(self.s, loc=self.mu, scale=self.sigma)
     self.dispersion = lognorm.var(self.s, loc=self.mu, scale=self.sigma)
Example #17
def lognormal(x, y):
    s, loc, scale = lognorm.fit(x)
    xmin = x.min()
    xmax = x.max()
    x = np.linspace(xmin, xmax, len(x))
    pdf = lognorm.pdf(x, s, loc=loc, scale=scale)
    yres = pdf
    print("Sum of squared difference (log-normal): ", np.sum((y-yres)**2))
    return yres
Example #18
    def _boots(self, df, newx, shape, scale, dist=lognorm):
        xr = lognorm.rvs(size=len(df['Prediction']),
                         s=shape,
                         loc=0,
                         scale=scale)
        this_shape, this_loc, this_scale = lognorm.fit(xr, floc=0)
        this_fit = dist.cdf(newx, s=this_shape, loc=0, scale=this_scale)

        return list(this_fit)
Example #19
def plot_gh_distribution(N=100, dp=2.0, sds=(1, 2, 3)):
    fig, ax = plt.subplots(
        figsize=[2 * plotdl.latex_width_inch, plotdl.latex_height_inch])
    cbandwidth = 20  #N #20
    tga = np.zeros([len(sds), cbandwidth])
    gg = np.zeros([len(sds)])
    for n, (sd, tg) in enumerate(zip(sds, tga)):
        m = models.Model_Anderson_DD_1d(number_of_points=N,
                                        dis_param=dp,
                                        periodic=False,
                                        bandwidth=1,
                                        prng=np.random.RandomState(sd))
        ## TAKE ONLY CENTER OF BAND:
        crange = np.arange(N // 2 - cbandwidth // 2, N // 2 + cbandwidth // 2)
        #crange = (abs(m.eig_vals).copy().argsort())[:4]
        #debug((crange, np.arange(N//2 -cbandwidth//2, N//2 + cbandwidth//2)))
        ga = N * phys_functions.ga(m.eig_matrix[:, [crange]])
        tg[:] = ga
        gg[n] = abs(phys_functions.A_matrix_inv(m.rate_matrix, 1, 1.57))**2
        #p = s_cummulative_plot(ax, ga)

    mi, ma, theo = phys_functions.ga_cdf(lyap_gamma(1, dp), N)
    ghs = np.logspace(np.log10(mi), np.log10(ma) + 1, 1000)
    logg = logavg(tga, axis=0)
    s_cummulative_plot(ax, logg)
    debug((logg.shape, tga.shape))
    p = s_cummulative_plot(ax, tga)
    sh, lo, sc = lognorm.fit(tga.flat)
    mu = np.log(sc)
    stdv = sh
    debug((sh, lo, sc))
    #lg = lognorm()
    xspace = np.logspace(np.log10(tga.min()), np.log10(tga.max()))
    ax.plot(xspace,
            norm.cdf(np.log(xspace), [stdv], loc=mu, scale=stdv),
            '-.',
            color='cyan')
    ax.plot(ghs, theo(ghs), color='black')
    #return ghs, theo
    #tga_sum = np.nansum(tga,axis=1)
    tga_avg = np.average(tga, axis=1)
    ax.axvline(logavg(gg))
    ax.axvline(np.average(gg), ls='--')
    ax.axvline(logavg(tga_avg), ls='-', color='red')
    ax.axvline(np.average(tga), ls='-', color='magenta')
    ax.axvline(logavg(tga[tga > 1e-100]), ls=':', color='red')
    ax.axvline(np.exp(-2 * N * lyap_gamma(1, dp)), ls='--', color='green')
    #ax.axvline(4*(lyap_gamma(1,dp)**2)*N*np.exp(-2*N*lyap_gamma(1,dp)), ls='--', color='green')
    print(2 * N * lyap_gamma(1, dp))
    ax.set_xscale('log')
    #ax.set_xlim(1e-10, 10)
    ax.set_ylim(1e-20, 1)
    ax.set_title("W = {}, $\gamma$ = {:.2}, N = {}".format(
        dp, lyap_gamma(1, dp), N))
    ax.xaxis.set_major_locator(LogLocator(numdecs=10))
    mkdir_and_savefig(fig, 'plots/pta_gh_dist.png')
Example #20
def lognorm_fit(x, xdata=None):
    '''
    Fit a log normal distribution to the data. Uses actual input data and is 
    slower as a result.
    '''
    if xdata is None:
        xdata = x
    shape, loc, scale = lognorm.fit(xdata, loc=1)
    pdf = lognorm.pdf(x, shape, loc, scale)
    return pdf, np.log(scale), shape
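
A sketch of exercising lognorm_fit on synthetic data (the variable names are illustrative; with a free loc the recovered values are only approximately the generating ones):

import numpy as np
from scipy.stats import lognorm

true_mu, true_shape = 0.8, 0.3
data = lognorm.rvs(true_shape, scale=np.exp(true_mu), size=2000, random_state=1)
grid = np.linspace(data.min(), data.max(), 200)
pdf, mu_hat, shape_hat = lognorm_fit(grid, xdata=data)
print(mu_hat, shape_hat)  # expect values near 0.8 and 0.3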
Example #21
def plot_hist(indiv, datas):
    x = np.array(datas)
    unique, counts = np.unique(x, return_counts=True)
    shape, loc, scale = lognorm.fit(x, floc=0)
    x2 = np.linspace(min(unique), max(unique))
    p = lognorm.pdf(x2, shape, loc=loc, scale=scale)
    plt.clf()
    plt.hist(x, density=True, bins=50)
    plt.plot(x2, p, 'k')
    plt.savefig("hist_{}.pdf".format(indiv))
Example #22
def plotIntensityHistogram(ax,
                           sources,
                           freq_mhz,
                           num_bins,
                           label,
                           color="#338768",
                           fit=False):
    intensity_attrib = intensityAttrib(freq_mhz)

    #if the frequency has not been recorded, then no such column will exist so return
    try:
        peak_intensity = sources[intensity_attrib]
    except KeyError:
        print("Intensities for frequency " + str(freq_mhz) +
              "MHz have not been recorded")
        return
    #filtering out negative intensities
    peak_intensity = peak_intensity[peak_intensity > 0]

    #creating the log-scaled bins
    bin_max = np.log10(peak_intensity.max())
    bin_min = np.log10(peak_intensity.min())
    bins = 10**np.linspace(bin_min, bin_max, num_bins)

    #set axis label and x scale
    ax.set_xlabel(createLabel(intensity_attrib))
    ax.set_ylabel("Frequency")
    ax.set_xscale("log")

    counts, bin_edges, ignored = ax.hist(peak_intensity,
                                         bins=bins,
                                         rwidth=1.0,
                                         color=color,
                                         label=str(freq_mhz) + "MHz " + label)
    #draw best fit logarithmic Gaussian curve
    if fit:
        shape, loc, scale = lognorm.fit(peak_intensity, floc=0)
        bins_log_len = np.r_[bin_edges[1:] - bin_edges[:-1], 0]
        # get pdf-values for same intervals as histogram
        samples_fit_log = lognorm.pdf(bins, shape, loc=loc, scale=scale)
        # plot the fit line
        ax.plot(bins,
                samples_fit_log * bins_log_len * counts.sum(),
                'k-',
                label="Fit line " + label,
                linewidth=2)

        #display mean and std dev in a textbox
        mean = round(scale, 2)
        std = round(np.log10(shape), 2)
        ax.legend((dummyObj(), dummyObj()),
                  ("Mean = " + str(mean), "SD = " + str(std)))

    ax.legend(loc="lower left", bbox_to_anchor=(0.1, 1.01))
    print("Plotted the histogram")
Example #23
def index():
    req = request.json
    #print(req)
    data = req['times']
    shape, loc, scale = lognorm.fit(data, floc=0)
    fitted = lognorm(shape, loc, scale)

    res = fitted.ppf(0.05)

    #return req
    return str(res)
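
request.json and the string response suggest this is a Flask view. A minimal sketch of wiring it into an app (the route, port, payload shape, and the fit_times name are assumptions; the handler is renamed here to avoid re-defining index):

from flask import Flask, request
from scipy.stats import lognorm

app = Flask(__name__)

@app.route('/fit', methods=['POST'])
def fit_times():
    data = request.json['times']
    shape, loc, scale = lognorm.fit(data, floc=0)
    return str(lognorm(shape, loc, scale).ppf(0.05))

if __name__ == '__main__':
    app.run(port=5000)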
Example #24
    def test_fit(self):
        p = generic.fit(self.da, 'lognorm')

        assert p.dims[0] == 'dparams'
        assert p.get_axis_num('dparams') == 0
        p0 = lognorm.fit(self.da.values[:, 0, 0])
        np.testing.assert_array_equal(p[:, 0, 0], p0)

        # Check that we can reuse the parameters with scipy distributions
        cdf = lognorm.cdf(.99, *p.values)
        assert cdf.shape == (self.nx, self.ny)
Example #25
def continuous():
    """Fit distributions to symptoms' duration data."""
    # fetch data
    x = _symptoms_data()
    # fit distributions
    return {
        'x': x,
        'norm': norm.fit(x),
        'lognorm': lognorm.fit(x, floc=0),
        'gamma': gamma.fit(x, floc=0)
    }
Example #26
    def test_fit(self):
        p = generic.fit(self.da, "lognorm")

        assert p.dims[0] == "dparams"
        assert p.get_axis_num("dparams") == 0
        p0 = lognorm.fit(self.da.values[:, 0, 0])
        np.testing.assert_array_equal(p[:, 0, 0], p0)

        # Check that we can reuse the parameters with scipy distributions
        cdf = lognorm.cdf(0.99, *p.values)
        assert cdf.shape == (self.nx, self.ny)
        assert p.attrs["estimator"] == "Maximum likelihood"
Example #27
def plot_fit(tau):
    norm = 1. / tau * (tau + 2. / 3.)**2. * 3. / np.pi**2.
    print(tau)
    print(norm)

    # Load photon data, take care of float errors in r
    data = np.loadtxt(
        '../outputs/escape/escape_photons_nphot1e6/exit_photons_tau{}.dat'.
        format(int(tau)),
        skiprows=1)
    data[:, 0] = np.round(data[:, 0], 5)

    # Set up figure, make initial histogram, normalize x and y
    fig, ax = plt.subplots(1, 1, dpi=180)
    n, bins, patches = ax.hist(data[:, 6],
                               bins=50,
                               color='k',
                               histtype='step',
                               density=True,
                               range=(0, 15))
    bins = bins / norm
    n = n * norm

    # Calculate new bin positions, check normalization sum, clear old histogram
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    print(np.sum(n * np.diff(bins)))
    plt.cla()

    # Scatter plot of the new bin positions and normalized counts
    ax.scatter(bincenters, n, color='k', s=3)
    ax.set_xlabel('Distance')
    ax.set_ylabel('n (normalized)')
    ax.set_yscale('log')
    ax.set_title(r'Total Distance Traveled, $\tau = {}$, $n = 10^6$'.format(
        int(tau)))

    # Calculate probability density from Shane's series solution code
    prob = np.zeros(np.shape(bincenters))
    for i in range(len(bincenters)):
        prob[i] = prob_ct_tau(bincenters[i], tau)
    ax.plot(bincenters, prob, 'b--', label='Series Solution', alpha=0.5)

    # Fit a log normal distribution to the normalized data
    shape, loc, scale = lognorm.fit(data[:, 6] / norm, loc=1)
    pdf = lognorm.pdf(bincenters, shape, loc, scale)
    ax.plot(bincenters, pdf, 'r--', label='Log Normal', alpha=0.5)

    # Save or show the plot
    plt.legend()
    plt.savefig('../outputs/escape/fit_plots_nphot1e6/fit_tau{}.pdf'.format(
        int(tau)))
    plt.close()
    return np.log(scale), shape
Example #28
def lognormFit(series):
    '''Estimate lognormal distribution parameters for series (a pd.Series or np.array)'''
    # Relationship between the lognorm fit parameters in scipy.stats and those in lognormPdf:
    # with floc=0 (i.e. loc fixed at 0), s = sigma and scale = e ** mu

    # mu = np.log(series).mean()
    # sigma = np.log(series).std(ddof=0)

    s, loc, scale = lognorm.fit(series, floc=0)
    sigma, mu = s, np.log(scale)

    return mu, sigma
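
With loc fixed at 0 the maximum-likelihood estimates have the closed form mu = mean(log x) and sigma = std(log x), which the commented-out lines hint at. A quick check that the scipy fit agrees (a sketch on synthetic data):

import numpy as np
from scipy.stats import lognorm

x = lognorm.rvs(0.6, scale=np.exp(2.0), size=10000, random_state=3)
s, loc, scale = lognorm.fit(x, floc=0)
print(np.log(scale), np.log(x).mean())  # both estimate mu
print(s, np.log(x).std(ddof=0))         # both estimate sigma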
Example #29
def get_lognormal_para(name):
    name = 'Inflow'  # NB: overrides the name argument; looks like a debugging leftover
    df = pd.read_excel('./model_WWT/SDD_N_P_2012-2019.xlsx',
                       parse_dates=['Date'],
                       index_col='Date',
                       sheet_name=1)
    start_date = '2012-08-01'
    end_date = '2019-8-15'
    mask = (df.index > start_date) & (df.index <= end_date)
    df = df.loc[mask]

    if name == 'NH3':
        data = df.iloc[:, 0]
        data = data.replace(0, np.nan)
        data = data.dropna()
    elif name == 'TP':
        data = df.iloc[:, 3]
        data = data.replace(0, np.nan)
        data = data.dropna()
    elif name == 'Inflow':
        data = df.iloc[:, 2]
        data = data.replace(0, np.nan)
        data = data.dropna()
        data = pd.DataFrame(data)
        data.iloc[:, 0].value_counts()
        data = data[
            data.iloc[:, 0] !=
            30]  # inflow data used is only sewage flow, 30 MGD needs to be removed.
        data = np.array(data)
    else:
        print('wrong inputs')

    parm = lognorm.fit(
        data, floc=0)  #parm[0] = sigma; parm[1]=location, 0; parm[2]=median, m
    sigma = parm[0]
    mu = np.log(parm[2])  # mu is not equal to arithmetic mean
    mean = np.exp(mu + 1 / 2 * (sigma**2))
    mean_data = data.mean()
    median = np.exp(mu)
    cv = np.sqrt(np.exp(sigma**2) - 1)
    sd = mean * np.sqrt(np.exp(sigma**2) - 1)

    return {
        'mu': mu,
        'sigma': sigma,
        'cv': cv,
        'median (scale, m)': parm[2],
        'mean (E[X])': mean,
        'mean_realdata': mean_data,
        'SD[X]': sd,
        'location': parm[1]
    }
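
The returned dictionary relies on the standard lognormal identities E[X] = exp(mu + sigma^2/2), median = exp(mu), and CV = sqrt(exp(sigma^2) - 1). A small numeric sanity check of those identities on synthetic data (a sketch; the sample size is arbitrary):

import numpy as np
from scipy.stats import lognorm

mu, sigma = 1.0, 0.5
x = lognorm.rvs(sigma, scale=np.exp(mu), size=200000, random_state=7)
print(x.mean(), np.exp(mu + sigma**2 / 2))                 # sample mean vs E[X]
print(np.median(x), np.exp(mu))                            # sample median vs exp(mu)
print(x.std() / x.mean(), np.sqrt(np.exp(sigma**2) - 1))   # sample CV vs analytic CV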
Example #30
def calculate_story_distribution(fd_id):
    """
    Using the department in combination with similar departments, calculate the story distribution of structures in
    owned census tracts.  Only medium and high risk structures are included in the calculations.
    """

    MAX_STORIES = 108

    try:
        fd = FireDepartment.objects.get(id=fd_id)
        cursor = connections['nfirs'].cursor()
    except (FireDepartment.DoesNotExist, ConnectionDoesNotExist):
        return

    geoms = list(fd.similar_departments.filter(owned_tracts_geom__isnull=False).values_list('owned_tracts_geom', flat=True))
    geoms.append(fd.owned_tracts_geom)

    FIND_STORY_COUNTS = """SELECT count(1), p.story_nbr
    FROM parcel_stories p
    JOIN "LUSE_swg" lu ON lu."Code" = p.land_use,
    (SELECT g.owned_tracts_geom FROM (VALUES {values}) AS g (owned_tracts_geom)) owned_tracts
    WHERE lu.include_in_floor_dist AND lu.risk_category = %(level)s
    AND ST_Intersects(owned_tracts.owned_tracts_geom, p.wkb_geometry)
    GROUP BY p.story_nbr
    ORDER BY count DESC, p.story_nbr;"""

    values = ','.join(['(ST_SetSRID(\'{}\'::geometry, 4326))'.format(geom.hex) for geom in geoms])
    mapping = {2: 'Medium', 4: 'High'}

    def expand(values, weights):
        ret = []
        for v in zip(values, weights):
            ret = ret + [v[0]] * v[1]
        return ret

    for nlevel, level in mapping.items():
        cursor.execute(FIND_STORY_COUNTS.format(values=values), {'level': level})
        res = cursor.fetchall()

        # Filter out `None` story counts and obnoxious values
        # (list comprehensions so the sequence can be traversed more than once on Python 3)
        a = [x for x in res if x[1] is not None and x[1] <= MAX_STORIES]
        weights = [x[0] for x in a]
        vals = [x[1] for x in a]

        expanded = expand(vals, weights)
        samples = np.random.choice(expanded, size=1000)
        samp = lognorm.fit(samples)

        # Fit curve to story counts
        rm = fd.firedepartmentriskmodels_set.get(level=nlevel)
        rm.floor_count_coefficients = {'shape': samp[0], 'loc': samp[1], 'scale': samp[2]}
        rm.save()
Example #31
def createHisto(A, title='', xlabel='', unit=''):
    """
    Generates one histogram of the given data.
    """
    
    fig = plt.figure()
    ax = plt.subplot(111)
    n, bins, patches = plt.hist(A, _NUMBER_OF_HISTO_BARS, range=(0, A.max()), density=False, \
            weights=np.zeros_like(A)+1./A.size, facecolor='cyan', alpha=0.4, label=' ')
    
    # set min and max values to return
    values = {}
    values['min'] = A.min()
    values['minrf'] = n[np.nonzero(n)][0]
    values['max'] = A.max()
    values['maxrf'] = n[-1]
    numbers = title+"\nx: "+str(bins[1:])+"\ny: "+str(n)+"\n\n"
    # 'best fit' line
    shape, loc, scale = lognorm.fit(A, floc=0) # Fit a curve to the variates
    x = np.linspace(0, 1.2 * A.max(), num=500)
    # scaling
    binlength = bins[1] - bins[0]
    alpha = factorize(n, binlength)
    
    # plot functions
    simplefilter("ignore", RuntimeWarning)  # avoid warning in this method
    plt.plot(bins[1:], n, 'c^', alpha=0.5, label='Distribution')
    plt.plot(x, alpha * (lognorm.pdf(x, shape, loc=0, scale=scale)), 'c--', label='Fit')
    axe = plt.axis()
    newaxe =(axe[0], 1.2 * A.max(), axe[2], axe[3])
    plt.axis(newaxe)
    plt.title(title)
    plt.ylabel(u'Relative frequency ' + r'$\left[\mathrm{\mathsf{ \frac{N}{\Sigma N} }}\right]$')
    plt.xlabel(xlabel)
    simplefilter("default", RuntimeWarning)
    
    # position the legend
    handles, labels = ax.get_legend_handles_labels()
    indexL3 = labels.index(' ')
    labelsL3 = [labels[indexL3]]
    handlesL3 = [handles[indexL3]]
    del labels[indexL3]
    del handles[indexL3]
    l1 = plt.legend(handlesL3, labelsL3, prop={'size':12}, bbox_to_anchor=(0.72, 0.99), loc=2, frameon=0)
    plt.legend(handles, labels, prop={'size':12}, bbox_to_anchor=(0.72, 0.99), loc=2, frameon=0)
    plt.gca().add_artist(l1)
    currentaxis = fig.gca()
    legendText = '$\\mathrm{\\mathsf{\\mu =}}$ %4.2f '+unit+'\n$\\mathrm{\\mathsf{\\sigma =}}$ %4.2f '+unit
    plt.text(0.96, 0.86, legendText % (scale, (shape * scale)), horizontalalignment='right', \
            verticalalignment='top', transform=currentaxis.transAxes)
    plt.minorticks_on()
    return fig, values, numbers
Example #32
def axplot(i_data,linkid,intervalindex,ax):
    ax.cla()
    ax.hist(i_data, numberofbin, inputrange, density=True, label='original distribution')
    # start to fit
    shape,location,scale=lognorm.fit(i_data,floc=0)
    rv=lognorm(shape,location,scale)
    x=np.linspace(0,140,100)
    ax.plot(x, rv.pdf(x),             
             'r-', lw=5, alpha=0.6, label='lognorm fit')
    ax.set_title("Link id "+ str(linkid)+" and time Interval:  " + formatinterval(intervalindex))
    ax.set_xlabel('Travel Time (s)')
    ax.set_ylabel('Probability Density') 
    ax.legend()    
Example #33
def FitPrice(data):
	priceData = data[:,6]
	priceData = priceData[~np.isnan(priceData)]
	shape, loc, scale = lognorm.fit(priceData, loc=0)
	x = np.linspace(0, 100, 100)
	p = lognorm.pdf(x, shape, loc, scale)
	maxIndex = 0
	for i in range(0, len(p)):
		if p[i] >= p[maxIndex]:
			maxIndex = i
		else:
			break  # if the plot goes down, stop searching.
	return x[maxIndex]
Example #34
def FitPrice(data):
	priceData = data[:,6]
	priceData = priceData[~np.isnan(priceData)]
	shape, loc, scale = lognorm.fit(priceData, loc=0)
	x = np.linspace(0, 100, 100)
	p = lognorm.pdf(x, shape, loc, scale)
	maxIndex = 0
	for i in range(0, len(p)):
		if p[i] >= p[maxIndex]:
			maxIndex = i
		else:
			break  # if the plot goes down, stop searching.
	return x[maxIndex]
Example #35
def hist(x, weights=None, bins=10, distname='normal', color='b', label='pdf', filename=None):
	# create full data using weights
	z = x
	if weights is not None:
		z = np.zeros(sum(weights))
		j = 0
		for i in range(weights.size):
			for k in range(j, j + weights[i]):
				z[j] = x[i]
				j += 1

	# histogram
	hist, bins = np.histogram(x, bins=bins, density=True, weights=weights)

	# fit distribution
	if distname == 'normal':
		(mu, sigma) = norm.fit(z)
		pdf = lambda x: norm.pdf(x, mu, sigma)
	elif distname == 'lognormal':
		sigma, loc, scale = lognorm.fit(z, floc=0)
		mu = np.log(scale)
		pdf = lambda x: lognorm.pdf(x, sigma, loc, scale=scale)
	elif distname is not None:
		raise Exception('Unsupported distribution name ' + distname)

	# plot distribution
	if (distname is not None):
		x = np.linspace(bins[0], bins[-1], 100)
		y = pdf(x)
		label = 'm=%2.1f, s=%2.1f [%s]' % (mu, sigma, label)
		plt.plot(x, y, linewidth=3, label=label, alpha=0.7, color=color)

	# plot histogram
	c = (bins[:-1] + bins[1:]) / 2  # bin centers
	plt.plot(c, hist, marker='s', alpha=0.7, markersize=8, linestyle='None', color=color)

	# format plot	
	plt.xticks(fontsize=14)
	plt.yticks(fontsize=14)
	plt.ylabel('PDF', fontsize=16)

	if (filename is not None):
		print('Saving figure ' + filename)
		plt.savefig(filename, bbox_inches='tight')
Example #36
rnd_a += np.random.normal(0,noise_size,1000)
rnd_b += np.random.normal(0,noise_size,1000)
rnd_L += np.random.normal(0,noise_size,1000)

cntr_a = a_pchip(cmpos)
cntr_b = b_pchip(cmpos)
cntr_L = L_pchip(cmpos)

delta_a = cntr_a - rnd_a
delta_b = cntr_b - rnd_b
delta_L = cntr_L - rnd_L

delta_E = sqrt(square(delta_a) +square(delta_b) + square(delta_L))


# plot them green
rndplt=ax.scatter3D(rnd_a,rnd_b,rnd_L,marker='*',c='green',s=50,linewidth=1)

# histogramm delta E

plt.figure()

n, bins, patches = plt.hist(delta_E, bins=50, color='blue', density=True, histtype='bar')
lnrm_shape, lnrm_loc, lnrm_scale = lognorm.fit(delta_E)

x= np.linspace(0, delta_E.max(), num=400)
y = lognorm.pdf(x,lnrm_shape,loc=lnrm_loc,scale=lnrm_scale)

pdflne=plt.plot(x,y,'r--',linewidth=2)

Example #37
from scipy import stats
from scipy.stats import lognorm
rrr=lognorm.rvs(10,loc=0,scale=2,size=1000)
print(rrr[1:10])
print("log normal fit", lognorm.fit(rrr, 5, loc=0, scale=3))
rrr[1:10]
from numpy import rint
from numpy import around
ppp = around(rrr)
print(ppp[1:10])
print(lognorm.fit(ppp, 5, loc=0, scale=3))

Example #38
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from scipy.stats import lognorm
import math
from scipy.interpolate import UnivariateSpline
import sys

data = np.genfromtxt(sys.argv[1], delimiter=",")

freq = {}

priceData = data[:, 4]

priceData = priceData[~np.isnan(priceData)]

shape, loc, scale = lognorm.fit(priceData,loc = 0)

plt.hist(priceData, bins=100, density=True, alpha=0.6, color='g')
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = lognorm.pdf(x, shape, loc, scale)
print(p)
print(x)
maxIndex = 0
for i in range(0, len(p)):
	if p[i] >= p[maxIndex]:
		maxIndex = i
	else:
		break  # if the plot goes down, stop searching.
maxX = x[maxIndex]
plt.plot(x, p, 'k', linewidth=2)
Example #39
def distfit(n,dists,title,ra,dec,fwhm, dm):
	import numpy as np
	import matplotlib.pyplot as plt
	# from scipy.optimize import curve_fit
	from scipy.stats import lognorm
	from scipy import ndimage
	
	# n = 279
	bins = 165
	width = 22 
	# fwhm = 2.0
	sig = ((bins/width)*fwhm)/2.355
	valsLP = []
	for i in range(25000) :
		random_ra = ra*np.random.random_sample((n,))
		random_dec = dec*np.random.random_sample((n,))
		random_xy = zip(random_ra,random_dec)
		grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins,bins], range=[[0,width],[0,width]])
		hist_points_r = zip(xedges_r,yedges_r)
		grid_gaus_r = ndimage.gaussian_filter(grid_r, sig, mode='constant', cval=0)
		S_r = np.array(grid_gaus_r*0)
		
		grid_mean_r = np.mean(grid_gaus_r)
		grid_sigma_r = np.std(grid_gaus_r)
		S_r = (grid_gaus_r-grid_mean_r)/grid_sigma_r
		
		x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),grid_gaus_r.shape)
		valsLP.append(S_r[x_cent_r][y_cent_r])
	# valsLP = np.loadtxt('valuesLeoP.txt', usecols=(0,), unpack=True)
	# vals = np.loadtxt('values.txt', usecols=(0,), unpack=True)
	
	# bins, edges = np.histogram(vals, bins=400, range=[2,22], normed=True)
	# centers = (edges[:-1] + edges[1:])/2.
	# plt.scatter(centers, bins, edgecolors='none')
	
	x = np.linspace(2, 22, 4000)
	
	# al,loc,beta=lognorm.fit(vals)
	# print al, loc, beta
	# # plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=5, alpha=0.6, label='lognormal AGC198606')
	# print lognorm.cdf(dists, al, loc=loc, scale=beta)
	
	bins, edges = np.histogram(valsLP, bins=400, range=[2,22], density=True)
	centers = (edges[:-1] + edges[1:])/2.
	
	
	# x = np.linspace(2, 22, 4000)
	# dists = np.array([3.958,3.685,3.897,3.317])
	al,loc,beta=lognorm.fit(valsLP)
	# print al, loc, beta
	plt.plot(x, lognorm.pdf(x, al, loc=loc, scale=beta),'r-', lw=2, alpha=0.6, label='lognormal distribution')
	print('Significance of detection:', '{0:6.3f}%'.format(100.0*lognorm.cdf(dists, al, loc=loc, scale=beta)))
	
	plt.scatter(centers, bins, edgecolors='none', label='histogram of $\sigma$ from 25000 \nuniform random samples')
	# print chisqg(bins, lognorm.pdf(centers, al, loc=loc, scale=beta))
	
	
	ax = plt.subplot(111)
	plt.plot([dists,dists],[-1.0,2.0],'k--', lw=2, alpha=1.0, label='best '+title+' detection') 
	# plt.plot([4.115,4.115],[-1.0,2.0],'k--', lw=2, alpha=1.0, label='Leo P detection at 1.74 Mpc')
	# plt.plot([3.897,3.897],[-1.0,2.0],'k-', lw=5, alpha=0.6, label='d=417 kpc')
	# plt.plot([3.317,3.317],[-1.0,2.0],'k-', lw=5, alpha=0.4, label='d=427 kpc')
	plt.ylim(0,1.1)
	plt.xlim(2,12)
	plt.xlabel(r'$\sigma$ above local mean')
	plt.ylabel(r'$P(\sigma = X)$')
	plt.legend(loc='best', frameon=True)
	ax.set_aspect(3)
	# plt.show()
	plt.savefig(title+'_'+repr(dm)+'_'+repr(fwhm)+'_dist.pdf')
Example #40
log_c = np.array ([np.log (i) for i in c])

log_a = (log_a-np.mean (log_a))/np.std (log_a)
log_b = (log_b-np.mean (log_b))/np.std (log_b)
log_c = (log_c-np.mean (log_c))/np.std (log_c)

print(kstest(log_a, 'norm'))
print(kstest(log_b, 'norm'))
print(kstest(log_c, 'norm'))

plb.hist (b)
plb.hist (log_b, bins=20)
plb.hist (a, bins=100)
plb.hist (log_a, bins=10)

shape, loc, scale = lognorm.fit(a)
rnd_a = lognorm.rvs(shape, scale=scale, loc=loc, size=len(a))
plb.hist(rnd_a, bins=20, alpha=0.5)
plb.hist(a, bins=20, color='r', alpha=0.5)

shape, loc, scale = lognorm.fit(c)
rnd_c = lognorm.rvs(shape, scale=scale, loc=loc, size=len(c))
plb.hist(rnd_c, bins=30, alpha=0.5)
plb.hist(c, bins=30, color='r', alpha=0.5)

shape, loc, scale = lognorm.fit(b)
rnd_b = lognorm.rvs(shape, scale=scale, loc=loc, size=len(b))
plb.hist(rnd_b, bins=20, alpha=0.5)
plb.hist(b, bins=20, color='r', alpha=0.5)

np.mean (b)
Example #41
def test_z(filename, uncorr_algo, distbn_to_fit):
    '''test case for pdz domain proteins'''
    algn = read_free(filename)

    # truncate alignments to sequence positions with
    # gap frequency no greater than 20% - to avoid over-representation of gaps
    alignments = truncate(algn, FRAC_ALPHA_CUTOFF)
    print(alignments.shape)
    pdb_res_list = read_pdb(PDZ_PDB_FILE, 'A')
    msa_algn = msa_search(pdb_res_list, alignments)
    print(msa_algn)
    sca_algn = sca(alignments)
    algn_shape = get_algn_shape(algn)
    no_pos = alignments.shape[1]
    no_seq = algn_shape.no_seq
    no_aa = algn_shape.no_aa
    print('Testing SCA module :')
    print('algn_3d_bin hash :' + str(np.sum(np.square(sca_algn.algn_3d_bin))))
    print('weighted_3d_algn hash :' +
          str(np.sum(np.square(sca_algn.weighted_3d_algn))))
    print('weight hash : ' + str(np.sum(np.square(sca_algn.weight))))
    print('pwX hash : ' + str(np.sum(np.square(sca_algn.pwX))))
    print('pm hash : ' + str(np.sum(np.square(sca_algn.pm))))
    print('Cp hash : ' + str(np.sum(np.square(sca_algn.Cp))))
    print('Cs hash : ' + str(np.sum(np.square(sca_algn.Cs))))
    spect = spectral_decomp(sca_algn, 100)
    print('spect lb hash : ' + str(np.sum(np.square(spect.pos_lbd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev))))
    print('spect ldb_rnd hash : ' + str(np.sum(np.square(spect.pos_lbd_rnd))))
    print('spect ev hash : ' + str(np.sum(np.square(spect.pos_ev_rnd))))

    svd_output = LA.svd(sca_algn.pwX)
    U = svd_output[0]
    sv = svd_output[1]
    V = svd_output[2]

    # calculate the matrix Pi = U*V'
    # this provides a mathematical mapping between
    # positional and sequence correlation

    n_min = min(no_seq, no_pos)
    print(U.shape)
    print(V.shape)
    print(n_min)
    Pi = dot(U[:, 0:n_min], transpose(V[:, 0:n_min]))
    U_p = dot(Pi, spect.pos_ev)

    distbn = get_distbn(distbn_to_fit)
    pd = lognorm.fit(spect.pos_ev[:, 0], floc=0)
    # floc = 0 holds location to 0 for fitting
    print(pd)

    p_cutoff = 0.8  # cutoff for the cdf
    xhist = arange(0, 0.4, 0.01)
    x_dist = arange(min(xhist), max(xhist), (max(xhist) - min(xhist))/100)
    cdf = lognorm.cdf(x_dist, pd[0], pd[1], pd[2])
    # Use case : lognorm.cdf(x, shape, loc, scale)

    jnk = min(abs(cdf - p_cutoff))
    x_dist_pos_right = np.argmin(abs(cdf-p_cutoff))
    cutoff_ev = x_dist[x_dist_pos_right]
    sector_def = np.array(np.where(spect.pos_ev[:, 0] > cutoff_ev)[0])[0]
    print('sector definition :')
    print(sector_def)
Example #42
		random_ra = 20.0*np.random.random_sample((n,))
		random_dec = 20.0*np.random.random_sample((n,))
		random_xy = zip(random_ra,random_dec)
		grid_r, xedges_r, yedges_r = np.histogram2d(random_dec, random_ra, bins=[bins,bins], range=[[0,width],[0,width]])
		hist_points_r = zip(xedges_r,yedges_r)
		grid_gaus_r = ndimage.gaussian_filter(grid_r, sig, mode='constant', cval=0)
		S_r = np.array(grid_gaus_r*0)
		
		grid_mean_r = np.mean(grid_gaus_r)
		grid_sigma_r = np.std(grid_gaus_r)
		S_r = (grid_gaus_r-grid_mean_r)/grid_sigma_r
		
		x_cent_r, y_cent_r = np.unravel_index(grid_gaus_r.argmax(),grid_gaus_r.shape)
		sig_values_r.append(S_r[x_cent_r][y_cent_r])
		# print >> f1, S_r[x_cent_r][y_cent_r]
	al,loc,beta=lognorm.fit(sig_values_r)
	alphas.append(al)
	betas.append(beta)
	locs.append(loc)
	# pct_calc = [sig_values_r[i] for i in range(len(sig_values_r)) if (sig_values_r[i] < S_th)]
	# percentile = (float(len(pct_calc))/1000.0)*100.0
	# print n, S_th, percentile

ax0 = plt.subplot(2,2,1)	
plt.scatter(ns,alphas,c='r', edgecolors='none')
# plt.ylim(0,1.1)
# plt.xlim(2,12)
plt.xlabel('sample size')
plt.ylabel('alpha')

ax1 = plt.subplot(2,2,2)