Example #1
def testScipyExponential():
    data0 = expon.rvs(scale=10, size=1000)
    ###################
    data = data0
    plt.figure()
    x = np.linspace(0, 100, 100)
    plt.hist(data, bins=x, density=True)
    plt.plot(x, expon.pdf(x, loc=0, scale=10), color='g')

    #loc, scale = expon.fit(data, floc=0)
    #plt.plot(x, expon.pdf(x, loc=loc, scale=scale), color='r')

    removedHeadLength = 1.0
    dataNoHead = [v for v in data if v > removedHeadLength]
    loc1, scale1 = expon.fit(dataNoHead)
    plt.plot(x, expon.pdf(x, loc=0, scale=scale1), color='b')
    loc, scale = expon.fit(dataNoHead, floc=removedHeadLength)
    plt.plot(x, expon.pdf(x, loc=0, scale=scale), color='r')

    # non-normalised graphs
    # plt.figure()
    # plt.hist(data0, bins=x, density=False)
    # plt.plot(x, expon.pdf(x, loc=0, scale=10)*len(data0), color='r')

    plt.figure()
    plt.hist(dataNoHead, bins=x, density=False)
    # s = len(dataNoHead) / sInvNormalisation, where sInvNormalisation = integral(pdf, removedHeadLength, infty)
    int_0_removedHeadLength_expon = np.exp(- float(removedHeadLength) / scale)
    s = len(dataNoHead) / int_0_removedHeadLength_expon
    s0 = len(data0) / 1.0
    print(s0, len(dataNoHead), s, file=sys.stderr)
    plt.plot(x, expon.pdf(x, loc=0, scale=scale)*s, color='r')
##############################################################
# deprecated
##############################################################

# non-linear fit
#A, K, C = fit_exp_nonlinear(t, noisy)

# linear fit with the constant set to 0
# C = 0
# A, K = fit_exp_linear(t, noisy, C)
# ysModel = model_func(t, A, K, C)

#plt.tight_layout()
#plt.xlim(0, 100)
#plt.title("OSB length distribution in Human-Mouse comparison \n Confidence Interval : %s*sigma around mean" % arguments["ICfactorOfSigma"] )
#plt.title("")
# #plt.legend()
#plt.savefig(sys.stdout, format='svg')
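The renormalisation trick used above is worth isolating. A minimal, self-contained sketch (variable names here are illustrative, not from the original project): fit an exponential to the tail above a cutoff with the location pinned at the cutoff, then divide the tail count by the survival probability exp(-cut/scale) to estimate the untruncated sample size.

import numpy as np
from scipy.stats import expon

data = expon.rvs(scale=10, size=1000)
cut = 1.0
tail = data[data > cut]
loc, scale = expon.fit(tail, floc=cut)      # pin loc at the cutoff
n_est = len(tail) / np.exp(-cut / scale)    # tail count / P(X > cut)
print(len(data), len(tail), n_est)          # n_est should be close to 1000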
Example #2
 def daywise_training_data(self, d, combine, fac1, fac2, f1, days,
                           orignal_start_slot):
     # fac2 holds our internal slots that are combined
     # note that we compute the average for combined slots and then assign it
     # to all the slots in that duration
     if self.combined_slots:
         x = fac2[(fac1 == f1)]
         day = days[(fac1 == f1)]
         model_d = []
         for day_i in np.unique(day):
             model_d_temp = []
             for t_i in np.unique(x):
                 try:
                     model_d_temp.append([[
                         t_i,
                         expon.fit(
                             pd.to_numeric(d[(x == t_i)
                                             & (day == day_i)]))[1], day_i
                     ]])
                 except:
                     continue
             model_d_temp = np.vstack(model_d_temp)
             scale_val = model_d_temp[(
                 model_d_temp[:, 0] == combine[0])].flatten()[1]
             add = [[i, scale_val, day_i] for i in combine[1:]]
             model_d_temp = np.concatenate((model_d_temp, add))
             model_d.append(model_d_temp)
         model_d = np.vstack(model_d)
     else:
         x = orignal_start_slot[(fac1 == f1)]
         day = days[(fac1 == f1)]
         model_d = []
         for day_i in np.unique(day):
             model_d_temp = []
             for t_i in np.unique(x):
                 try:
                     model_d_temp.append([[
                         t_i,
                         expon.fit(
                             pd.to_numeric(d[(x == t_i)
                                             & (day == day_i)]))[1], day_i
                     ]])
                 except:
                     continue
             model_d_temp = np.vstack(model_d_temp)
             model_d.append(model_d_temp)
         model_d = np.vstack(model_d)
     return model_d
Example #3
    def testExponOneEvent(self):
        """
        generate and fit an exponential distribution with lifetime of 25
        make a plot in testExpon.png
        """
        tau = 25.0
        nBins = 400
        size = 100
        x = range(nBins)
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size)
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)

        param = expon.fit(timeStamps)
        fit = expon.pdf(x,loc=param[0],scale=param[1])
        fit *= size
        tvf = timeHgValues.astype(np.double)
        tvf[tvf<1] = 1e-3 # the plot looks nicer if zero values are replaced
        plt.plot(x, tvf, label="data")
        plt.plot(x, fit, label="fit")
        plt.yscale('log')
        plt.xlim(right=100)
        plt.ylim(bottom=0.09)
        plt.legend()
        plt.title("true tau=%.1f   fit tau=%.1f"%(tau,param[1]))
        plt.savefig(inspect.stack()[0][3]+".png")
Example #4
    def returnDistData(cls, self):
        gammaParam = gamma.fit(10**(self.data / 10))
        gammaDist = gamma.pdf(self.data, *gammaParam)

        rayleighParam = rayleigh.fit(self.data)
        rayleighDist = rayleigh.pdf(self.data, *rayleighParam)

        normParam = norm.fit(self.data)
        normDist = norm.pdf(self.data, *normParam)

        logNormParam = lognorm.fit(self.data)
        lognormDist = lognorm.pdf(self.data, *logNormParam)

        nakagamiParam = nakagami.fit(self.data)
        nakagamiDist = nakagami.pdf(self.data, *nakagamiParam)

        exponParam = expon.fit(self.data)
        exponDist = expon.pdf(self.data, *exponParam)

        exponweibParam = exponweib.fit(self.data)
        weibDist = exponweib.pdf(self.data, *exponweibParam)

        distDF = pd.DataFrame(np.column_stack([
            gammaDist, rayleighDist, normDist, lognormDist, nakagamiDist,
            exponDist, weibDist
        ]),
                              columns=[
                                  'gammaDist', 'rayleighDist', 'normDist',
                                  'lognormDist', 'nakagamiDist', 'exponDist',
                                  'weibDist'
                              ])
        self.distDF = distDF
Example #5
def main():
    symbol = 'BTCUSDT'
    #symbols = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'ETHBTC', 'LTCBTC', 'LTCETH']
    #symbols = ['ETHUSDT', 'LTCUSDT', 'ETHBTC', 'LTCBTC', 'LTCETH']

    #trades = get_trades('BTCUSDT', datetime.datetime.timestamp(datetime.datetime(2019, 6, 1)) * 1000, 24 * 365)
    trades = get_trades(
        symbol,
        datetime.datetime.timestamp(datetime.datetime(2019, 6, 1)) * 1000,
        1200)

    with open(symbol + '.json', 'w') as f:
        json.dump(trades, f)

    previous_time = None

    interarrival_times = []

    for trade in trades:
        time = trade['time']

        if previous_time is not None:
            interarrival_times.append(time - previous_time)

        previous_time = time

    plt.hist(interarrival_times, 100, density=True)
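    # note: in the fit below, loc=0 is only a starting guess for the optimiser;
    # pass floc=0 instead to actually fix the location at zero, the usual
    # choice for interarrival times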
    loc, scale = expon.fit(interarrival_times, loc=0)
    x = np.linspace(0, 2000, 100)
    plt.plot(x, expon.pdf(x, loc=loc, scale=scale))
    plt.show()
Example #6
 def distribution_check(self, dist):
     #histogram and normal probability plot
     if dist == 'norm':
         sns.distplot(self.series, fit=norm)
         (mu, sigma) = norm.fit(self.series)
         print('\n mu = {:.2f} and sigma = {:.2f}\n'.format(mu, sigma))
         #Now plot the distribution
         plt.legend([
             r'normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(
                 mu, sigma)
         ],
                    loc='best')
         plt.ylabel('Frequency')
         #plt.title('Series distribution')
         plt.show()
     if dist == 'expon':
         plt.clf()
         sns.distplot(self.series, fit=expon)
         (loc, scale) = expon.fit(self.series)
         print('\n mu = {:.2f}\n'.format(scale))
         #Now plot the distribution
         plt.legend([r'expon dist. ($\mu=$ {:.2f}  )'.format(scale)], loc='best')
         plt.ylabel('Frequency')
         #plt.title('Series distribution')
         plt.show()
Example #7
    def computeMeanSbLength(lSbsLengths, binwidth=None, minSbLength=None):
        # first fit an exponential of the form s * 1/theta * exp(-x/theta)
        # with s the scale factor = total #sbs
        loc, scale = expon.fit(lSbsLengths, floc=0.0)
        assert loc == 0.0

        #A, K = fit_exp_linear(t, v, 0)  # A exp(K t)
        # A = s * 1/theta
        #theta = -1.0/K

        # proportion of the missing normalised distribution between 0 and minSbLength
        # propMissing = 1.0 / np.exp(-minSbLength/scale) - 1.0
        # s = (propMissing + 1) * len(lSbsLengths)
        int_0_removedHeadLength_expon = np.exp(- float(minSbLength) / scale)
        s = len(lSbsLengths) / float(int_0_removedHeadLength_expon)

        theta = scale
        # print("theta=", theta, file=sys.stderr)
        # print("s=", s, file=sys.stderr)
        missingBins = list(np.arange(0, int(minSbLength), binwidth))
        nbMissingSbs = s * Exp(np.asarray([bin - binwidth/2.0 for bin in missingBins]), theta)
        missingNbSbsPerBin = list(zip(missingBins, nbMissingSbs))
        lSbsLengthsMissing = [(bin + random.random() * binwidth) for (bin, sbLength) in missingNbSbsPerBin for _ in range(int(sbLength))]
        lSbsLengthsc = list(lSbsLengthsMissing) + lSbsLengths
        meanSbLength = float(sum(lSbsLengthsc) / len(lSbsLengthsc))
        # TODO compare s and len(lSbsLengthsc)
        print("meanSbLength=%s, scale=%s" % (meanSbLength, theta), file=sys.stderr)
        print("s=%s, len(lSbsLengthsc)=%s" % (s, len(lSbsLengthsc)), file=sys.stderr)
        return (meanSbLength, lSbsLengthsc, missingNbSbsPerBin)
Example #8
    def testExpon(self):
        """
        generate and fit an exponential distribution with lifetime of 25
        make a plot in testExpon.png
        """
        tau = 25.0
        nBins = 400
        size = 100
        x = range(nBins)
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size)
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # Note: this line causes a RuntimeWarning in optimize.py:301
            param = expon.fit(timeStamps)

        fit = expon.pdf(x,loc=param[0],scale=param[1])
        fit *= size
        tvf = timeHgValues.astype(np.double)
        #tvf[tvf<1] = 1e-3 # the plot looks nicer if zero values are replaced
        plt.plot(x, tvf, label="data")
        plt.plot(x, fit, label="fit")
        plt.yscale('symlog', linthresh=0.9)
        plt.xlim(right=100)
        plt.ylim(bottom=-0.1)
        plt.legend()
        plt.title("true tau=%.1f   fit tau=%.1f"%(tau,param[1]))
        plt.savefig(inspect.stack()[0][3]+".png")
Example #9
def MBdist(
    n, e_photon, thick
):  # n: particle number, loct: start point(x-x0), scale: sigma, wl: wavelength,thick: thickness of the cathode
    assert e_photon > bandgap
    if e_photon - bandgap - 0.8 <= 0:
        scale = e_photon - bandgap
        loct = 0
    else:
        scale = 0.8
        loct = e_photon - bandgap - scale
    data = maxwell.rvs(loc=loct, scale=scale, size=n)
    data_ene = np.array(data)
    params = maxwell.fit(data, floc=0)
    data_v = np.sqrt(2 * data_ene * ec / me) * 10**9
    p2D = []
    wl = ((19.82 - 27.95 * e_photon + 11.15 * e_photon**2) * 10**-3)**-1
    pens = expon.rvs(loc=0, scale=wl, size=n)
    penss = list(filter(lambda x: x <= thick, pens))
    params_exp = expon.fit(pens, floc=0)
    for i in range(len(penss)):
        phi = random.uniform(0, 2 * math.pi)  # initial angle
        poy = random.uniform(-1 * 10**6,
                             1 * 10**6)  # initial y direction position
        p2D.append([
            penss[i], poy, data_v[i] * math.cos(phi),
            data_v[i] * math.sin(phi), data_v[i], data[i]
        ])  #p2D: (z,y,vz,vy,v,ene)
    p2D = np.array(p2D)
    return params, p2D, penss, params_exp
Example #10
    def testExponManyEvents(self):
        """
        generate and fit an exponential distribution with lifetime of 25
        make a plot in testExponManyEvents.png
        """
        tau = 25.0
        nBins = 400
        size = 100
        taulist = []
        for i in range(1000):
            x = range(nBins)
            timeHgValues = np.zeros(nBins, dtype=np.int64)
            timeStamps = expon.rvs(loc=0, scale=tau, size=size)
            ts64 = timeStamps.astype(np.uint64)
            tsBinner.tsBinner(ts64, timeHgValues)
            
            param = expon.fit(timeStamps)
            fit = expon.pdf(x,loc=param[0],scale=param[1])
            fit *= size
            print "i=",i," param[1]=",param[1]
            taulist.append(param[1]) 

        hist,bins = np.histogram(taulist, bins=20, range=(15,25))
        width = 0.7*(bins[1]-bins[0])
        center = (bins[:-1]+bins[1:])/2
        plt.step(center, hist, where = 'post')
        plt.savefig(inspect.stack()[0][3]+".png")
Example #11
    def mean_model(self,data,x,data_save,x_save):

        ks_t_D = pd.DataFrame()
        ks_t_pval = pd.DataFrame()
        t_t_pval = pd.DataFrame()
        exp_loc = pd.DataFrame()
        exp_scale = pd.DataFrame()
        time_slot = pd.DataFrame()
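        # note: DataFrame.append (used below) was removed in pandas 2.0; with
        # modern pandas, collect these rows in plain lists and build the frame once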

        for f2 in np.unique(x):
            d = pd.to_numeric(np.array(data[(x==f2)]))
            loc, scale = expon.fit(d)
            # ks test
            D , kspval = kstest(d,'expon')
            # ttest  - one sided
            sample2 = np.random.exponential(scale, size=d.shape[0])
            val , pval = ttest_ind(d,sample2)

            # if we have combined data then add same model to all combined timeslots
            if self.combined_slots and f2 == self.combine[0]:
                for var in self.combine:
                    exp_loc = exp_loc.append(pd.DataFrame([loc]))
                    exp_scale = exp_scale.append(pd.DataFrame([scale]))
                    ks_t_D = ks_t_D.append(pd.DataFrame([D]))
                    ks_t_pval = ks_t_pval.append(pd.DataFrame([kspval]))
                    t_t_pval = t_t_pval.append(pd.DataFrame([pval / 2]))
                    # add timeslot
                    time_slot = time_slot.append([var])

            else:
                exp_loc = exp_loc.append(pd.DataFrame([loc]))
                exp_scale = exp_scale.append(pd.DataFrame([scale]))
                ks_t_D = ks_t_D.append(pd.DataFrame([D]))
                ks_t_pval = ks_t_pval.append(pd.DataFrame([kspval]))
                t_t_pval = t_t_pval.append(pd.DataFrame([pval / 2]))
                # add timeslot
                time_slot = time_slot.append([f2])


        # this is the final fit
        fit = pd.DataFrame()
        fit[[self.x_names[1]]] = time_slot
        fit['Exp_loc'] = np.array(exp_loc).flatten()
        fit['Exp_scale'] = np.array(exp_scale).flatten()
        fit['KS_D'] = np.array(ks_t_D).flatten()
        fit['KS_PVal'] = np.array(ks_t_pval).flatten()
        fit['Ttest_PVal'] = np.array(t_t_pval).flatten()

        # if self._log:
        #     data_save = np.log(data_save)
        # if self._normal:
        #     day_max = pd.DataFrame({'time':x,'scale':data,'day':days} )
        #     day_max = day_max.groupby("day")["scale"].transform(max)
        #     data = data/day_max
        #     scalings = np.unique(day_max)
        # else:
        #     scalings = 1


        return fit,data_save,x_save
Example #12
def main():
    print('Loading and Processing Orders')

    transactions = load_transactions()
    orders = classify_trades(transactions)
    interarrivals = calculate_interarrival_times(orders)

    buy_orders = [o for o in orders if o.buyer]
    sell_orders = [o for o in orders if not o.buyer]

    arrivals = np.cumsum(interarrivals)

    time_step = 60 * 60 * 1000  #calculating hourly rates

    bins = []
    bin = []

    time = 0

    print('Processing...')

    for i, t in enumerate(arrivals):
        if t > time + time_step:
            jump = int(np.floor((t - time) / time_step))
            time += jump * time_step

            bins.append(bin)
            bins.extend([[] for _ in range(jump - 1)])  # pad empty bins for skipped intervals
            bin = []
        bin.append(orders[i])

    times = []
    rates = []
    for bin in bins:
        if len(bin) != 0:

            times.append(bin[0].start_time)
            ia = calculate_interarrival_times(bin)
            loc, scale = expon.fit([i for i in ia if i > 1000])
            rates.append(1 / scale)

    fig, ax1 = plt.subplots()

    ax1.plot((np.array(times) - bins[0][0].start_time) / time_step,
             rates,
             label='Genesis Order Rate')

    ax2 = ax1.twinx()

    ax2.plot((np.array([o.start_time
                        for o in orders]) - orders[0].start_time) / time_step,
             [o.price for o in orders],
             label='Price',
             color='orange')

    fig.legend()

    fig.savefig('Order rate through time')

    plt.show()
Example #13
    def fit(data: FloatIterable) -> 'Exponential':
        """
        Fit an Exponential distribution to the data.

        :param data: Iterable of data to fit to.
        """
        loc, scale = expon.fit(data=data, floc=0)
        return Exponential(lambda_=1 / scale)
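For reference, with floc=0 the exponential maximum-likelihood fit reduces to scale = mean(data), so lambda_ above is just the reciprocal of the sample mean. A quick check (names illustrative):

import numpy as np
from scipy.stats import expon

data = expon.rvs(scale=2.0, size=10000)
loc, scale = expon.fit(data, floc=0)
print(scale, np.mean(data))  # these match: the MLE scale is the sample mean
print(1 / scale)             # recovered rate, close to 0.5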
Example #14
def fit_exp(start_ts):
    start_ts = np.sort(np.array(start_ts))
    intervals = start_ts[1:] - start_ts[:-1]
    # intervals = np.sort(intervals)
    # print(intervals)
    exp_loc, exp_scale = expon.fit(intervals)
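    # without floc=0, scipy's exponential MLE sets loc = intervals.min() and
    # scale = intervals.mean() - loc, so this is the rate of the gap
    # distribution shifted to start at the smallest observed interval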

    return 1. / exp_scale
Example #15
def DosDist(
    n, e_photon, thick, data, absorb_data
):  # n: the photon number; loct: the position (z) of the electrons; thick: thickness in nm.

    f = interp1d(data[:, 0], data[:, 1])
    f2 = interp1d(absorb_data[:, 0], absorb_data[:, 1])

    n1 = int((e_photon - 1) / 0.01)  # change to n
    energy = np.linspace(1., e_photon, n1)
    norm, err = integrate.quad(lambda e: f(e - e_photon) * f(e),
                               1,
                               e_photon,
                               limit=10000)

    data_ene = []
    num_energy = []
    i = 0
    while i < n1:
        n3 = round(1.5 * n * f(energy[i] - e_photon) * f(energy[i]) * 0.01 /
                   norm)  #using n instead of n1
        num_energy.append(n3)
        ener_array = np.empty(n3)
        ener_array.fill(energy[i])
        data_ene.extend(ener_array)
        i += 1
    np.random.shuffle(data_ene)
    '''
    plt.subplot(211)
    plt.plot(data[:,0],data[:,1])
    plt.subplot(212)
    plt.hist(data_ene,bins=30)
    plt.show()
    '''
    p2D = []
    #wl=((19.82-27.95*e_photon+11.15*e_photon**2)*10**-3 )**-1
    wl = (f2(e_photon) * 10**-3)**-1
    pens = expon.rvs(loc=0, scale=wl, size=n)
    penss = list(filter(lambda x: x <= thick, pens))
    params_exp = expon.fit(pens, floc=0)

    for i in range(len(penss)):
        phi = random.uniform(0, 2 * math.pi)  # initial angle on the 2D surface
        php = random.uniform(0, 2 *
                             math.pi)  # initial angle on the perpendicular face
        poy = random.uniform(-1 * 10**6,
                             1 * 10**6)  # initial y direction position, nm
        v = np.sqrt(2 * np.abs((data_ene[i] - bandgap)) * ec / me) * 10**9
        p2D.append([
            penss[i], poy, v * math.cos(phi) * math.cos(php),
            v * math.sin(phi) * math.cos(php), v,
            np.abs(data_ene[i] - bandgap)
        ])  #p2D: (z,y,vz,vy,v,ene)
    p2D = np.array(p2D)

    #print p2D
    return params_exp, p2D, penss, params_exp
Example #16
def test_sigdiff(data, var, stat, bmi1, bmi2):
    x = data[data.common_user_id.isin(bmi1)]
    y = data[data.common_user_id.isin(bmi2)]
    if stat == 'n':
        x1 = x[var]
        y1 = y[var]
        print("Significance Tests for " + var)
    else:
        x1 = x[var][stat]
        y1 = y[var][stat]
        print("Significance Tests for " + stat)
    locx, scalex = expon.fit(x1)
    locy, scaley = expon.fit(y1)
    rvsx = expon.rvs(locx, scalex, size=300)
    rvsy = expon.rvs(locy, scaley, size=300)

    print(str(stats.ks_2samp(rvsx, rvsy)))
    print(str(stats.ttest_ind(rvsx, rvsy, equal_var=False)))
Example #17
 def fit(self, X):
     """
     Sets the scale based on the input data
     Args:
         X (array): the data to be used to set the scale
     """
     self.scale = np.zeros(X.shape[-1], dtype=np.float32)
     for i in range(0, X.shape[-1]):
         _, self.scale[i] = expon.fit(X[:, i], floc=0)
Example #18
def fit_exponential_sp(trace,plot=False):
    loc,scale = expon.fit(trace[:,4],floc=0)
    if plot == True:
        xmax = max(trace[:,4])
        xmin = min(trace[:,4])
        xdata = np.linspace(xmin,xmax,num=500)
        plt.plot(xdata,expon.pdf(xdata,loc,scale))
        plt.hist(trace[:,4],bins=50,density=True)
    return loc,scale
Example #19
def fit_gamma_expon_Q(trace,gammafactor=20,exponfactor=2,plot=False):
    params = []
    for i in range(1,len(trace[0])):
        if i < 4:
            a,loc,theta = gamma.fit(trace[:,i],floc = 0)
            params.append([a/gammafactor,loc,theta*gammafactor])
        if i == 4:
            loc,scale = expon.fit(trace[:,i],floc = 0)
            params.append([loc,scale*exponfactor])
    return params
Example #20
def exponential_fit(data, col):
    plt.hist(data[col], bins=10000, density=True, alpha=0.6, color='g')
    loc, scale = expon.fit(data[col])
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 10000)
    # plt.plot(x, p, 'k', linewidth=2)
    # title = "Exponential fitting of "+str(col)
    # plt.title(title)
    # plt.show()
    return x, loc, scale
Example #21
def MLE_plt(categories, inter_arrivals, inter_arrival_means):
    cat_means = cat_mean(inter_arrivals, categories)
    for i in range(0, len(categories)):

        #X = np.asarray(extract_cat_samples(categories.inter_arrivals,categories.categories,i))#for single inter-arrivals in a category
        #X = np_matrix(categories.categories[i][0])#for avg(inter-arrival)/person in a category
        data = [0] * len(categories[i][0])
        for j in range(0, len(categories[i][0])):
            data.append(inter_arrival_means[categories[i][0][j]])
        X = np.asarray(data)
        param = expon.fit(X)  # distribution fitting
        sample_mean = cat_means[i]
        #rate_param = 1.0/sample_mean
        #fitted_pdf = expon.pdf(X,scale = 1/rate_param)
        # rate_param_estimate = exp_rate_param_estimate(sample_means)
        max_sample = max_interarrival_mean(categories, inter_arrivals, i)
        X_plot = np.linspace(0, 2 * sample_mean, 2000)[:, np.newaxis]
        fitted_pdf = expon.pdf(X_plot, loc=param[0], scale=param[1])
        # Generate the pdf (fitted distribution)

        #kde = KernelDensity(kernel='gaussian', bandwidth=4).fit(X)
        #KDEs.append(kde) #to use for prob_return()
        #max_sample = max_interarrival_mean(categories.categories,categories.inter_arrivals,i)
        #X_plot = np.linspace(0,1.5*max_sample,2000)[:, np.newaxis]
        #log_dens = kde.score_samples(X_plot)

        fig = plt.figure()
        #plt.plot(X_plot[:, 0], np.exp(log_dens), '-',label="kernel = '{0}'".format('gaussian'))
        plt.plot(X_plot[:, 0],
                 fitted_pdf,
                 "red",
                 label="Estimated Exponential Dist",
                 linestyle="dashed",
                 linewidth=1.5)
        #plt.draw()
        #plt.pause(0.001)
        plt.title(
            "Parametric MLE (exponential distribution) for category=%s Visitors"
            % (i))
        plt.hist(X,
                 bins=40,
                 density=True,
                 color="cyan",
                 alpha=.3,
                 label="histogram")  #alpha, from 0 (transparent) to 1 (opaque)
        #plt.hist(combine_inner_lists(extract_cat_samples(categories.inter_arrivals,categories.categories,i)),bins=40,normed=1,color="cyan",alpha=.3,label="histogram") #alpha, from 0 (transparent) to 1 (opaque)
        #plt.hist(np.asarray(categories[i][0]),bins=40,normed=1,color="cyan",alpha=.3,label="histogram") #alpha, from 0 (transparent) to 1 (opaque)
        plt.xlabel("inter-arrival time (days)")
        plt.ylabel("PDF")
        plt.legend()
        save_as = './app/static/img/cat_result/mle/mleplt_cat' + str(
            i) + '.png'  # dump results into mle folder
        plt.savefig(save_as)
        plt.show(block=False)
        plt.close(fig)
Example #22
    def calculate_parameters(self, values):
        """
        Calculate parameters of the current distribution

        Parameters
        -----------
        values
            Empirical values to work on
        """
        if len(values) > 1:
            self.loc, self.scale = expon.fit(values, floc=0)
Example #23
def get_dist_matrix_exp(pred_D):
    lambdas = np.zeros(np.shape(pred_D)[0])
    dist_matrix_exp = np.zeros([np.shape(pred_D)[0], np.shape(pred_D)[0]])
    for i in range(len(lambdas)):
        _, scale = expon.fit(pred_D[i, :, 0], floc=0)
        lambdas[i] = 1. / scale

    for i in tqdm(range(np.shape(dist_matrix_exp)[0])):
        for j in range(np.shape(dist_matrix_exp)[0]):
            dist_matrix_exp[i, j] = JS_divergence_exp(lambdas[i], lambdas[j])
    return dist_matrix_exp
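JS_divergence_exp above is a helper defined elsewhere in that project. For orientation, here is a hedged numerical sketch of the Jensen-Shannon divergence between two exponentials with rates lam_p and lam_q; the integration grid bounds are an assumption of this sketch, not part of the original:

import numpy as np
from scipy.stats import expon

def js_divergence_exp_sketch(lam_p, lam_q, n=200000):
    # integrate far enough into the tail of the heavier distribution
    grid = np.linspace(1e-9, 20.0 / min(lam_p, lam_q), n)
    p = expon.pdf(grid, scale=1.0 / lam_p)
    q = expon.pdf(grid, scale=1.0 / lam_q)
    m = 0.5 * (p + q)

    def kl(a, b):
        # a * log(a/b), with the 0 * log(0) = 0 convention
        ratio = np.where(a > 0, a / b, 1.0)
        return np.trapz(a * np.log(ratio), grid)

    return 0.5 * kl(p, m) + 0.5 * kl(q, m)

print(js_divergence_exp_sketch(1.0, 1.0))  # ~0 for identical rates
print(js_divergence_exp_sketch(1.0, 5.0))  # positive, bounded by log(2)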
Example #24
 def rate_from_exp_fit(self):
     '''
     The interarrival times of a Poisson process follow an exponential
     distribution, so fit one to get the rate!
     To fit the exponential form for interarrival times with the location fixed at zero, refer to:
     https://stackoverflow.com/questions/25085200/scipy-stats-expon-fit-with-no-location-parameter
     '''
     if self.interarrival_times is None:
         self.interarrival_times = self.trigger_intervals_all_files()
     loc, scale = expon.fit(self.interarrival_times, floc=0)
     return 1 / scale
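A quick sanity check of the docstring's claim (names illustrative): draw exponential gaps at a known rate, as a Poisson process would produce, and recover the rate from the fit.

import numpy as np
from scipy.stats import expon

rate = 3.0
gaps = np.random.exponential(1 / rate, size=100000)
loc, scale = expon.fit(gaps, floc=0)
print(1 / scale)  # close to 3.0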
Example #25
def approximating_dists(data, bins):
    try:
        exp_param = expon.fit(data)
    except Exception:
        print("screwed expon fit ")
    #print("params for exponential ", exp_param)

    try:
        pdf_exp_fitted = expon.pdf(bins, loc=exp_param[0], scale=exp_param[1])  # fitted distribution
    except Exception:
        print(" returning as nothing to plot ")
    return [exp_param, pdf_exp_fitted]
Example #26
    def fit(self, data):
        """
        data is an np array
        :param data:
        :return:
        """
        data = np.array(data)
        nPoints = len(data)
        avg = np.mean(data)
        std = np.std(data)
        spikes = data > ([avg + self.spike_std_factor * std] * nPoints)
        self.last = data[-1]
        self.params = None
        if any(spikes):
            self.spike_max = max(data[spikes])
            self.spike_avg = np.mean(data[spikes])
            last_nonzero_idx = np.max(np.nonzero(data))
            self.time_since_last_spike = len(data) - 1 - np.max(
                np.nonzero(spikes))
            interarrivaltime = 0
            spikewidth = 0
            inter_arrival_times = []
            in_spike = False
            has_spiked = False
            spikewidths = []
            for isspike in spikes:
                if not isspike:
                    if in_spike:  # was in spike, now not spike
                        spikewidths.append(spikewidth)
                    spikewidth = 0
                    interarrivaltime = interarrivaltime + 1
                    in_spike = False
                else:
                    if not in_spike and has_spiked:
                        inter_arrival_times.append(interarrivaltime)
                    interarrivaltime = 0
                    spikewidth = spikewidth + 1
                    in_spike = True
                    has_spiked = True
            if len(inter_arrival_times) > 0:
                if self.fit_model == "Weibull":
                    self.params = exponweib.fit(inter_arrival_times,
                                                floc=0,
                                                f0=1)  # a, c, loc, scale
                elif self.fit_model == "Expon":
                    self.params = expon.fit(inter_arrival_times,
                                            floc=0)  # returns loc, scale
                else:  # self.fit_model == "Sampling":
                    self.params = inter_arrival_times
            self.spike_width_avg = int(
                np.mean(spikewidths)) if len(spikewidths) > 0 else 1

        return self
Example #27
def distest_loose(x):
    x = _series(x)
    data = {
        'Shapiro-Wilk (normal)': shapiro(x),
        'D\'Agostino-Pearson (normal)': normaltest(x),
        'Kolmogorov-Smirnov (normal)': kstest(x, norm.cdf, norm.fit(x)),
        'Kolmogorov-Smirnov (powerlaw)': kstest(x, powerlaw.cdf, powerlaw.fit(x)),
        'Kolmogorov-Smirnov (exponential)': kstest(x, expon.cdf, expon.fit(x)),
    }
    keys = data.keys()
    values = (p for _, p in data.values())
    return pd.DataFrame(values, keys, ['p-value']).round(DEC)
Example #28
def q_calibrate(G = 20000, S = 100000, p0 = 0.001):
    qneg = [qscore(np.random.rand(G)) for i in range(S)]
    qneg = np.array(qneg)
    qneg_sorted = qneg[np.argsort(-qneg)]
    n1 = int(p0 * S)
    #lam = S / np.sum(qneg) # maximum likelihood estimate of exponential distribution of Q
    loc, scale = expon.fit(qneg, floc = 0)
    lam = 1 / scale
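    # with floc=0 the fitted scale is the sample mean, so this lam equals the
    # commented-out ML estimate S / np.sum(qneg) above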
    qcal = Q_Cal(qmax = qneg_sorted[n1],
                 lam  = lam,
                 p0 = p0,
                 ecdf = ECDF(qneg_sorted))
    return qcal
Example #29
def exp_statistics(data, bins):
    exp_param = [-1, -1]
    pdf_exp_fitted = [-1]

    try:
        exp_param = expon.fit(data)
    except Exception:
        exp_param = [-2, -2]

    try:
        pdf_exp_fitted = expon.pdf(bins, loc=exp_param[0], scale=exp_param[1])  # fitted distribution
    except Exception:
        exp_param = [-1]

    return [exp_param, pdf_exp_fitted]
Example #31
    def displayFits(self):
        """
        generates two histograms on the same plot. One uses maximum likelihood to fit
        the data while the other uses the average time.
        """
        tau = 25.0
        nBins = 400
        size = 100
        taulist = []
        taulistavg = []
        for i in range(1000):
            x = range(nBins)
            timeHgValues = np.zeros(nBins, dtype=np.int64)
            timeStamps = expon.rvs(loc=0, scale=tau, size=size)
            ts64 = timeStamps.astype(np.uint64)
            tsBinner.tsBinner(ts64, timeHgValues)
            
            param = sum(timeStamps)/len(timeStamps)
            fit = expon.pdf(x, scale=param)
            fit *= size
            
            taulistavg.append(param)

        for i in range(1000):
            x = range(nBins)
            timeHgValues = np.zeros(nBins, dtype=np.int64)
            timeStamps = expon.rvs(loc=0, scale=tau, size=size)
            ts64 = timeStamps.astype(np.uint64)
            tsBinner.tsBinner(ts64, timeHgValues)
            
            param = expon.fit(timeStamps)
            fit = expon.pdf(x,loc=param[0],scale=param[1])
            fit *= size
            taulist.append(param[1]) 


        hist,bins = np.histogram(taulistavg, bins=20, range=(15,35))
        width = 0.7*(bins[1]-bins[0])
        center = (bins[:-1]+bins[1:])/2
        plt.step(center, hist, where = 'post', label="averagetime", color='g')
        hist,bins = np.histogram(taulist, bins=20, range=(15,35))
        width = 0.7*(bins[1]-bins[0])
        center = (bins[:-1]+bins[1:])/2
        plt.step(center, hist, where = 'post', label="maxlikelihood")
        plt.legend()
        plt.savefig(inspect.stack()[0][3]+".png")
Example #32
def other_simulator(data, n_precip, n_heat, interval, p_rain, strength, samples):
    if strength == 'Meh, sorta feel ok about it':
        mult = 5.0
    elif strength == 'It will probably happen':
        mult = 25.0
    else:
        mult = 50.0
    prior_a = 1.0*mult
    prior_b = (1.0/p_rain.value)*mult
    
    sample = data[data.WEEK==interval]
    years = np.max(sample.YEAR) - np.min(sample.YEAR)
    a, b = prior_a + np.sum(sample.RAIN), prior_b + years
    
    if np.isnan(a): a = 0
    if np.isnan(b): b = 1
    
    gam = gamma.rvs(a=a, scale=1/b, size=samples)
    
    rain_mu = a/b
    mu, sigma = norm.fit(sample.TMAX)
    l, s = expon.fit(sample.PRCP)
    
    raindays = poisson.rvs(rain_mu, size=samples)
    storm = np.zeros(samples)
    t_vec = np.zeros(samples)
    rf_vec = np.zeros(samples)
    
    for i in range(len(raindays)):
        if raindays[i] > 0:
            if raindays[i] > 7:
                days = np.random.randint(3,7)
            else:
                days = raindays[i]
            t_max = norm.rvs(mu, sigma, size=days)
            rainfall = expon.rvs(l, s, days)
            temp = np.zeros(days)
            for j in range(len(t_max)):
                if rainfall[j] >= n_precip.value and t_max[j] < n_heat.value:
                    temp[j] = 1
            t_vec[i] = np.max(t_max)
            rf_vec[i] = np.sum(rainfall)
            if np.sum(temp) >= 3:
                storm[i] = 1
    
    return gam, storm, raindays, t_vec, rf_vec 
Example #33
def fit_condition_distributions(train_cond_data):
    """
    Calculate the scale parameter for the exponential distribution of correlated conditional variables
    for the Lorenz 96 model in time.

    Args:
        train_cond_data: array of conditioning values where the first column is the current X, and each
            other column is a lagged X value

    Returns:
        array of scale values
    """
    train_cond_exp_scale = np.zeros(train_cond_data.shape[1] - 1)
    for i in range(1, train_cond_data.shape[1]):
        train_cond_exp_scale[i - 1] = expon.fit(np.abs(train_cond_data[:, 0] -
                                                       train_cond_data[:, i]),
                                                floc=0)[1]
    return train_cond_exp_scale
Example #34
def delay_times(ax, dts, bins=500, bounds=None, fit=False, alpha=0.75):
    if bounds is None:
        bounds = [0, 1e5]
    [n, bins, _patches] = plots_base.plot_hist(ax, [dts],
                                               bins=bins,
                                               x_range=bounds,
                                               density=True)
    if fit:
        loc, scale = expon.fit(dts[(dts > bounds[0]) & (dts < bounds[1])])
        red_line = ax.plot(bins[0][:-1],
                           expon.pdf(bins[0][:-1], loc=loc, scale=scale),
                           color="r")
        ax.legend(red_line,
                  ["1/tau = " + str(round(1 / (scale * 1e-9))) + " Hz"])
    ax.set_xlabel("Delay Times (ns)")
    ax.set_ylabel("Normalized Counts")
    ax.set_xscale("log")
    ax.set_yscale("log")
Example #35
def expon_fit(data, var, stat, bmicat, start, end, space):
    bmimean = np.zeros((4, 3))
    for i in range(len(bmicat)):
        x = data[data.common_user_id.isin(bmicat[i])]
        if stat == 'n':
            x1 = x[var]
        else:
            x1 = x[var][stat]
        loc, scale = expon.fit(x1)
        f = np.linspace(start, end, space)
        y = expon.pdf(f, loc, scale)
        plt.plot(f, y)
        plt.ylim(0, .14)
        bmimean[i, 0] = x1.mean()
        bmimean[i, 1] = x1.median()
        bmimean[i, 2] = x1.std()
    plt.show()
    return (bmimean)
Example #36
    def calculate_global_time(path):
        # calculate global_time among all adoption times/dates in a dataset
        all_ts = list()
        i = 0
        with open(path, 'r') as f:
            for line in f:
                i += 1
                last_t = 0
                paths = line.strip().split('\t')
                # remove cascade id and label
                paths = paths[2:-1]
                for path in paths:
                    t = int(path.split(':')[1])
                    reaction_t = t - last_t
                    last_t = t
                    all_ts.append(reaction_t)

        return np.mean(all_ts), i, expon.fit(all_ts)
Example #37
def main():
    transactions = classification.load_transactions()
    orders = classification.classify_trades(transactions)
    interarrivals = classification.calculate_interarrival_times(orders)

    arrivals = np.cumsum(interarrivals)

    time_step = 10 * 60 * 1000  # ten-minute bins

    bins = []
    bin = []

    time = 0

    print('Processing...')

    for i, t in enumerate(arrivals):
        if t > time + time_step:
            jump = int(np.floor((t - time) / time_step))
            time += jump * time_step

            bins.append(bin)
            bins.extend([[] for _ in range(jump - 1)])  # pad empty bins for skipped intervals
            bin = []
        bin.append(orders[i])

    times = []
    rates = []
    for bin in bins:
        if len(bin) != 0:
            times.append(bin[0].start_time)
            ia = classification.calculate_interarrival_times(bin)
            loc, scale = expon.fit([i for i in ia if i > 1000])
            rates.append(scale)

    rate_returns = [rates[i + 1] / rates[i] for i in range(len(rates) - 1)]

    hours = [[] for i in range(24 * 6)]

    for i, r in enumerate(1 / np.array(rates)):
        hours[i % (24 * 6)].append(r)

    plt.plot([i for i in range(24 * 6)], [np.mean(hour) for hour in hours])

    plt.show()
Example #39
def mass_selection_ormel():

    m_star_list = np.linspace(0.08, 0.2, 1000)

    dm = m_star_list[1] - m_star_list[0]

    IMF = np.power(m_star_list, -1.3)  #From Chandler 2003

    #n = (1./0.3)*((1./(0.08**0.3)) - (1./(m_star**0.3)))

    number = IMF * dm

    loc, scale = expon.fit(number.tolist())

    t = expon.rvs(loc, scale, size=1)

    if t[0] < number[0] and t[0] > number[len(number) - 1]:
        return 0.04 * np.power(np.divide(t[0], dm), -1. / 1.3)
    else:
        pass
Example #40
import sys
from scipy.stats import expon
import numpy as np

if sys.stdin.isatty():
    sys.stderr.write("usage: cat data.log | python %s\n" % __file__)
    exit(1)

x = np.array([float(line.strip()) for line in sys.stdin])

# loc, scale
loc, scale = expon.fit(x, floc=0)
print("lambda")
print(1./scale)
# 0.278487310799 3.59082788057
Example #41
def main():
  print(floor(time.time()*1000))
  db = startup_tests()
  eL.main(False)
  conf = getConfig()
  global albumsBest, current_playlist, con, getSongData, repeatsList, nonExplicitList
  albumsBest = db.prepare(
    "SELECT album_genres.album_id, album_genres.similarity from album_genres INNER JOIN albums on albums.album_id=album_genres.album_id WHERE "
    +("SUBSTRING(albums.folder_path,1,1) = '/' and albums.album_id in (select songs.album_id from songs where SUBSTRING(songs.filename,1,1) = '/') AND "
     if conf['production'] else "")
    +("albums.playcount>0 AND " if conf['playlistRepeats'] else "")
    +"album_genres.genre_id=$1")
  getSongData = db.prepare("SELECT songs.song, songs.length FROM songs WHERE songs.album_id=$1")
  if conf['playlistRepeats']:
    repeatsList = [x[0] for lst in db.prepare("select distinct song_id from playlist_song") for x in lst]
  nonExplicitList = conf['nonExplicitList']
  current_playlist = playlistBuilder(db)
  con = databaseCon(db)
  #Doing subgenre/album for "python3 genplaylist type id"
  if len(sys.argv) == 3:
    if sys.argv[1] == 'subgenre':
      genPlaylist(getStartingAlbum(int(sys.argv[2])), production = conf['production'], playlistRepeats = conf['playlistRepeats'],subgenre=int(sys.argv[2]))
    elif sys.argv[1] == 'album':
      current_playlist.fillAlbumsArtistsCache(int(sys.argv[2]))
      current_playlist.album_history.extend([int(sys.argv[2]) for i in range(5)])
      genPlaylist(int(sys.argv[2]), production = conf['production'], playlistRepeats = conf['playlistRepeats'])
    else:
      print("Error with arg1: not matching to album or subgenre:"+sys.argv[1])
      exit(1)
  elif len(sys.argv) != 1 and not (len(sys.argv) == 2 and sys.argv[1].strip().isdigit()):
    print("Error with args; needs some or none!")
    exit(1)
  else:
    if not os.path.isfile("config/schedule.tsv"):
      print("Error: no schedule file found. Write one and save it to config/schedule.tsv")
      exit(1)
    schedule, supergenres = processSchedule()
    if "correctGenreProportions" in conf and conf["correctGenreProportions"]:
      def getGenre(d,h):
        real_genre_vals = dict([x for lst in con.db.prepare("SELECT genre, COUNT(*) FROM playlists GROUP BY genre").chunks() for x in lst])
        for genre, val in supergenres.items():
          real_genre_vals[genre] = real_genre_vals[genre]*val
        mostDiff = min(list(real_genre_vals.items()), key=(lambda x: x[1]))
        print("Lowest corrected proportional genre is "+mostDiff[0]+" at "+str(mostDiff[1])+" playlistcount")
        return mostDiff[0]
    else:
      def getGenre(d,h):
        supergenresSum = sum([supergenres[y] for y in schedule[d][h]])
        print("Generating "+str(ceil(playlistLength/120))+"+ albums out of one of the following genres with the following weights (of which gets picked:")
        genres = [(x, supergenres[x]/supergenresSum) for x in schedule[d][h]]
        print('\t'+(',\t'.join([' : '.join(map(str,x)) for x in genres])))
        real_genre_vals = dict([x for lst in con.db.prepare("SELECT genre, COUNT(*) FROM playlists GROUP BY genre").chunks() for x in lst])
        if len(real_genre_vals) > 0 and any([x > 30 for x in real_genre_vals.values()]):
          print("Since real genre data in playlists present, here are the real proportions:")
          supergenresRealSum = sum([real_genre_vals[y[0]] for y in genres])
          print('\t'+(',\t'.join([' : '.join(map(str,x)) for x in [(y[0], real_genre_vals[y[0]]/supergenresRealSum) for y in genres]])))
          mostDiff = max([(x[0], ((x[1] - real_genre_vals[x[0]]/supergenresRealSum)/x[1])) for x in genres], key=(lambda x: x[1]))
          print("Biggest difference is in "+mostDiff[0]+" by "+str(mostDiff[1])+"%")
          return mostDiff[0]
        return getitem(genres)[0]

    print("Processed supergenres from schedule with frequencies")
    day = list(schedule.keys())[randint(0,6)]
    hour = randint(0,23)
    print("Starting on "+day+" at "+str(hour)+":00:00, and doing a 1-hour playlist for each hour henceforth")
    playlistLength = int(conf['playlistLength'])
    linerTimes = dict([ (t,l)
      for t, l in conf['liners'].items() 
      if (float(t)*60)+float(l) <= playlistLength])
    print("Doing liners during the following times:")
    for t, duration in sorted(list(linerTimes.items())):
      print('\t'+str(t)+':00 - '+str(t)+':'+str(duration))
    subgenres = dict([(x[0], list(x[1:]) if x[1] is not None else [0,x[2]]) for lst in db.prepare("SELECT genre_id, popularity, supergenre FROM genres").chunks() for x in lst])
    subgenres_rvars = {}
    for key in supergenres.keys():
      subgenres_rvars[key] = norm(*norm.fit([x[0] for x in subgenres.values() if x[1]==key]))
    genresUsed = db.prepare("SELECT subgenre, COUNT(subgenre) FROM playlists WHERE playlists.genre = $1 GROUP BY playlists.subgenre")
    getSubgenreName = db.prepare("SELECT genres.genre FROM genres WHERE genres.genre_id = $1") 
    playlistGenerations = int(sys.argv[1]) if len(sys.argv) == 2 else int(conf['playlistGenerations'])

    #done with setup; real work now
    for g in range(playlistGenerations):
      genre = getGenre(day, hour)
      print("Picked "+genre)
      for lst in genresUsed.chunks(genre):
        for subgenre,plays in lst:
          if len(subgenres[subgenre]) == 2:
            subgenres[subgenre].append(plays)
      genresUsed_rvar = expon(*expon.fit([x[2] if len(x)>2 else 0 for x in subgenres.values() if x[1]==genre]))
      possible_subgenres = sorted([ (key,
        ((1-genresUsed_rvar.cdf(val[2] if len(val)>2 else 0))+subgenres_rvars[genre].cdf(val[0])) )
        for key,val in subgenres.items()
        if val[1]==genre], key=lambda x: x[1])
      albums = []
      while len(albums) < 2 and len(possible_subgenres)>0:
        subgenre, temp = getitem(possible_subgenres)
        possible_subgenres.remove((subgenre,temp))
        albums = sorted([[x[0],percentValidation(x[1])] for x in albumsBest(subgenre)], reverse=True)
      if len(possible_subgenres)==0:
        print("Error: couldn't find a suitable subgenre for genre")
      else:
        subgenreName = list(getSubgenreName(subgenre))[0][0]
        print("Picked "+subgenreName+" as a starting subgenre")
        startingAlbum = getStartingAlbum(subgenre, albums)
        current_playlist.fillAlbumsArtistsCache(startingAlbum, genre)
        current_playlist.album_history.extend([a[0] for a in albums[:ceil(len(albums)/20.0)+1] if a[0] in current_playlist.albums])
        try:
          genPlaylist(startingAlbum, linerTimes, playlistLength, production = conf['production'], playlistRepeats = conf['playlistRepeats'], genre=genre)
        except Exception as e:
          handleError(e,"Error with generating this playlist; going to keep making new ones")

      current_playlist = playlistBuilder(db)
      hour = (hour + 1) % 24
      if hour==0:
        weekdays = ['Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday']
        day = weekdays[(weekdays.index(day)+1 ) % 7]
Example #42
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import expon
from scipy.stats import norm

# Part 2. Generate 1000 random numbers with an exponential distribution, plot the histogram and compare with the known PDF of that distribution.

# Then perform 1000 sums of 1000 exponentially distributed random numbers and compare (fit) to a normal distribution, verifying the central limit theorem.

n=[]
for i in range(1000):
    n.append(np.random.exponential(10)) # fill the list "n" with exponentially distributed random numbers with mean 10.


loc1,scale1 = expon.fit(n)  # get the "scale" and "loc" parameters from a fit over the data in "n". In this case the mean of the distribution equals "scale".
print(scale1,loc1) # print these parameters

x = np.linspace(0,50, 100)  
y=expon.pdf(x,scale=scale1, loc=loc1)  # plot an exponential distribution with mean 10.

f, fig1 = plt.subplots(1,1)  

fig1.plot(x, y,'r-', lw=5, alpha=0.6, label='expon pdf') # plot x vs y (the fitted distribution)
fig1.hist(n,bins=50,density=True) # histogram of n; it is important that it is normalised.
f.savefig('graficas.png') # save the plots to a file.

#Up to here we have verified that the data do belong to the given distribution. Now we repeat the process, creating a variable that is the sum of the generated variables.


sumas=[] # each element of the list "sumas" stores the sum of 1000 exponentially distributed random variables.
for i in range(1000):
    sumas.append(np.sum(np.random.exponential(10, 1000)))

mu1, sigma1 = norm.fit(sumas) # fit a normal to the sums, verifying the central limit theorem.
print(mu1, sigma1)
Example #43
def signal_variability(data, subplots=False, title=None, density_limits=(-20,0), threshold_level=10):

    import h5py
    if type(data)==h5py._hl.dataset.Dataset:
        title = data.file.filename+data.name
        data = data[:,:]

    from numpy import histogram, log, arange, sign
    import matplotlib.pyplot as plt

    plt.figure()
#   plt.figure(1)
    if subplots:
        rows = subplots[0]
        columns = subplots[1]
        channelNum = 0
    else:
        rows = 1
        columns = 1
        channelNum = arange(data.shape[0])

    for row in range(rows):
        for column in range(columns):
            if type(channelNum)==int and channelNum>=data.shape[0]:
                continue
            print("Calculating Channel "+str(channelNum))

            if type(channelNum)==int:
                ax = plt.subplot(rows, columns, channelNum+1)
            else:
                ax = plt.subplot(rows, columns, 1)

            d = data[channelNum,:]
            dmean = d.mean()
            dstd = d.std()
            ye, xe = histogram(d, bins=100, density=True)
            if (sign(d)>0).all():
                from scipy.stats import expon
                expon_parameters = expon.fit(d)
                yf = expon.pdf(xe[1:], *expon_parameters)
             #   left_threshold, right_threshold = likelihood_threshold(d, threshold_level, comparison_distribution='expon', comparison_parameters=expon_parameters)
                left_threshold = 0
                right_threshold = 0
            else:
                from scipy.stats import norm
                yf = norm.pdf(xe[1:],dmean, dstd)
                left_threshold, right_threshold = likelihood_threshold(d, threshold_level, comparison_distribution='norm', comparison_parameters=(dmean, dstd))

            x = (xe[1:]-dmean)/dstd
            ax.plot(x, log(ye), 'b-', x ,log(yf), 'r-')
#            ax.set_ylabel('Density')
#            ax.set_xlabel('STD')
            if rows!=1 or columns!=1:
                ax.set_title(str(channelNum))
                ax.set_yticklabels([])
                ax.set_xticklabels([])
            if density_limits:
                ax.set_ylim(density_limits)
            if (sign(d)>0).all():
                ax.plot(((right_threshold-dmean)/dstd, (right_threshold-dmean)/dstd), plt.ylim())
            else:
                ax.plot(((left_threshold-dmean)/dstd, (left_threshold-dmean)/dstd), plt.ylim())
                ax.plot(((right_threshold-dmean)/dstd, (right_threshold-dmean)/dstd), plt.ylim())
            channelNum += 1

    if title:
        plt.suptitle(title)
Example #44
# http://docs.scipy.org/doc/numpy/user/basics.creation.html
data = np.array(crashIntervals)

#print(sorted(data, reverse=True))


# We now try to fit an exponential distribution to the data.

# This will print detailed information of this function!
# print(expon.fit.__doc__)


# http://stackoverflow.com/questions/21610034/fitting-distribution-with-fixed-parameters-in-scipy/
# http://stackoverflow.com/questions/25085200/scipy-stats-expon-fit-with-no-location-parameter

loc, scale = expon.fit(data, floc=0)

print(loc)

print(scale)



# Now, we want to test how well the exponential distribution
# fits the data.

# TODO: study more on the kstest
#       try examples in the doc
#       read Wiki page
#       http://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
#       also some related posts
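To act on the TODO above, a hedged sketch of the KS step, reusing the data, loc and scale already defined in this snippet: run scipy's one-sample test against the fitted exponential. One caveat worth knowing: because loc and scale were estimated from the same data, the plain KS p-value is optimistic; a Lilliefors-style correction or a parametric bootstrap is the rigorous route.

from scipy.stats import expon, kstest

stat, pvalue = kstest(data, expon(loc=loc, scale=scale).cdf)
print(stat, pvalue)  # a small p-value argues against the exponential fit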