def plot_poisson():
    # Prediction for Wawrinka in 2014.
    fig, ax = plt.subplots(1, 1)
    mu = 7.869325
    x = np.arange(poisson.ppf(0.01, mu), poisson.ppf(0.999, mu))
    ax.plot(x, poisson.pmf(x, mu), 'wo', ms=8, label='poisson pmf')
    # Colour-code the stems in three bands (blue / red / green).
    ax.vlines(x, 0, poisson.pmf(x, mu),
              colors=['b'] * 5 + ['r'] * 3 + ['g'] * 8, lw=5, alpha=0.5)
    rv = poisson(mu)
    ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
              label='frozen pmf')
    plt.title("Stanislas Wawrinka")
    plt.xlabel('# QF+ Finishes in 2014')
    plt.ylabel('Probability')
    prob0 = poisson.cdf(6, mu)
    prob123 = poisson.cdf(9, mu) - poisson.cdf(6, mu)
    probAbove3 = poisson.cdf(10000, mu) - poisson.cdf(9, mu)
    print(prob0)
    print(prob123)
    print(probAbove3)
    plt.show()
def testprobabilitiespoisson(self):
    prob = zeros((4, 3))
    exp_value = self.data.calculate_equation(self.coefficients[0])
    prob[:, 0] = poisson.pmf(0, exp_value)
    prob[:, 1] = poisson.pmf(1, exp_value)
    prob[:, 2] = 1 - poisson.cdf(1, exp_value)
    prob_model = self.model.calc_probabilities(self.data)
    # Compare element-wise; the builtin all() is ambiguous on 2-D arrays.
    self.assertTrue((prob == prob_model).all())
def lrtest(data, xvars):
    # Likelihood ratio test between the input model and a null
    # "traffic only" model.
    pedinj_count = data['pedinj_count']
    xvars_0 = shape_xvars(data[['pvtraf']])
    xvars_1 = shape_xvars(data[xvars])
    inj_pred_0 = bilin_regr(data, ['pvtraf'], plot=False).predict(xvars_0)
    inj_pred_1 = bilin_regr(data, xvars, plot=False).predict(xvars_1)
    LL_0 = np.log(poisson.pmf(pedinj_count, inj_pred_0)).sum()
    LL_1 = np.log(poisson.pmf(pedinj_count, inj_pred_1)).sum()
    TS = -2 * (LL_0 - LL_1)
    print("TS, LL_1, LL_0 = ", TS, LL_1, LL_0)
def _error(value):
    '''Construct Bayesian errors using the Poisson distribution.

    likelihood = P(value|lambda) under the Poisson assumption;
    error: lambdas of equal likelihood for which the area in between is 68%.
    '''
    lambda_up, lambda_down, step_size = 1.1 * value, 0.9 * value, float(value) / 10
    for i in range(5):
        lambda_up -= step_size
        lambda_down += step_size
        step_size /= 10
        while (poisson.cdf(value, lambda_down) - poisson.cdf(value, lambda_up)) < 0.6826894921370859:
            lambda_up += step_size
        while poisson.pmf(value, lambda_down) > poisson.pmf(value, lambda_up):
            lambda_down -= step_size / 10
    return (value - lambda_down, lambda_up - value)
def poisson_marginals(means, accuracy=1e-10):
    """Find the pmfs and approximate supports of a set of Poisson variables.

    The means are specified in `means`; `accuracy` sets the desired degree of
    accuracy. The "support" consists of all values for which the pmf is
    greater than `accuracy`.

    Code from the paper 'Generating spike-trains with specified correlations',
    Macke et al., Neural Computation. Adapted from
    `<http://www.kyb.mpg.de/bethgegroup/code/efficientsampling>`_

    Parameters
    ----------
    means : array-like
        Means of the Poisson random variables.
    accuracy : float, optional
        Desired accuracy (default 1e-10).

    Returns
    -------
    pmfs, cmfs, supports : ndarray
        For the k-th variable: its pmf and cdf evaluated on its support, and
        the support itself, i.e. P(kth RV == supports[k][0]) == pmfs[k][0].
    """
    from scipy.stats import poisson
    import math
    cmfs = []
    pmfs = []
    supps = []
    for k in range(len(means)):
        upper = int(max(math.ceil(5 * means[k]), 20) + 1)
        cmfs.append(poisson.cdf(range(0, upper), means[k]))
        pmfs.append(poisson.pmf(range(0, upper), means[k]))
        supps.append(np.where((cmfs[k] <= 1 - accuracy) & (pmfs[k] >= accuracy))[0])
        cmfs[k] = cmfs[k][supps[k]]
        pmfs[k] = poisson.pmf(supps[k], means[k])
    return np.array(pmfs), np.array(cmfs), np.array(supps)
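# Usage sketch for poisson_marginals (note: with unequal means the ragged
# return triggers object-array behaviour in modern NumPy; equal means keep it
# rectangular). Each truncated pmf should carry mass ~1 within `accuracy`.
import numpy as np

pmfs, cmfs, supports = poisson_marginals(np.array([4.0, 4.0]))
for k, supp in enumerate(supports):
    print("RV %d: support %d..%d, pmf mass %.10f"
          % (k, supp[0], supp[-1], pmfs[k].sum()))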
def getExpected(mu):
    """
    Given a mean coverage mu, determine the AUC, X-intercept, and elbow point
    of a Poisson-distributed, perfectly behaved input sample with the same
    coverage.
    """
    # An appropriate range for the support.
    x = np.arange(round(poisson.interval(0.99999, mu=mu)[1] + 1))
    pmf = poisson.pmf(x, mu=mu)
    cdf = poisson.cdf(x, mu=mu)
    cs = np.cumsum(pmf * x)
    cs /= max(cs)
    XInt = cdf[np.nonzero(cs)[0][0]]
    AUC = sum(poisson.pmf(x, mu=mu) * cs)
    elbow = cdf[np.argmax(cdf - cs)]
    return (AUC, XInt, elbow)
def generate_q0_via_shape_fit(data, bin_edges, binned_model, binned_params):
    '''Generate likelihood ratios based on a template fit to the data.
    Shape values for bg and signal are determined from integration of the
    underlying pdfs used to generate toys. Use these values to create the
    q0 statistic.'''
    bc, bin_edges = np.histogram(data, bin_edges, range=(100, 180))
    ibc = np.asarray(range(len(bc)))
    result = binned_model.fit(bc, ix=ibc, params=binned_params)
    nll_bg = -np.sum(np.log(poisson.pmf(bc, result.eval(A=0))))
    nll_sig = -np.sum(np.log(poisson.pmf(bc, result.best_fit)))
    q0 = 2 * (nll_bg - nll_sig)
    return q0
def generate_q0_via_bins(data, bin_edges, true_bg_bc, true_sig_bc):
    '''Generate likelihood ratios based on Poisson distributions for each bin
    in binned data. True values for bg and bg+signal are determined from
    integration of the underlying pdfs used to generate toys. Use these
    values to create the q0 statistic.'''
    bc, bin_edges = np.histogram(data, bin_edges, range=(100, 180))
    l_bg = 1
    l_sig = 1
    for i in range(len(bin_edges) - 1):
        l_bg *= poisson.pmf(bc[i], true_bg_bc[i])
        l_sig *= poisson.pmf(bc[i], true_bg_bc[i] + true_sig_bc[i])
    q0 = -2 * (np.log(l_bg) - np.log(l_sig))
    return q0
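# The running product of pmfs above underflows quickly when there are many
# bins; a numerically safer variant (a sketch with the same inputs assumed)
# works in log space with poisson.logpmf and yields the same q0 up to
# floating-point error.
import numpy as np
from scipy.stats import poisson

def generate_q0_via_bins_log(data, bin_edges, true_bg_bc, true_sig_bc):
    bc, _ = np.histogram(data, bin_edges, range=(100, 180))
    ll_bg = poisson.logpmf(bc, true_bg_bc).sum()
    ll_sig = poisson.logpmf(bc, np.asarray(true_bg_bc) + np.asarray(true_sig_bc)).sum()
    return -2 * (ll_bg - ll_sig)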
def poisson():
    from scipy.stats import poisson
    fig, ax = plt.subplots(figsize=(14, 7))
    y = np.asarray(range(0, 16))
    p1 = poisson.pmf(y, mu=1.)
    p3 = poisson.pmf(y, mu=3.)
    p10 = poisson.pmf(y, mu=10.)
    # raw strings avoid invalid-escape warnings in the LaTeX labels
    ax.plot(y, p1, 'r.-', markersize=20, label=r'$\lambda=1$', lw=3)
    ax.plot(y, p3, 'g.-', markersize=20, label=r'$\lambda=3$', lw=3)
    ax.plot(y, p10, 'b.-', markersize=20, label=r'$\lambda=10$', lw=3)
    ax.set_title('Poisson Distribution', fontsize=20)
    ax.set_xlabel('$y_i$', fontsize=20)
    ax.set_ylabel('$p(y_i)$', fontsize=20)
    ax.legend(fontsize=20)
    plt.savefig('./diagrams/poisson.svg')
def plot(dist_name, edge):
    edges = defaultdict(int)
    if dist_name == 'binomial':
        dist = Dist.binomial
        lam = 1
    elif dist_name == 'geometric':
        dist = Dist.geometric
        lam = 1.5
    else:
        raise ValueError("Wrong dist argument")
    if edge == 'loop':
        count = count_loops
    elif edge == 'parallel':
        count = count_parallel_edges
        lam **= 2
    else:
        raise ValueError('Wrong edge argument')
    for _ in range(1000):
        g = gen_fixed_degree_graph(dist, 250)
        edges[count(g)] += 1
    x = np.array(list(edges.keys()))
    y = np.array(list(edges.values()))
    y = y / np.sum(y)
    p_y = poisson.pmf(x, lam)
    plt.style.use('ggplot')
    experimental = plt.scatter(x, y)
    theoretical = plt.scatter(x, p_y, c='r', marker='+')
    plt.legend((experimental, theoretical), ('Experimental', 'Theoretical'))
    plt.title(edge + ' ' + dist_name)
    plt.xlabel('N')
    plt.ylabel('Probability')
    plt.show()
def test_Na(self):
    d = 3
    for n in range(5):
        lambda_ = (3 / 4) * (self.r + self.c)
        Na_obs = self.R._Na(n, d)
        Na_exp = log(poisson.pmf(n, lambda_))
        assert Na_obs == Na_exp
def draw_degree_distribution(g, mu):
    """Draw the degree distribution of a graph and a Poisson fit."""
    from scipy.stats import poisson
    d = g.get_degree_distribution()
    v1 = [x / sum(d.values()) for x in d.values()]
    # sorted, as we need to draw the line
    sorted_d = sorted(d.keys())
    v2 = poisson.pmf(sorted_d, mu)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    width = 0.35
    # list() so the dict keys can become an array under Python 3
    ax.bar(np.array(list(d.keys())) - width / 2, v1, width, color='m',
           label='data')
    ax.plot(sorted_d, v2, 'c--', label='Poisson')
    ax.set_xlabel('degree')
    ax.set_ylabel('probability')
    ax.set_title("{0} Nodes".format(g._node))
    ax.legend()
    plt.show()
def get_c_constant(lamb, N):
    total = 0
    for k in range(1, N):
        total += k * poisson.pmf(k, lamb)
    return total
def calculateClassProbability(num_works, ngram_dict, testdata):
    prob = {"Dickens": 0.0, "Twain": 0.0}
    for author in prob:
        author_dict = ngram_dict[author]
        log_product = 0.0
        denom = num_works[author]
        totaldict = gensum(author_dict)
        totalvec = gensum(testdata)
        for ngram in testdata:
            if ngram in author_dict:
                lamb = author_dict[ngram] / 9
                pmf = poisson.pmf(testdata[ngram], mu=lamb)
                if pmf > 0:
                    # reuse pmf instead of recomputing it
                    log_product += math.log(pmf)
        prob[author] = log_product
    return prob
def EPM_Poisson_countd(mu, library_size):
    '''Return the Poisson mutation-count distribution for a given library size.

    The average rate is set by mu; library_size is the number of sequences in
    the library. Returns two lists: probs_list contains the number of
    sequences carrying the corresponding number of mutations in mut_list.
    '''
    probs_list = []
    mut_list = []
    alpha = 1 - 1 / (library_size * 10)
    a, b = poisson.interval(alpha, mu, loc=0)
    a = int(a)
    b = int(b)
    for k in range(a, b + 1):
        k_count = int(round(poisson.pmf(k, mu) * library_size, 0))
        if k_count != 0:
            probs_list.append(k_count)
            mut_list.append(k)
    # If, due to rounding, the total library size is greater than expected,
    # subtract the difference from the mean (assumes mu is an integer in [a, b]).
    dif = sum(probs_list) - library_size
    mutation_list = [i for i in range(a, b + 1)]
    index = mutation_list.index(mu)
    probs_list[index] -= dif
    return probs_list, mut_list
def lotteryProfit(lastWin, addWin):
    # Icelandic lottery parameters
    tPrice = 130
    rPay = 0.45
    rFP = 0.57
    numbers = 40
    balls = 5
    NsplitMax = 21
    # [lastWinMat, addWinMat] = np.meshgrid(lastWin, addWin)
    # Number of possible winners
    Nsplit = np.arange(0, NsplitMax)
    # Probability of winning
    Ncomb = comb(numbers, balls)
    pWin = 1. / Ncomb
    # Number of rows bought
    N = addWin / (rPay * rFP * tPrice)
    # Probability of splitting the jackpot
    p = poisson.pmf(Nsplit, pWin * N)
    # ROI of buying all possible rows
    costAll = Ncomb * tPrice
    myWin = lastWin + addWin + costAll * rPay * rFP
    smallerWin = (1. - rFP) * rPay * costAll
    weightedWin = np.sum(p / (Nsplit + 1.) * myWin)
    totWin = weightedWin + smallerWin
    profit = totWin - costAll
    rprof = profit / costAll
    return rprof
def P_number_true_obs_fast(args):
    E_true_links = 0
    p_at_least_one_bp_at_given_position = 1 - P_breakpoints_in_interval(1, args.bp_ratio, 0)
    k = 2 * args.readlen / float(args.cov)
    # Internal breakpoints occurring within mean + 4*sigma.
    for i in range((args.insertion_size + args.readlen - args.soft) + 1,
                   args.mean + 4 * args.stddev):
        E_true_links += 2 * (1 / k) * (v(i, args.insertion_size, args.readlen, args.soft) - 1) * \
            norm.pdf(i, args.mean, args.stddev) * poisson.pmf(0, args.bp_ratio * i) * \
            p_at_least_one_bp_at_given_position ** 2
        # when no breakpoint occurs on one side
        E_true_links += 2 * (1 / k) * norm.pdf(i, args.mean, args.stddev) * \
            poisson.pmf(0, args.bp_ratio * i) * p_at_least_one_bp_at_given_position
        # when no breakpoint occurs on either side
        E_true_links += (1 / k) * norm.pdf(i, args.mean, args.stddev) * \
            poisson.pmf(0, args.bp_ratio * i)
        # print(v(i, args.insertion_size, args.readlen, args.soft))
    # when no breakpoint occurs on one side
    i = args.mean + 4 * args.stddev
    E_true_links += 2 * (1 / k) * v(i, args.insertion_size, args.readlen, args.soft) * \
        norm.pdf(i, args.mean, args.stddev) * poisson.pmf(0, args.bp_ratio * i) * \
        p_at_least_one_bp_at_given_position
    # when no breakpoint occurs on either side
    i = args.mean + 4 * args.stddev
    print(v(i, args.insertion_size, args.readlen, args.soft))
    print(1 / k)
    E_true_links += (1 / k) * v(i, args.insertion_size, args.readlen, args.soft) * \
        norm.pdf(i, args.mean, args.stddev) * poisson.pmf(0, args.bp_ratio * i)
    return E_true_links
def ztp(N, lambda_):
    """Zero-truncated Poisson distribution."""
    temp = [poisson.pmf(0, item) for item in lambda_]
    p = [uniform.rvs(loc=item, scale=1 - item) for item in temp]
    ztp = [int(poisson.ppf(p[i], lambda_[i])) for i in range(len(p))]
    return np.array(ztp)
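# Quick check of the inverse-cdf sampler above (a sketch): zero-truncated
# draws are always >= 1, and their mean approaches lambda / (1 - exp(-lambda)).
import numpy as np

lambdas = np.full(10000, 1.5)
draws = ztp(len(lambdas), lambdas)
assert draws.min() >= 1
print(draws.mean(), 1.5 / (1 - np.exp(-1.5)))  # both ~1.93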
def _I_ER(dd, beta, eps):
    n = len(dd)
    mu_bar = np.dot(np.arange(n), dd)
    poisson_pdf = poisson.pmf(range(n), beta)
    return KL_div(dd, poisson_pdf, eps) + 0.5 * (mu_bar - beta) + \
        0.5 * mu_bar * np.log(beta) - \
        0.5 * xlogx(mu_bar)
def test_discreteExFam():
    X = np.arange(100)
    pois = discrete_family(X, poisson.pmf(X, 1))
    tol = 1e-5
    print(pois._leftCutFromRight(theta=0.4618311, rightCut=(5, .5)),
          pois._test2RejectsLeft(theta=2.39, observed=5, auxVar=.5))
    print(pois.interval(observed=5, alpha=.05, randomize=True, auxVar=.5))
    print(abs(1 - sum(pois.pdf(0))))
    pois.ccdf(0, 3, .4)
    print(pois.Var(np.log(2), lambda x: x))
    print(pois.Cov(np.log(2), lambda x: x, lambda x: x))
    lc = pois._rightCutFromLeft(0, (0, .01))
    print((0, 0.01), pois._leftCutFromRight(0, lc))
    pois._rightCutFromLeft(-10, (0, .01))
    # [pois.test2Cutoffs(t)[1] for t in range(-10, 3)]
    pois._critCovFromLeft(-10, (0, .01))
    pois._critCovFromLeft(0, (0, .01))
    pois._critCovFromRight(0, lc)
    pois._critCovFromLeft(5, (5, 1))
    pois._test2RejectsLeft(np.log(5), 5)
    pois._test2RejectsRight(np.log(5), 5)
    pois._test2RejectsLeft(np.log(20), 5)
    pois._test2RejectsRight(np.log(.1), 5)
    print(pois._inter2Upper(5, auxVar=.5))
    print(pois.interval(5, auxVar=.5))
def coverage_probability(self, nr_obs, a, mean_lib, stddev_lib, z,
                         coverage_mean, read_len, s_inner, s_outer, b=None,
                         coverage_model=False):
    '''Distribution P(o|c,z) for the prior probability over coverage.
    This probability distribution is implemented as a Poisson distribution.

    Attributes:
        c    -- coverage
        mean -- mean value of the Poisson distribution

    Returns the probability P(c).
    '''
    if not b:
        # Only one reference sequence: split it into two equal-length
        # sequences to fit the model. (Assign together so `a` is not
        # halved twice before `b` is set.)
        a, b = a / 2, a / 2
    param = Param(mean_lib, stddev_lib, coverage_mean, read_len, s_inner, s_outer)
    lambda_ = mean_span_coverage(a, b, z, param)
    if coverage_model == 'Poisson':
        return poisson.pmf(nr_obs, lambda_, loc=0)
    elif coverage_model == 'NegBin':
        p = 0.01
        n = (p * lambda_) / (1 - p)
        return nbinom.pmf(nr_obs, n, p, loc=0)
    else:
        # Equivalent to uniform coverage.
        return 1
def lower_bound_imperct_cdmd(intensity, epsilon, p_f, mu=0, delta=1):
    """
    Mathematical analysis shows that, in case of imperfect CDMA, the upper
    bound of the average transmit power is:

        mu_t * exp{(1/2) * (ln(1/10) * sigma / 10)**2} * E[1/(mu*g)]

    where, by calculation:

        E[1/(mu*g)] = mu * (RADIUS**(GAMMA+2) - RADIUS_INNER**(GAMMA+2)) * 2
                      / ((RADIUS**2 - RADIUS_INNER**2) * (GAMMA+2) * RADIUS**GAMMA)

    Parameters:
        intensity: float, arrival request density
        mu: float, default 0, mean of the normal distribution
        delta: float, default 1, variance of the normal distribution
    """
    p_c = (p_f - epsilon) / (1.0 - epsilon)
    proba_distribution_poisson = [poisson.pmf(k, intensity)
                                  for k in range(int(intensity + 200))]
    cumulative = 0.0
    n_mean = 0
    while cumulative <= 1 - epsilon:
        cumulative += proba_distribution_poisson[n_mean]
        n_mean += 1
    N_c = math.ceil(math.log(1 + (n_mean - 1) / p_c, 2))
    mu_t = pow(2, L * N_c / W / TAU_S) - 1
    print("mu_t", mu_t, "N_c", N_c, "N", n_mean,
          "Nc - mu_t*(N-1)", N_c - mu_t * (n_mean - 1))
    CONSTANT_1 = (1.0 / N_c) * (1.0 / MU) * \
        (RADIUS ** (GAMMA + 2) - RADIUS_INNER ** (GAMMA + 2)) * 2 / \
        ((RADIUS ** 2 - RADIUS_INNER ** 2) * (GAMMA + 2) * RADIUS ** GAMMA)
    CONSTANT_2 = math.exp(0.5 * (mu + math.log(0.1, np.e) * delta / 10.0) ** 2)
    p_t = mu_t * CONSTANT_1 * CONSTANT_2
    try:
        p_t = 10 * math.log(1000 * p_t, 10)
    except ValueError:
        # the original message referenced an undefined loop index `i`
        print("Math domain error, p_t is: {0}".format(p_t))
        print("CONSTANT 2: {0}".format(CONSTANT_2))
    logger.debug("lamdba:{0} N_bar:{1} N_C:{2} p_t:{3}".format(
        intensity, n_mean, N_c, p_t))
    print("CONSTANT_1, CONSTANT_2, p_t", CONSTANT_1, CONSTANT_2, p_t)
    return p_t
def test_montecarlo():
    """With a three-node net, we can test Direct_Sample against numerical
    integration."""
    A = Node("A", ["B"], {"B": np.array([[1, 0], [1, .2]])})
    B = Node("B", ["C"], {"C": np.array([[1, 0], [1, .4]])})
    C = Node("C", [], {})
    net = CyberNet([A, B, C])
    T = 10
    data = gen_data(T, net, {"A": "infected", "B": "normal", "C": "normal"})
    dsres = Direct_Sample(net, data, 10000, 10,
                          {"A": "infected", "B": "normal", "C": "Normal"})
    probfroma = np.log(poisson.pmf(np.sum(data[2] == "A"), 12))

    def integrand(zbar, T=T, data=data):
        fromb_times = data[1][data[2] == "B"]
        # total = len(fromb_times)
        numbefore = np.sum(fromb_times <= zbar)
        numafter = np.sum(fromb_times > zbar)
        pbefore = zbar ** numbefore * np.exp(-zbar) / float(factorial([numbefore])[0])
        pafter = (1.4 * (T - zbar)) ** numafter * np.exp(-1.4 * (T - zbar)) / \
            float(factorial([numafter])[0])
        return pbefore * pafter * .2 * np.exp(-.2 * zbar)

    total = len(data[1][data[2] == "B"])
    # take the value ([0]) of the quad result before adding the boundary term;
    # adding a float to the (value, error) tuple would raise a TypeError
    num_integral = integrate.quad(integrand, 0, 10, epsabs=.01)[0] + \
        np.exp(-2) * 10 ** total * np.exp(-10) / float(factorial([total])[0])
    # relative test
    np.testing.assert_allclose(np.log(num_integral) + probfroma, dsres,
                               atol=0, rtol=.01)
def CalcMultivariateMultinomial(path_cvs, skip, noreplace):
    DF_PROB = Read_cvs(path_cvs, skip)
    print(DF_PROB)
    NGRB = len(DF_PROB.index)
    dct_combis = {}
    prob_sum = 0.
    combi_time = {}
    for dct_coinci in LST_NCOINCI:
        print('====================')
        print(dct_coinci)
        for keyt in DCT_TINTERVAL.keys():
            # preparation for the next loop
            if noreplace == False:
                dct_combis[keyt] = list(itertools.combinations_with_replacement(
                    DF_PROB.index, dct_coinci[keyt]))
            else:
                dct_combis[keyt] = list(itertools.combinations(
                    DF_PROB.index, dct_coinci[keyt]))
            print(keyt, dct_combis[keyt])
        # for combi_time['T1'], combi_time['T2'], combi_time['T3'] in itertools.product(dct_combis.values()):
        for combi_time['T1'] in dct_combis['T1']:
            for combi_time['T2'] in dct_combis['T2']:
                for combi_time['T3'] in dct_combis['T3']:
                    print(combi_time['T1'], combi_time['T2'], combi_time['T3'])
                    prob = 1.
                    for grb in DF_PROB.index:
                        for keytime, strtime in DCT_TINTERVAL.items():
                            # .loc replaces the removed DataFrame.ix accessor
                            prob = prob * poisson.pmf(
                                combi_time[keytime].count(grb),
                                DF_PROB.loc[grb][strtime])
                    print(prob)
                    sys.stdout.flush()
                    prob_sum += prob
    print('Total probability:', prob_sum)
    return prob_sum
def _get_poisson(self, x, p):
    # dict.has_key() was removed in Python 3; use `in` instead
    if (x, p) in lookup_poisson:
        value_poisson = lookup_poisson[(x, p)]
    else:
        value_poisson = poisson.pmf(x, p)
        lookup_poisson[(x, p)] = value_poisson
    return value_poisson
def generalized_ln_poisson(data, expectation):
    """
    When the data set is not integer-based, we need a different way to
    calculate the Poisson likelihood, so we use this version, which is
    appropriate for float data types (using the continuous version of the
    Poisson pmf) as well as the standard integer data type for the discrete
    Poisson pmf.

    Returns the natural logarithm of the value of the continuous form of the
    Poisson probability mass function, given detected counts `data` from
    expected counts `expectation`.
    """
    if not np.alltrue(data >= 0.0):
        raise ValueError(
            "Template must have all bins >= 0.0! Template generation bug?")
    if bool(re.match('^int', data.dtype.name)):
        return np.log(poisson.pmf(data, expectation))
    elif bool(re.match('^float', data.dtype.name)):
        return (data * np.log(expectation) - expectation
                - multigammaln(data + 1.0, 1))
    else:
        # the original message referenced an undefined name `psuedo_data`
        raise ValueError(
            "Unknown data dtype: %s. Must be float or int!" % data.dtype)
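# Sanity check for the continuous form above (a sketch): multigammaln(k+1, 1)
# reduces to gammaln(k+1), so at integer counts the expression
# data*log(mu) - mu - gammaln(data+1) must agree with poisson.logpmf.
import numpy as np
from scipy.stats import poisson
from scipy.special import gammaln

data = np.arange(0, 20, dtype=float)
mu = 4.2
continuous = data * np.log(mu) - mu - gammaln(data + 1.0)
np.testing.assert_allclose(continuous, poisson.logpmf(data.astype(int), mu))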
def calc_probabilities(self, data):
    """
    Return the probabilities of the different count alternatives for the
    choice variable under consideration. Based on whether the model is
    specified as Poisson or negative binomial, the appropriate probabilities
    are calculated.

    Inputs:
        data - DataArray object
    """
    # TODO: what are the parameters for the negative binomial distribution
    # [shape_param] = [1,]*nbinom.numargs
    expected_value = self.calc_expected_value(data)
    num_choices = self.specification.number_choices
    probabilities = zeros((data.rows, num_choices))
    for i in range(num_choices - 1):
        if self.distribution == 'poisson':
            probabilities[:, i] = poisson.pmf(i, expected_value)
        else:
            # TODO: implement negative binomial probabilities
            pass
    if self.distribution == 'poisson':
        probabilities[:, -1] = 1 - probabilities.cumsum(-1)[:, -1]
    else:
        # TODO: implement negative binomial probabilities
        pass
    return probabilities
def make_poisson():
    # Use an integer grid: poisson.pmf is zero at the non-integer points
    # that np.linspace(0, 20, 20) produces.
    x = np.arange(20)
    p = poisson.pmf(x, l)  # `l` is the module-level rate
    print(x)
    print(p)
    return p
def compute_ll(self):
    ll = 0
    for user, movie in self.nonzero_indices:
        assert self.ratings[user, movie] > 0
        ll += np.log(poisson.pmf(self.ratings[user, movie],
                                 np.dot(self.thetas[user, :],
                                        self.betas[movie, :])))
    for user in range(self.nusers):  # xrange is Python 2 only
        try:
            assert gammapdf(self.xis[user], self.ap, self.ap / self.b) > 0
        except AssertionError:
            print(self.xis[user], self.ap, self.ap / self.b,
                  gammapdf(self.xis[user], self.ap, self.ap / self.b))
            raise
        ll += np.log(gammapdf(self.xis[user], self.ap, self.ap / self.b))
        for topic in range(self.ntopics):
            assert gammapdf(self.thetas[user, topic], self.a, self.xis[user]) > 0
            ll += np.log(gammapdf(self.thetas[user, topic], self.a, self.xis[user]))
    for movie in range(self.nmovies):
        assert gammapdf(self.etas[movie], self.cp, self.cp / self.d) > 0
        ll += np.log(gammapdf(self.etas[movie], self.cp, self.cp / self.d))
        for topic in range(self.ntopics):
            assert gammapdf(self.betas[movie, topic], self.c, self.etas[movie]) > 0
            ll += np.log(gammapdf(self.betas[movie, topic], self.c, self.etas[movie]))
    return ll
def poisson_distribution(self, lambda_, x):
    """Return the value of the Poisson distribution at x with lambda = lambda_."""
    return poisson.pmf(x, lambda_)
a = np.arange(0, 10, 0.001)
plt.plot(a, expon.pdf(a))  # pdf is the probability density function
plt.show()

# Normal distribution
mu = 5       # mean
sigma = 2    # standard deviation
values = np.random.normal(mu, sigma, 10000)
plt.hist(values, 50)
plt.show()

# Binomial probability mass function
# n is the number of trials, p the probability of one outcome.
# pmf is a probability *mass* function: it is only nonzero at integers,
# so evaluate it on an integer grid.
n, p = 10, 0.5
a = np.arange(0, 11)
plt.plot(a, binom.pmf(a, n, p))
plt.show()

# Poisson probability mass function
# e.g. my website gets 500 visits per day on average; what are the odds of
# getting 550?
mu = 500  # mean
a = np.arange(400, 600)
plt.plot(a, poisson.pmf(a, mu))
plt.show()

# Percentiles & moments
vals = np.random.normal(0, 0.5, 1000)
print(np.percentile(vals, 90))  # value at the 90th percentile
print(np.percentile(vals, 50))
print(np.mean(vals), np.var(vals), skew(vals), kurtosis(vals))
def C_R(y, e_R, h_R, b, L_R, demand_rate):
    M = round(6 * math.sqrt((L_R + 1) * demand_rate) + (L_R + 1) * demand_rate)
    return y * e_R - h_R * (L_R + 1) * demand_rate + (h_R + b) * sum(
        [(d - y) * poisson.pmf(d, (L_R + 1) * demand_rate)
         for d in range(y, M)])
# Henrique K. Secchi
from scipy.stats import poisson

# The average number of car accidents is 2 per day.
# What is the probability of exactly 3 accidents in a day?
poisson.pmf(3, 2)

# What is the probability of 3 or fewer accidents in a day?
poisson.cdf(3, 2)

# What is the probability of more than 3 accidents in a day?
poisson.sf(3, 2)
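# For reference (a worked check; lambda = 2):
#   poisson.pmf(3, 2) = e**-2 * 2**3 / 3! ~= 0.1804
#   poisson.cdf(3, 2) ~= 0.8571
#   poisson.sf(3, 2)  = 1 - cdf(3, 2) ~= 0.1429
assert abs(poisson.cdf(3, 2) + poisson.sf(3, 2) - 1) < 1e-12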
def _accum_loglike(model, data, l):
    """Accumulate the binned Poisson log-likelihood of `data` under `model`.

    Helper factored out of L() below; the original repeated this block
    inline. Returns (l, penalty); `penalty` is None unless the comparison is
    invalid, in which case the caller should return it immediately."""
    for mj, dj in zip(model, data):
        if mj > 0 and dj <= 0:
            l -= mj - dj
        elif mj <= 0 and dj > 0:
            return l, 1e10 * (dj - mj)
        elif mj == 0 and dj == 0:
            l += 1
        else:
            l += dj * np.log(mj) - dj * np.log(dj) + dj - mj
    return l, None


def L(p):
    rec = p_to_rec(p)
    global counter
    counter += 1

    names = ['NQ', 'Ts', 'T', 'a_delay', 'q0', 'a0', 'a_pad', 'a_spe',
             'a_dpe', 'a_trpe', 'Spad', 'Spe']
    for name in names:
        if np.any(rec[name] < 0):
            return 1e10 * (1 - np.amin(rec[name]))
    names = ['q0', 'a0', 'Spad']
    for name in names:
        if np.any(rec[name] > 1):
            return 1e10 * (np.amax(rec[name]))
    if rec['Ts'][0] > 100:
        return 1e10 * rec['Ts'][0]
    if np.any(rec['St'][0] < 0.2):
        return 1e10 * (1 + np.abs(np.amin(rec['St'][0])))
    # if np.any(rec['Tf'][0] < 1):
    #     return 1e10 * (1 + np.abs(np.amin(rec['Tf'][0])))

    l = 0
    P = make_P(rec['a0'][0], rec['Spad'][0], rec['Spe'][0], rec['m_pad'][0])
    m = Model(rec['NQ'][0], rec['T'][0], [0, 0], 0, 1, rec['Ts'][0],
              rec['St'][0], rec['q0'][0], P)
    m_area = model_area(areas, rec['m_pad'][0], rec['a_pad'][0],
                        rec['a_spe'][0], rec['a_dpe'][0], rec['a_trpe'][0],
                        rec['Spad'][0], rec['Spe'][0])
    for i in range(len(pmts)):
        model = np.sum(H[:, 0, i]) * np.ravel(m[:, :500, i])
        if np.any(np.isnan(model)) or np.any(np.isinf(model)):
            print('model is nan or inf')
            print('NQ=', rec['NQ'][0, i], 'T=', rec['T'][0, i],
                  'F=', rec['F'][0, i], 'Tf=', rec['Tf'][0, i],
                  'Ts=', rec['Ts'][0, i], 'St=', rec['St'][0, i])
            plt.figure()
            plt.plot(np.mean(t.T * np.arange(np.shape(t)[0])), 'k.')
            plt.show()
            sys.exit()
        data = np.ravel(H[:, :500, i])
        l, penalty = _accum_loglike(model, data, l)
        if penalty is not None:
            return penalty

        model = np.sum(h_q0[i]) * q0_model(n_q0, rec['q0'][0, i])
        data = h_q0[i]
        l, penalty = _accum_loglike(model, data, l)
        if penalty is not None:
            return penalty

        spectra_rng = np.nonzero(
            np.logical_and(PEs > PEs[np.argmax(spectra[i]) - 5],
                           PEs < PEs[np.argmax(spectra[i]) + 5]))[0]
        model = np.sum(H[:, 0, i]) * poisson.pmf(
            PEs, np.sum(m[:, :, i].T * np.arange(np.shape(H)[0])))[spectra_rng]
        data = spectra[i][spectra_rng]
        l, penalty = _accum_loglike(model, data, l)
        if penalty is not None:
            return penalty

        model = m_area[i]
        data = H_areas[i]
        l, penalty = _accum_loglike(model, data, l)
        if penalty is not None:
            return penalty

    model = rec['a_delay'][0] * np.exp(
        -0.5 * (delays[rng_delay] - rec['T'][0, 1] + rec['T'][0, 0]) ** 2 /
        (rec['St'][0, 0] ** 2 + rec['St'][0, 1] ** 2)) / np.sqrt(
        2 * np.pi * (rec['St'][0, 0] ** 2 + rec['St'][0, 1] ** 2))
    data = delay_h[rng_delay]
    l, penalty = _accum_loglike(model, data, l)
    if penalty is not None:
        return penalty

    if counter % (len(p) + 1) == 0:
        print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print('iteration=', int(counter / (len(p) + 1)), 'fanc=', -l)
        print('--------------------------------')
        print(rec)
    return -l
mu_bound = (0, None)
data_0 = minimize(my_pdf, [1, ], args=(z,), method='SLSQP',
                  bounds=(mu_bound, ))  # args must be a tuple
print(data_0)

binwidth = 0.1
n_bins = np.arange(min(data[:, 2]), max(data[:, 2]) + binwidth, binwidth)

# Chi2 calculator
observed_values, bins, _ = plt.hist(data[:, 2], bins=n_bins)
# plt.show()

# Normalize by the number of observations. Note that `bins` holds bin edges
# (one more entry than `observed_values`), so the nonzero filtering below
# does not guarantee the chisquare inputs end up the same length.
expected_values = poisson.pmf(bins, data_0.x[0]) * len(data)
print(observed_values[observed_values != 0])
print(expected_values[expected_values != 0])
print(chisquare(observed_values[observed_values != 0],
                f_exp=expected_values[expected_values != 0]))
print('Threshold value ', chi2.isf(0.05, 18))

# x = np.arange(-1, 1, 0.01)
# y = f_6(x, data_0.x[0], data_0.x[1])
# plt.plot(x, y)
# plt.show()
def likelihood_one_game(goals_ht, goals_at, intercept, mu, a_ht, d_ht, a_at, d_at):
    lambda_ht = np.exp(intercept + mu + a_ht + d_at)
    lambda_at = np.exp(intercept + a_at + d_ht)
    p1 = poisson.pmf(goals_ht, lambda_ht)
    p2 = poisson.pmf(goals_at, lambda_at)
    return p1 * p2
            [(df_prediction['Team'] == my_team)
             & (df_prediction['Oppt'] == op_team)
             & (df_prediction['AtHome'] == False)]))
    proba_score.append(sc)
    proba_conceed.append(cc)
# df_sample['ScoreProb'] = proba_score
# df_sample['ConceedProb'] = proba_conceed

## Predicting Scores ##
score_cscc = []
score_goal = []
score_asst = []
for kk in range(0, len(df_sample)):
    score_cscc.append(
        np.sum(poisson.pmf(np.arange(0, 7, 1), float(proba_conceed[kk][0]))
               * payout_cscc))
    score_goal.append(
        np.sum(poisson.pmf(np.arange(0, 7, 1), float(proba_score[kk][0]))
               * payout_goal * df_sample['GoalRatio'][kk]))
    score_asst.append(
        np.sum(poisson.pmf(np.arange(0, 7, 1), float(proba_score[kk][0]))
               * payout_asst * df_sample['AsstRatio'][kk]))

## Appending GW Score Factor ##
score_gw_goal.append(score_goal)
score_gw_asst.append(score_asst)
# score_gw_save.append(score_goal)
score_gw_cscc.append(score_cscc)
def test_pmf_p2(self):
    poisson_pmf = poisson.pmf(2, 2)
    genpoisson_pmf = sm.distributions.genpoisson_p.pmf(2, 2, 0, 2)
    assert_allclose(poisson_pmf, genpoisson_pmf, rtol=1e-15)

def test_pmf_p1(self):
    poisson_pmf = poisson.pmf(1, 1)
    genpoisson_pmf = sm.distributions.genpoisson_p.pmf(1, 1, 0, 1)
    assert_allclose(poisson_pmf, genpoisson_pmf, rtol=1e-15)
# In[14]:
minutes = np.arange(0, 160, 5)
rv = expon(scale=1. / lambda_from_mean)
plt.plot(minutes, rv.pdf(minutes), 'o')
timediffs.hist(density=True, alpha=0.5)  # `normed` was removed from matplotlib
plt.xlabel("minutes")
plt.title(r"Normalized data and model for estimated $\hat{\lambda}$")

# In[15]:
from scipy.stats import poisson
k = np.arange(15)
plt.figure(figsize=(12, 8))
for i, lambda_ in enumerate([1, 2, 4, 6]):
    plt.plot(k, poisson.pmf(k, lambda_), '-o', label=lambda_, color=colors[i])
    plt.fill_between(k, poisson.pmf(k, lambda_), color=colors[i], alpha=0.5)
plt.legend()
plt.title("Poisson distribution")
plt.ylabel("PMF at $k$")
plt.xlabel("$k$")

# In[18]:
per_hour = df.minutes // 60
num_births_per_hour = df.groupby(per_hour).minutes.count()
num_births_per_hour

# In[19]:
num_births_per_hour.mean()
def SimulateMatch(model, homeTeam, awayTeam, maxGoals=10):
    cols = [[awayTeam + ' Goals'] * (maxGoals + 1),
            [goal for goal in range(maxGoals + 1)]]
    ind = [[homeTeam + ' Goals'] * (maxGoals + 1),
           [goal for goal in range(maxGoals + 1)]]
    tuplesCols = list(zip(*cols))
    tuplesInd = list(zip(*ind))
    columns = pd.MultiIndex.from_tuples(tuplesCols, names=['Away Team', 'Goals'])
    index = pd.MultiIndex.from_tuples(tuplesInd,
                                      names=['Home Team', 'Percent Probability'])
    htAvgGoals = model.predict(
        pd.DataFrame(data={'team': homeTeam, 'opponent': awayTeam, 'home': 1},
                     index=[1])).values[0]
    atAvgGoals = model.predict(
        pd.DataFrame(data={'team': awayTeam, 'opponent': homeTeam, 'home': 0},
                     index=[1])).values[0]
    FixtureCalculated = [
        [round(poisson.pmf(i, teamAvg), 1) for i in range(0, maxGoals + 1)]
        for teamAvg in [htAvgGoals, atAvgGoals]]
    HomeVsAway = pd.DataFrame(np.outer(np.array(FixtureCalculated[0]),
                                       np.array(FixtureCalculated[1])),
                              columns=columns, index=index)
    HomeVsAway = HomeVsAway.style.set_table_styles(
        [dict(selector='th', props=[('text-align', 'center')])])
    HomeVsAway.set_properties(**{'text-align': 'center'})
    HomeAwayDraw = {
        'Home': round(np.sum(np.tril(np.outer(np.array(FixtureCalculated[0]),
                                              np.array(FixtureCalculated[1])),
                                     -1)), 3),
        'Draw': round(np.sum(np.diag(np.outer(np.array(FixtureCalculated[0]),
                                              np.array(FixtureCalculated[1])))),
                      3),
        'Away': round(np.sum(np.triu(np.outer(np.array(FixtureCalculated[0]),
                                              np.array(FixtureCalculated[1])),
                                     1)), 3),
    }
    returnable = [[round(htAvgGoals, 0), round(atAvgGoals, 0)],
                  HomeVsAway,
                  pd.DataFrame(HomeAwayDraw.values(),
                               index=HomeAwayDraw.keys()).T]
    return returnable
# Python - POISSON DISTRIBUTION - PROBABILITY MASS FUNCTION
# http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.poisson.html

from scipy.stats import poisson

# Enter the value of the mean of the distribution:
mean = 10

# Enter the value of x (integer) at which the value of the mass function is
# required:
x = 6

# The value at x of the probability mass function f(x) is:
print()
print("The value of the probability mass function at x =", x, "is f(x) =",
      poisson.pmf(x, mean))
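# Worked check (a sketch): f(6) = e**-10 * 10**6 / 6! ~= 0.0631. Also note
# that pmf is a *mass* function, so it is zero at non-integer x.
import math
assert abs(poisson.pmf(6, 10)
           - math.exp(-10) * 10 ** 6 / math.factorial(6)) < 1e-12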
def quick_poisson_pmf(n, lam):
    global POISSON_PMF_DICT
    key = (n, lam)
    if key not in POISSON_PMF_DICT:
        POISSON_PMF_DICT[key] = poisson.pmf(n, lam)
    return POISSON_PMF_DICT[key]
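# A more idiomatic equivalent of the memoizer above (a sketch using
# functools.lru_cache; the name is hypothetical, not from the source):
from functools import lru_cache
from scipy.stats import poisson


@lru_cache(maxsize=None)
def quick_poisson_pmf_cached(n, lam):
    return poisson.pmf(n, lam)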
def test_pmf_p5(self):
    poisson_pmf = poisson.pmf(10, 2)
    genpoisson_pmf_5 = sm.distributions.genpoisson_p.pmf(10, 2, 1e-25, 5)
    assert_allclose(poisson_pmf, genpoisson_pmf_5, rtol=1e-12)
def negativelyAffectedTrips(T, N, X, m, cluster, bikeData, parLambda, nDays,
                            A, poissonArray, timesArray, ind=None,
                            randomSeed=None, nStations=329):
    """
    Counts the number of negatively affected trips.

    We divide the bike stations into m groups according to the K-means
    algorithm. The bikes are distributed uniformly in each group.

    Args:
        T (int): Duration of the simulation in hours (it always starts at
            7:00am).
        N (numpy array): Vector N(T,A_{i}).
        X (numpy array): Vector with the initial configuration of the bikes.
        m (int): Number of groups formed with the bike stations.
        cluster (List[List[List[int,float,float]]]): Contains the clusters of
            the bike stations with their ids and geographic coordinates.
        bikeData (numpy array): Matrix with the ID, numberDocks, latitude and
            longitude of each bike station.
        parLambda (numpy array): Vector with the parameters of the Poisson
            processes N(T,A_{i}).
        nDays: Number of different days considered in the simulation (i.e. 365).
        A (List[List[Tuple(int,int)]]): List with subsets of pairs of bike
            stations.
        poissonArray (List[List[numpy array]]): List with the parameters of
            the Poisson processes N(T,(i,j)), where the jth entry of
            poissonArray holds the parameters of the Poisson processes of day
            j between each pair of bike stations (i.e., the parameter of the
            Poisson process on day j between stations lamb[j][0][1,l] and
            lamb[j][0][2,l] is lamb[j][0][0,l]). This is a sparse
            representation of the original matrix, so if a pair of stations
            doesn't appear in the last list, its PP has parameter zero.
        timesArray (List[List[numpy array]]): Similar to poissonArray, but
            with the mean travel times between the stations.
        ind (int or None): Day of the year when the simulation is run.
        randomSeed (int): Random seed.
        nStations (int): Number of bike stations.

    Returns:
        int: Overall number of negatively affected trips, multiplied by -1.
    """
    if randomSeed is not None:
        randst = np.random.mtrand.RandomState(randomSeed)
    else:
        randst = np.random
    if ind is None:
        probs = poisson.pmf(int(N[0]), mu=np.array(parLambda))
        probs = probs / np.sum(probs)
        ind = randst.choice(range(nDays), size=1, p=probs)
    exponentialTimes = timesArray[ind][0]
    exponentialTimes2 = np.zeros((nStations, nStations))
    nExp = len(exponentialTimes[0, :])
    for i in range(nExp):
        # station indices must be ints to index the matrix
        t1 = int(exponentialTimes[1, i])
        t2 = int(exponentialTimes[2, i])
        exponentialTimes2[t1, t2] = exponentialTimes[0, i]
    poissonParam = poissonArray[ind]
    unHappy = 0
    state = startInitialConfiguration(X, m, cluster, bikeData, nDays)
    nSets = 1
    times = []
    nTimes = 0
    for i in range(nSets):
        temp = PoissonProcess(T, poissonParam, A[i], N[i], randst)
        nTimes += temp[1]
        times.extend(temp[0])
    Times = np.zeros((nTimes, 3))
    k = 0
    for i in range(len(times)):
        for j in range(len(times[i][1])):
            Times[k, 0] = times[i][1][j]   # arrival times
            Times[k, 1] = times[i][0][0]   # station i
            Times[k, 2] = times[i][0][1]   # station j
            k += 1
    Times = Times[Times[:, 0].argsort()]
    currentTime = 0
    dropTimes = []
    for i in range(nTimes):  # xrange is Python 2 only
        currentTime = Times[i, 0]
        while dropTimes and currentTime > dropTimes[0][0]:
            if state[dropTimes[0][1], 0] > 0:
                state[dropTimes[0][1], 0] = state[dropTimes[0][1], 0] - 1
                state[dropTimes[0][1], 1] += 1
                dropTimes.pop(0)
            else:
                unHappy += 1
                j = findBikeStation(state, dropTimes[0][1])
                state[j, 0] = state[j, 0] - 1
                state[j, 1] = state[j, 1] + 1
                dropTimes.pop(0)
        bikePickUp = int(Times[i, 1])
        bikeDrop = int(Times[i, 2])
        if state[bikePickUp, 1] == 0:
            unHappy += 1
            continue
        indi = exponentialTimes[1, ]  # unused in the original
        timeUsed = randst.exponential(exponentialTimes2[bikePickUp, bikeDrop])
        dropTimes.append((currentTime + timeUsed, bikeDrop))
        dropTimes = sorted(dropTimes, key=lambda x: x[0])
        state[bikePickUp, 1] = state[bikePickUp, 1] - 1
        state[bikePickUp, 0] = state[bikePickUp, 0] + 1
    return -unHappy
def test_pmf_zero(self):
    poisson_pmf = poisson.pmf(3, 2)
    zipoisson_pmf = sm.distributions.zipoisson.pmf(3, 2, 0)
    assert_allclose(poisson_pmf, zipoisson_pmf, rtol=1e-12)
def level_distribution(self, k, mu):
    _dists = np.array([poisson.pmf(kk, mu) for kk in range(1, k + 1)])
    return _dists / np.sum(_dists)
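# Standalone check of the truncation-and-renormalization above (a sketch):
# the weights over levels 1..k must sum to exactly 1.
import numpy as np
from scipy.stats import poisson

w = np.array([poisson.pmf(kk, 2.0) for kk in range(1, 6)])
w /= w.sum()
assert np.isclose(w.sum(), 1.0)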
def test_pmf(self):
    poisson_pmf = poisson.pmf(2, 2)
    zipoisson_pmf = sm.distributions.zipoisson.pmf(2, 2, 0.1)
    assert_allclose(poisson_pmf, zipoisson_pmf, rtol=5e-2, atol=5e-2)
mu = 5.0
sigma = 2.0
values = np.random.normal(mu, sigma, 10000)
plt.hist(values, 50)
plt.show()

# Exponential PDF (probability density function) / "power law"
# np.arange(0, 10, 10000) yields a single point; use a fine step instead.
x = np.arange(0, 10, 0.001)
plt.plot(x, expon.pdf(x))

# Binomial probability mass function (only nonzero at integers)
n, p = 10, 0.5
x = np.arange(0, 11)
plt.plot(x, binom.pmf(x, n, p))

# Poisson probability mass function
mu = 500
x = np.arange(400, 600)
plt.plot(x, poisson.pmf(x, mu))

vals = np.random.normal(0, 0.5, 10000)
plt.hist(vals, 50)
plt.show()
print(np.mean(vals))
print(np.var(vals))
print(sp.skew(vals))
print(sp.kurtosis(vals))
if params.detailed:
    print("\nThe TERMINAL branch length is %1.3e, expecting %1.1f mutations vs an observed %d"
          % (corrected_terminal_branch_length, expected_terminal_mutations,
             terminal_mutation_count))
    print("Of these %d mutations," % terminal_mutation_count
          + "".join(['\n\t - %d occur %d times' % (n, mi)
                     for mi, n in enumerate(multiplicities_terminal) if n]))

###############################################################################
### Output the distribution of times mutations at particular positions are
### observed
###############################################################################
print("\nOf the %d positions in the genome," % L
      + "".join(['\n\t - %d were hit %d times (expected %1.2f)'
                 % (n, mi, L * poisson.pmf(mi, 1.0 * total_mutations / L))
                 for mi, n in enumerate(multiplicities_positions) if n]))

# compare that distribution to a Poisson distribution with the same mean
p = poisson.pmf(np.arange(10 * multiplicities_positions.max()),
                1.0 * total_mutations / L)
print("\nlog-likelihood difference to Poisson distribution with same mean: %1.3e"
      % (-L * np.sum(p * np.log(p + 1e-100))
         + np.sum(multiplicities_positions
                  * np.log(p[:len(multiplicities_positions)] + 1e-100))))

###############################################################################
### Output the mutations that are observed most often
###############################################################################
         linewidth=3)
ax1.legend(fontsize=15)
ax2.legend(fontsize=15)
ax2.set_xlabel('Time [ns]', fontsize='15')
fig.text(0.04, 0.5, r'$N_{events}\sum_n nH_{ni}$', va='center',
         rotation='vertical', fontsize=15)

spectra_rng = np.nonzero(
    np.logical_and(PEs > PEs[np.argmax(spectra[0]) - 5],
                   PEs < PEs[np.argmax(spectra[0]) + 5]))[0]
model = np.sum(H[:, 0, 0]) * poisson.pmf(
    PEs, np.sum(m[:, :, 0].T * np.arange(np.shape(H)[0])))[spectra_rng]
ax3.plot(spectra[0], 'ko', label='spectrum - PMT7')
ax3.plot(PEs[spectra_rng], model, 'r-.')

spectra_rng = np.nonzero(
    np.logical_and(PEs > PEs[np.argmax(spectra[1]) - 5],
                   PEs < PEs[np.argmax(spectra[1]) + 5]))[0]
model = np.sum(H[:, 0, 1]) * poisson.pmf(
    PEs, np.sum(m[:, :, 1].T * np.arange(np.shape(H)[0])))[spectra_rng]
ax4.plot(spectra[1], 'ko', label='spectrum - PMT7')
ax4.plot(PEs[spectra_rng], model, 'r-.')

fig, ((ax1, ax3), (ax2, ax4)) = plt.subplots(2, 2)
ax3.plot(delays, delay_h, 'ko')
ax3.plot(delays[rng_delay], rec['a_delay'][0] *
def _get_fit(self, per_loc, per_admit, per_cc, LOS_cc, LOS_nc, per_vent,
             ppe_GLOVE_SURGICAL, ppe_GLOVE_EXAM_NITRILE,
             ppe_GLOVE_GLOVE_EXAM_VINYL, ppe_MASK_FACE_PROCEDURE_ANTI_FOG,
             ppe_MASK_PROCEDURE_FLUID_RESISTANT,
             ppe_GOWN_ISOLATION_XLARGE_YELLOW,
             ppe_MASK_SURGICAL_ANTI_FOG_W_FILM, ppe_SHIELD_FACE_FULL_ANTI_FOG,
             ppe_RESPIRATOR_PARTICULATE_FILTER_REG, TimeLag, PopSize,
             ForecastDays, forecasted_y, focal_loc, fdates, new_cases, model,
             Forecasted_cases_df_for_download,
             Forecasted_patient_census_df_for_download,
             Forecasted_ppe_needs_df_for_download):
    # declare figure object
    fig = plt.figure(figsize=(15, 17))

    # Declare figure axis to hold table of forecasted cases, visits, admits;
    # the figure will actually be a table, so turn the axes off
    ax = plt.subplot2grid((6, 4), (0, 2), colspan=2, rowspan=2)
    ax.axis('off')

    # shorten location name if longer than 12 characters
    loc = str(focal_loc)
    if len(loc) > 12:
        loc = loc[:12] + '...'

    # declare column labels; row labels are the dates
    col_labels = ['Total cases', 'New cases', 'New visits', 'New admits']
    row_labels = fdates.tolist()

    # lists to hold table values and cell/row colors
    table_vals = []
    cclr_vals = []
    rclr_vals = []

    #### Inclusion of time lag
    # time lag is modeled as a Poisson distributed random variable with a
    # mean chosen by the user (TimeLag)
    new_cases_lag = []
    x = list(range(len(forecasted_y)))
    for i in new_cases:
        lag_pop = i * poisson.pmf(x, TimeLag)
        new_cases_lag.append(lag_pop)

    # Declare a list to hold time-staggered lists; this allows the time-lag
    # effects to be summed across rows (days)
    lol = []
    for i, daily_vals in enumerate(new_cases_lag):
        # number of indices to pad in front
        fi = [0] * i
        diff = len(new_cases) - len(fi)
        # number of indices to pad in back
        bi = [0] * diff
        ls = list(fi) + list(daily_vals) + list(bi)
        lol.append(np.array(ls))

    # convert the list of time-staggered lists to an array and get the
    # time-lagged sum of visits across days
    ar = np.array(lol)
    ts_lag = np.sum(ar, axis=0)
    # upper truncate for the number of days in observed y values
    ts_lag = ts_lag[:len(new_cases)]

    # only show the current date and dates in the forecast window;
    # lower truncate lists so days before the present day are excluded
    row_labels = row_labels[-(ForecastDays + 1):]
    new_cases = new_cases[-(ForecastDays + 1):]
    forecasted_y = forecasted_y[-(ForecastDays + 1):]
    ts_lag2 = ts_lag[-(ForecastDays + 1):]

    # Declare pandas dataframe to hold data for download
    Forecasted_cases_df_for_download = pd.DataFrame(columns=['date'] + col_labels)

    # For each date intended for the output table
    Total, New, Visits, Admits = [], [], [], []
    for i in range(len(row_labels)):
        new = new_cases[i]
        val = ts_lag2[i]
        # each cell is a row with 4 columns: total cases, new cases,
        # time-lagged visits to your hospital, time-lagged admits
        cell = [int(np.round(forecasted_y[i])),
                int(np.round(new)),
                int(np.round(val * (per_loc * 0.01))),
                int(np.round((0.01 * per_admit) * val * (per_loc * 0.01)))]
        Total.append(cell[0])
        New.append(cell[1])
        Visits.append(cell[2])
        Admits.append(cell[3])
        # Add the row to the dataframe
        df_row = [row_labels[i]]
        df_row.extend(cell)
        labs = ['date'] + col_labels
        temp = pd.DataFrame([df_row], columns=labs)
        Forecasted_cases_df_for_download = pd.concat(
            [Forecasted_cases_df_for_download, temp])
        # color the first row grey and remaining rows white
        if i == 0:
            rclr = '0.8'
            cclr = ['0.8'] * 4
        else:
            rclr = 'w'
            cclr = ['w'] * 4
        table_vals.append(cell)
        cclr_vals.append(cclr)
        rclr_vals.append(rclr)

    # Generate and customize table for output
    lim = 15
    the_table = plt.table(cellText=table_vals[0:lim],
                          colWidths=[0.32, 0.32, 0.32, 0.32],
                          rowLabels=row_labels[0:lim],
                          colLabels=col_labels,
                          cellLoc='center', loc='upper center',
                          cellColours=cclr_vals[0:lim],
                          rowColours=rclr_vals[0:lim])
    the_table.auto_set_font_size(True)
    the_table.scale(1, 1.32)
    titletext = ('Forecasted cases for ' + loc +
                 '\nData beyond 14 days is available in the csv (below)')
    plt.title(titletext, fontsize=14, fontweight='bold')

    ax = plt.subplot2grid((6, 4), (0, 0), colspan=2, rowspan=2)
    # plt.plot(row_labels, Total, c='0.2', label='Total cases', linewidth=3)
    # plt.plot(row_labels, New, c='0.5', label='New cases', linewidth=3)
    plt.plot(row_labels, Visits, c='Crimson', label='New visits', linewidth=3)
    plt.plot(row_labels, Admits, c='Steelblue', label='New admits', linewidth=3)
    plt.title('Forecasted visits & admits', fontsize=16, fontweight='bold')
    # log-scale y-values to base 10 if the user has chosen
    # if log_scl == True:
    #     plt.yscale('log')

    # limit dates displayed on the x-axis to prevent overcrowding
    ax = plt.gca()
    temp = ax.xaxis.get_ticklabels()
    temp = list(set(temp) - set(temp[::12]))
    for label in temp:
        label.set_visible(False)

    # remove legend line handles and match the text color to the line color
    leg = ax.legend(handlelength=0, handletextpad=0, fancybox=False,
                    loc='best', frameon=False, fontsize=14)
    for line, text in zip(leg.get_lines(), leg.get_texts()):
        text.set_color(line.get_color())
    for item in leg.legendHandles:
        item.set_visible(False)
    plt.ylabel('COVID-19 cases', fontsize=14, fontweight='bold')
    plt.xlabel('Date', fontsize=14, fontweight='bold')

    # Generate figure for patient census
    ax = plt.subplot2grid((6, 4), (2, 0), colspan=2, rowspan=2)

    #### Construct arrays for critical care and non-critical care patients
    cc = (0.01 * per_cc) * (0.01 * per_admit) * (0.01 * per_loc) * np.array(ts_lag)
    cc = cc.tolist()
    nc = (1 - (0.01 * per_cc)) * (0.01 * per_admit) * (0.01 * per_loc) * np.array(ts_lag)
    nc = nc.tolist()

    # LOS for non-critical care = 5 days; LOS for critical care = 10 days.
    # An alternative (commented out in the source) modeled LOS as lognormal:
    # sigma = 0.3
    # n_cc = np.log(LOS_cc) - (sigma**2)/2
    # n_nc = np.log(LOS_nc) - (sigma**2)/2
    # x_vars = np.array(list(range(1, len(fdates)+1)))
    # p_nc = 0.5 + 0.5*sc.special.erf((np.log(x_vars)-n_nc)/(2**0.5*sigma))
    # p_cc = 0.5 + 0.5*sc.special.erf((np.log(x_vars)-n_cc)/(2**0.5*sigma))

    # Model length of stay (LOS) as a binomially distributed random variable
    # with parameters p and n; p gives a symmetrical distribution, and
    # n = 10 * LOS produces a binomial distribution with mean equal to LOS
    p = 0.1
    n_cc = LOS_cc * 10
    n_nc = LOS_nc * 10

    # get the binomial random variable properties and use the binomial
    # cumulative distribution function
    rv_nc = binom(n_nc, p)
    p_nc = rv_nc.cdf(np.array(range(1, len(fdates) + 1)))
    rv_cc = binom(n_cc, p)
    p_cc = rv_cc.cdf(np.array(range(1, len(fdates) + 1)))

    # Initiate lists of critical care and non-critical care patients expected
    # as new admits (index 0), as 1-day patients, 2-day patients, etc.
    LOScc = np.zeros(len(fdates))
    LOScc[0] = ts_lag[0] * (0.01 * per_cc) * (0.01 * per_admit) * (0.01 * per_loc)
    LOSnc = np.zeros(len(fdates))
    LOSnc[0] = ts_lag[0] * (1 - (0.01 * per_cc)) * (0.01 * per_admit) * (0.01 * per_loc)

    total_nc = []
    total_cc = []
    # Roll up patient carry-over into totals of critical care and
    # non-critical care patients expected
    for i, day in enumerate(fdates):
        LOScc = LOScc * (1 - p_cc)
        LOSnc = LOSnc * (1 - p_nc)
        LOScc = np.roll(LOScc, shift=1)
        LOSnc = np.roll(LOSnc, shift=1)
        LOScc[0] = ts_lag[i] * (0.01 * per_cc) * (0.01 * per_admit) * (0.01 * per_loc)
        LOSnc[0] = ts_lag[i] * (1 - (0.01 * per_cc)) * (0.01 * per_admit) * (0.01 * per_loc)
        total_nc.append(np.sum(LOSnc))
        total_cc.append(np.sum(LOScc))

    # Plot the critical care and non-critical care patient census over the
    # forecasted time frame
    plt.plot(fdates[-(ForecastDays + 1):], total_cc[-(ForecastDays + 1):],
             c='m', label='Critical care', linewidth=3)
    plt.plot(fdates[-(ForecastDays + 1):], total_nc[-(ForecastDays + 1):],
             c='0.4', label='Non-critical care', linewidth=3)
    plt.title('Forecasted census', fontsize=16, fontweight='bold')

    # limit dates displayed on the x-axis; adjust the legend as before
    ax = plt.gca()
    temp = ax.xaxis.get_ticklabels()
    temp = list(set(temp) - set(temp[::12]))
    for label in temp:
        label.set_visible(False)
    leg = ax.legend(handlelength=0, handletextpad=0, fancybox=False,
                    loc='best', frameon=False, fontsize=14)
    for line, text in zip(leg.get_lines(), leg.get_texts()):
        text.set_color(line.get_color())
    for item in leg.legendHandles:
        item.set_visible(False)
    plt.ylabel('COVID-19 patients', fontsize=14, fontweight='bold')
    plt.xlabel('Date', fontsize=14, fontweight='bold')

    # Declare axis for the patient census table and turn visibility off
    ax = plt.subplot2grid((6, 4), (2, 2), colspan=2, rowspan=2)
    ax.axis('off')

    # Truncate location names if longer than 12 characters
    if len(loc) > 12:
        loc = loc[:12] + '...'

    # declare table column labels; rows are dates, truncated to the present
    # day and days in the forecast window
    col_labels = ['All COVID', 'Non-ICU', 'ICU', 'Vent']
    row_labels = fdates.tolist()[-(ForecastDays + 1):]
    total_nc_trunc = total_nc[-(ForecastDays + 1):]
    total_cc_trunc = total_cc[-(ForecastDays + 1):]

    table_vals, cclr_vals, rclr_vals = [], [], []
    # declare pandas dataframe to hold patient census data for download
    Forecasted_patient_census_df_for_download = pd.DataFrame(
        columns=['date'] + col_labels)

    for i in range(len(row_labels)):
        # Each cell holds: total admits expected, non-critical care COVID-19
        # patients, critical care COVID-19 patients, and ICU patients on
        # ventilators
        cell = [int(np.round(total_nc_trunc[i] + total_cc_trunc[i])),
                int(np.round(total_nc_trunc[i])),
                int(np.round(total_cc_trunc[i])),
                int(np.round(total_cc_trunc[i] * (0.01 * per_vent)))]
        # add the cell to the dataframe intended for csv download
        df_row = [row_labels[i]]
        df_row.extend(cell)
        labs = ['date'] + col_labels
        temp = pd.DataFrame([df_row], columns=labs)
        Forecasted_patient_census_df_for_download = pd.concat(
            [Forecasted_patient_census_df_for_download, temp])
        # set colors of rows
        if i == 0:
            rclr = '0.8'
            cclr = ['0.8'] * 4
        else:
            rclr = 'w'
            cclr = ['w'] * 4
        table_vals.append(cell)
        cclr_vals.append(cclr)
        rclr_vals.append(rclr)

    # limit the number of displayed table rows
    lim = 15
    the_table = plt.table(cellText=table_vals[0:lim],
                          colWidths=[0.255] * 4,
                          rowLabels=row_labels[0:lim],
                          colLabels=col_labels,
                          cellLoc='center', loc='upper center',
                          cellColours=cclr_vals[0:lim],
                          rowColours=rclr_vals[0:lim])
    the_table.auto_set_font_size(True)
    the_table.scale(1, 1.32)
    titletext = ('Beds needed for COVID-19 cases'
                 '\nData beyond 14 days is available in the csv (below)')
    plt.title(titletext, fontsize=14, fontweight='bold')

    ####################### PPE ##################################
    ax = plt.subplot2grid((6, 4), (4, 0), colspan=2, rowspan=2)

    #### Construct arrays for critical care and non-critical care patients.
    # All COVID patients expected in house on each forecasted day; PUI is
    # just a name here
    PUI_COVID = np.array(total_nc) + np.array(total_cc)
    # fraction of new cases visiting your hospital = 0.01 * per_loc
    new_visits_your_hospital = ts_lag * (0.01 * per_loc)
    # Add number of new visits to number of in-house patients
    PUI_COVID = PUI_COVID + new_visits_your_hospital

    glove_surgical = np.round(ppe_GLOVE_SURGICAL * PUI_COVID).astype('int')
    glove_nitrile = np.round(ppe_GLOVE_EXAM_NITRILE * PUI_COVID).astype('int')
    glove_vinyl = np.round(ppe_GLOVE_GLOVE_EXAM_VINYL * PUI_COVID).astype('int')
    face_mask = np.round(ppe_MASK_FACE_PROCEDURE_ANTI_FOG * PUI_COVID).astype('int')
    procedure_mask = np.round(ppe_MASK_PROCEDURE_FLUID_RESISTANT * PUI_COVID).astype('int')
    isolation_gown = np.round(ppe_GOWN_ISOLATION_XLARGE_YELLOW * PUI_COVID).astype('int')
    surgical_mask = np.round(ppe_MASK_SURGICAL_ANTI_FOG_W_FILM * PUI_COVID).astype('int')
    face_shield = np.round(ppe_SHIELD_FACE_FULL_ANTI_FOG * PUI_COVID).astype('int')
    respirator = np.round(ppe_RESPIRATOR_PARTICULATE_FILTER_REG * PUI_COVID).astype('int')

    ppe_ls = [[glove_surgical, 'GLOVE SURGICAL', 'r'],
              [glove_nitrile, 'GLOVE EXAM NITRILE', 'orange'],
              [glove_vinyl, 'GLOVE EXAM VINYL', 'goldenrod'],
              [face_mask, 'MASK FACE PROCEDURE ANTI FOG', 'limegreen'],
              [procedure_mask, 'MASK PROCEDURE FLUID RESISTANT', 'green'],
              [isolation_gown, 'GOWN ISOLATION XLARGE YELLOW', 'cornflowerblue'],
              [surgical_mask, 'MASK SURGICAL ANTI FOG W/FILM', 'blue'],
              [face_shield, 'SHIELD FACE FULL ANTI FOG', 'plum'],
              [respirator, 'RESPIRATOR PARTICULATE FILTER REG', 'darkviolet']]
    linestyles = ['dashed', 'dotted', 'dashdot', 'dashed', 'dotted',
                  'dashdot', 'dotted', 'dashed', 'dashdot']

    for i, ppe in enumerate(ppe_ls):
        plt.plot(fdates[-(ForecastDays + 1):], ppe[0][-(ForecastDays + 1):],
                 c=ppe[2], label=ppe[1], linewidth=2, ls=linestyles[i])
    plt.title('Forecasted PPE needs', fontsize=16, fontweight='bold')
    # if log_scl == True:
    #     plt.yscale('log')

    ax = plt.gca()
    temp = ax.xaxis.get_ticklabels()
    temp = list(set(temp) - set(temp[::12]))
    for label in temp:
        label.set_visible(False)
    leg = ax.legend(handlelength=0, handletextpad=0, fancybox=True,
                    loc='best', frameon=True, fontsize=8)
    for line, text in zip(leg.get_lines(), leg.get_texts()):
        text.set_color(line.get_color())
    for item in leg.legendHandles:
        item.set_visible(False)
    plt.ylabel('PPE Supplies', fontsize=14, fontweight='bold')
    plt.xlabel('Date', fontsize=14, fontweight='bold')

    ax = plt.subplot2grid((6, 4), (4, 2), colspan=2, rowspan=2)
    ax.axis('off')
    # ax.axis('tight')

    #### Recompute the PPE arrays over the forecast window for the table
    # PUI_COVID = np.array(total_nc) + np.array(total_cc)
    PUI_COVID = PUI_COVID[-(ForecastDays + 1):]
    glove_surgical = np.round(ppe_GLOVE_SURGICAL * PUI_COVID).astype('int')
    glove_nitrile = np.round(ppe_GLOVE_EXAM_NITRILE * PUI_COVID).astype('int')
    glove_vinyl = np.round(ppe_GLOVE_GLOVE_EXAM_VINYL * PUI_COVID).astype('int')
    face_mask = np.round(ppe_MASK_FACE_PROCEDURE_ANTI_FOG * PUI_COVID).astype('int')
    procedure_mask = np.round(ppe_MASK_PROCEDURE_FLUID_RESISTANT * PUI_COVID).astype('int')
    isolation_gown = np.round(ppe_GOWN_ISOLATION_XLARGE_YELLOW * PUI_COVID).astype('int')
    surgical_mask = np.round(ppe_MASK_SURGICAL_ANTI_FOG_W_FILM * PUI_COVID).astype('int')
    face_shield = np.round(ppe_SHIELD_FACE_FULL_ANTI_FOG * PUI_COVID).astype('int')
    respirator = np.round(ppe_RESPIRATOR_PARTICULATE_FILTER_REG * PUI_COVID).astype('int')
    ppe_ls = [[glove_surgical, 'GLOVE SURGICAL', 'r'],
              [glove_nitrile, 'GLOVE EXAM NITRILE', 'orange'],
              [glove_vinyl, 'GLOVE EXAM VINYL', 'goldenrod'],
              [face_mask, 'MASK FACE PROCEDURE ANTI FOG', 'limegreen'],
              [procedure_mask, 'MASK PROCEDURE FLUID RESISTANT', 'green'],
              [isolation_gown, 'GOWN ISOLATION XLARGE YELLOW', 'cornflowerblue'],
              [surgical_mask, 'MASK SURGICAL ANTI FOG W/FILM', 'blue'],
              [face_shield, 'SHIELD FACE FULL ANTI FOG', 'plum'],
              [respirator, 'RESPIRATOR PARTICULATE FILTER REG', 'darkviolet']]

    if len(loc) > 12:
        loc = loc[:12] + '...'

    col_labels = [ppe[1] for ppe in ppe_ls]
    row_labels = fdates.tolist()[-(ForecastDays + 1):]

    table_vals = []
    cclr_vals = []
    rclr_vals = []
    Forecasted_ppe_needs_df_for_download = pd.DataFrame(columns=['date'] + col_labels)
    for i in range(len(row_labels)):
        cell = [ppe[0][i] for ppe in ppe_ls]
        df_row = [row_labels[i]]
        df_row.extend(cell)
        labs = ['date'] + col_labels
        temp = pd.DataFrame([df_row], columns=labs)
        Forecasted_ppe_needs_df_for_download = pd.concat(
            [Forecasted_ppe_needs_df_for_download, temp])
        if i == 0:
            rclr = '0.8'
            cclr = ['0.8'] * 9
        else:
            rclr = 'w'
            cclr = ['w'] * 9
        table_vals.append(cell)
        cclr_vals.append(cclr)
        rclr_vals.append(rclr)

    cwp = 0.15
    lim = 15
    the_table = plt.table(cellText=table_vals[0:lim],
                          colWidths=[cwp] * 9,
                          rowLabels=row_labels[0:lim],
                          colLabels=None,
                          cellLoc='center', loc='upper center',
                          cellColours=cclr_vals[0:lim],
                          rowColours=rclr_vals[0:lim])
    the_table.auto_set_font_size(True)
    the_table.scale(1, 1.32)
    for i in range(len(ppe_ls)):
        clr = ppe_ls[i][2]
        for j in range(lim):
            the_table[(j, i)].get_text().set_color(clr)

    # set values for diagonal column labels
    hoffset = -0.3   # found by trial and error
    voffset = 1.0    # found by trial and error
    col_width = [0.06, 0.09, 0.09, 0.12, 0.133, 0.138, 0.128, 0.135, 0.142]
    col_labels2 = [['GLOVE SURGICAL', 'r'],
                   ['GLOVE EXAM NITRILE', 'orange'],
                   ['GLOVE GLOVE EXAM VINYL', 'goldenrod'],
                   ['MASK FACE PROC. A-FOG', 'limegreen'],
                   ['MASK PROC. FLUID RES.', 'green'],
                   ['GOWN ISO. XL YELLOW', 'cornflowerblue'],
                   ['MASK SURG. ANTI FOG W/FILM', 'blue'],
                   ['SHIELD FACE FULL ANTI FOG', 'plum'],
                   ['RESP. PART. FILTER REG', 'darkviolet']]
    count = 0
    for i, val in enumerate(col_labels2):
        ax.annotate(' ' + val[0],
                    xy=(hoffset + count * col_width[i], voffset),
                    xycoords='axes fraction', ha='left', va='bottom',
                    rotation=-25, size=8, c=val[1])
        count += 1

    plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                        wspace=1.1, hspace=1.1)
    return (Forecasted_cases_df_for_download,
            Forecasted_patient_census_df_for_download,
            Forecasted_ppe_needs_df_for_download)
def fc_find_acceptance_interval_poisson(mu, background, x_bins, alpha):
    r"""Analytical acceptance interval for Poisson process with background.

    .. math :: \int_{x_{min}}^{x_{max}} P(x|mu)\mathrm{d}x = alpha

    For more information see :ref:`documentation <feldman_cousins>`.

    Parameters
    ----------
    mu : float
        Mean of the signal
    background : float
        Mean of the background
    x_bins : array-like
        Bins in x
    alpha : float
        Desired confidence level

    Returns
    -------
    (x_min, x_max) : tuple of floats
        Acceptance interval
    """
    dist = poisson(mu=mu + background)
    x_bin_width = x_bins[1] - x_bins[0]
    p = []
    r = []
    for x in x_bins:
        p.append(dist.pmf(x))
        # Implementing the boundary condition at zero
        muBest = max(0, x - background)
        probMuBest = poisson.pmf(x, mu=muBest + background)
        # probMuBest should never be zero. Check it just in case.
        if probMuBest == 0.0:
            r.append(0.0)
        else:
            r.append(p[-1] / probMuBest)
    p = np.asarray(p)
    r = np.asarray(r)
    if sum(p) < alpha:
        raise ValueError("X bins don't contain enough probability to reach "
                         "desired confidence level for this mu!")
    rank = rankdata(-r, method="dense")
    index_array = np.arange(x_bins.size)
    rank_sorted, index_array_sorted = zip(*sorted(zip(rank, index_array)))
    index_min = index_array_sorted[0]
    index_max = index_array_sorted[0]
    p_sum = 0
    for i in range(len(rank_sorted)):
        if index_array_sorted[i] < index_min:
            index_min = index_array_sorted[i]
        if index_array_sorted[i] > index_max:
            index_max = index_array_sorted[i]
        p_sum += p[index_array_sorted[i]]
        if p_sum >= alpha:
            break
    return x_bins[index_min], x_bins[index_max] + x_bin_width
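# Usage sketch (illustrative values, not from the source): for mu = 2,
# background = 0.5 and alpha = 0.9, the Feldman-Cousins construction returns
# a contiguous acceptance interval in x.
import numpy as np

x_bins = np.arange(0, 50)
x_lo, x_hi = fc_find_acceptance_interval_poisson(2.0, 0.5, x_bins, 0.9)
print(x_lo, x_hi)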
n1 = pbc(L, Lx, Ly, Lz[0], x, y, z, X13, Y13, Z13, np13)
n2 = pbc(L, Lx, Ly, Lz[1], x, y, z + Lz[0], X14, Y14, Z14, np14)
n3 = pbc(L, Lx, Ly, Lz[2], x, y, z + Lz[0] + Lz[1], X15, Y15, Z15, np15)
Nhalos.append(n1 + n2 + n3)
i += 1

#print(Nhalos)
nhalos = np.linspace(0, np.amax(Nhalos), int(np.amax(Nhalos)) + 1)
print("Nhalos mean", np.mean(Nhalos))
print("Standard deviation", np.std(Nhalos))

# Poisson pmf with the sample mean as the rate parameter
Poisson = []
print("Len nhalos", len(nhalos))
lam = np.mean(Nhalos)
for i in range(len(nhalos)):
    Poisson.append(poisson.pmf(nhalos[i], lam))

f = open("Nhalo.dat", "w")
for i in range(len(Nhalos)):
    f.write(str(Nhalos[i]) + "\n")
f.close()

plt.hist(Nhalos,
         range=(0, np.amax(Nhalos)),
         bins=int((np.amax(Nhalos) + 3.0) / 9.0),
         density=True)
#plt.hist(Nhalos, bins=int((np.amax(Nhalos) + 3.0) / 9.0), density=True)
plt.plot(nhalos, Poisson, c='r', linewidth=2.5)
plt.xlabel(r"$\mathrm{N}$", fontsize=25)
plt.ylabel(r"$\mathrm{P(N)}$", fontsize=25)
#plt.text(20, 0.02, r"$\mathrm{L = 1.58^8L_{\odot}}$", fontsize=20)
#plt.text(20, 0.017, r"$\mathrm{M_{halo} = 7.28^{10}M_{\odot}}$", fontsize=20)
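# A quick goodness-of-fit sketch to complement the plot above: for Poisson
# data the variance should roughly equal the mean, so the index of
# dispersion (var/mean) should be close to 1.
dispersion = np.var(Nhalos) / np.mean(Nhalos)
print("Index of dispersion (var/mean):", dispersion)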
def _pmf(self, x, l, p, w):
    # Two-component mixture: weight w on Poisson(l) and weight (1 - w) on a
    # geometric distribution shifted to support {0, 1, 2, ...} via loc=-1
    return w * poisson.pmf(x, l) + (1 - w) * geom.pmf(x, p, loc=-1)
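# Sanity-check sketch for the mixture pmf above, with hypothetical parameter
# values: summed over a wide support it should be ~1, since both components
# are proper pmfs on {0, 1, 2, ...}.
import numpy as np
from scipy.stats import poisson, geom

l, p, w = 4.0, 0.3, 0.6
xs = np.arange(0, 200)
print((w * poisson.pmf(xs, l) + (1 - w) * geom.pmf(xs, p, loc=-1)).sum())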
def poisson_probability(n, lam):
    global poisson_cache
    # Use a tuple key: the original encoding n * 10 + lam collides
    # whenever lam can be 10 or larger
    key = (n, lam)
    if key not in poisson_cache:
        poisson_cache[key] = poisson.pmf(n, lam)
    return poisson_cache[key]
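# An equivalent approach (a sketch, not the original code) that uses
# functools.lru_cache instead of a hand-rolled dict, sidestepping key
# encoding altogether:
from functools import lru_cache
from scipy.stats import poisson

@lru_cache(maxsize=None)
def poisson_probability_cached(n, lam):
    return poisson.pmf(n, lam)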
# -*- coding: utf-8 -*-
"""
Created on Sat Apr  6 15:27:45 2019

@author: Liu Yang
"""
from scipy.stats import poisson
import matplotlib.pyplot as plt
import numpy as np

plt.style.use('ggplot')

# 'lamda' is deliberately misspelled: 'lambda' is a Python keyword
lamda = [2, 6, 10, 20]
ps = [poisson(i) for i in lamda]  # frozen distributions, one per rate
x = np.arange(30)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 5))
for rv, lam in zip(ps, lamda):
    ax[0].plot(x, rv.pmf(x), label='lamda=' + str(lam))
    ax[1].plot(x, rv.cdf(x), label='lamda=' + str(lam))
ax[0].set_xlabel('x')
ax[1].set_xlabel('x')
ax[0].set_ylabel('pmf')
ax[1].set_ylabel('cdf')
ax[0].legend(loc='best')
ax[1].legend(loc='best')
fig.suptitle('Poisson distribution')
#fig.tight_layout()
fig.savefig(r'C:\Users\10245\Desktop\数学笔记\泊松分布参数lamda的检验\poisson.png', dpi=300)
plt.show()
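# A related sketch: for large rates the Poisson pmf is well approximated by
# a normal density with mean and variance both equal to the rate, which is
# visible in the lamda=20 curve above. The values here are illustrative.
from scipy.stats import norm, poisson
import numpy as np
import matplotlib.pyplot as plt

lam = 20
xs = np.arange(40)
plt.plot(xs, poisson.pmf(xs, lam), 'o', label='Poisson pmf')
plt.plot(xs, norm.pdf(xs, loc=lam, scale=np.sqrt(lam)), label='normal approx')
plt.legend()
plt.show()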
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import bernoulli, binom, expon, norm, poisson


def common_dists():
    """Show some commonly used distributions."""
    # prep the subplots
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.flatten()

    # gaussian
    mu, sigma = 0, 1
    x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
    axes[0].plot(x, norm.pdf(x, mu, sigma))
    axes[0].set_title('Gaussian PDF')
    axes[0].set_ylabel('density')
    axes[0].set_xlabel('x')
    axes[0].annotate(r'$\mu$', xy=(mu, 0.4), xytext=(mu - 0.09, 0.3),
                     arrowprops=dict(arrowstyle='->'))
    axes[0].annotate('', xy=(mu - sigma, 0.25), xytext=(mu + sigma, 0.25),
                     arrowprops=dict(arrowstyle='|-|, widthB=0.5, widthA=0.5'))
    axes[0].annotate(r'$2\sigma$', xy=(mu - 0.15, 0.22))

    # uniform distribution defined by min (a) and max (b)
    a, b = 0, 1
    peak = 1 / (b - a)
    axes[1].plot([a, a, b, b], [0, peak, peak, 0])
    axes[1].set_title('Uniform PDF')
    axes[1].set_ylabel('density')
    axes[1].set_xlabel('x')
    axes[1].annotate('min', xy=(a, peak), xytext=(a + 0.2, peak - 0.2),
                     arrowprops=dict(arrowstyle='->'))
    axes[1].annotate('max', xy=(b, peak), xytext=(b - 0.3, peak - 0.2),
                     arrowprops=dict(arrowstyle='->'))
    axes[1].set_ylim(0, 1.5)

    # exponential
    x = np.linspace(0, 5, 100)
    axes[2].plot(x, expon.pdf(x, scale=1 / 3))
    axes[2].set_title('Exponential PDF')
    axes[2].set_ylabel('density')
    axes[2].set_xlabel('x')
    axes[2].annotate(r'$\lambda$ = 3', xy=(0, 3), xytext=(0.5, 2.8),
                     arrowprops=dict(arrowstyle='->'))

    # Bernoulli of coin toss
    axes[3].bar(['heads', 'tails'], bernoulli.pmf([0, 1], p=0.5))
    axes[3].set_title('Bernoulli with fair coin toss (p = 0.5)')
    axes[3].set_ylabel('probability')
    axes[3].set_xlabel('coin toss result')
    axes[3].set_ylim(0, 1)

    # Binomial of tossing a fair coin many times
    x = np.arange(0, 10)
    # n must be the number of trials (an int); the original passed x.shape,
    # a tuple, which only worked by accidental broadcasting
    axes[4].plot(x, binom.pmf(x, n=10, p=0.5), linestyle='--', marker='o')
    axes[4].set_title('Binomial PMF - many Bernoulli trials')
    axes[4].set_ylabel('probability')
    axes[4].set_xlabel('number of heads')

    # Poisson PMF (probability mass function) because this is a discrete random variable
    x = np.arange(0, 10)
    axes[5].plot(x, poisson.pmf(x, mu=3), linestyle='--', marker='o')
    axes[5].set_title('Poisson PMF')
    axes[5].set_ylabel('mass')
    axes[5].set_xlabel('x')
    axes[5].annotate(r'$\lambda$ = 3', xy=(3, 0.225), xytext=(1.9, 0.2),
                     arrowprops=dict(arrowstyle='->'))

    # add a title
    plt.suptitle('Some commonly used distributions', fontsize=15, y=0.95)
    return axes
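# A numerical sketch of why the binomial and Poisson panels above look
# alike: with large n and small p, binom(n, p) approaches Poisson(n * p).
# The n and p values here are illustrative.
import numpy as np
from scipy.stats import binom, poisson

n, p = 1000, 0.003
xs = np.arange(0, 10)
print(np.max(np.abs(binom.pmf(xs, n, p) - poisson.pmf(xs, n * p))))  # small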
import numpy as np
from scipy.stats import poisson

'''
1. Find the probability that at most 5 defective fuses will be found in a
box of 200 fuses if experience shows that 2 per cent of such fuses are
defective.
'''
print("Assignment 1")
print("Probability of at most 5 defective", poisson.cdf(k=5, mu=200 * 0.02))
print("\n")

'''
2. The number of accidents in a year attributed to taxi drivers in a city
follows a Poisson distribution with mean equal to 3. Out of 1,000 taxi
drivers, find approximately the number of drivers with
a) No accidents in a year
b) More than 3 accidents in a year
'''
print("Assignment 2")
print("No of drivers with no accidents in a year",
      poisson.pmf(k=0, mu=3) * 1000)
print("No of drivers with more than 3 accidents in a year",
      (1 - poisson.cdf(k=3, mu=3)) * 1000)
print("\n")

'''
3. From the records of 10 Indian Army corps kept over 20 years the following
data were obtained showing the number of deaths caused by the horse.
Calculate the theoretical Poisson frequencies.
No of Deaths:   0    1   2   3  4  Total
Frequency:      109  65  22  3  1  200
'''
print("Assignment 3")
mean_value = (0 * 109 + 1 * 65 + 2 * 22 + 3 * 3 + 4 * 1) / 200
print("Frequencies of Deaths")
for k in np.arange(4 + 1):
    # Theoretical frequency = total count * P(X = k)
    print("No of Deaths={}, Frequency={}"
          .format(k, 200 * poisson.pmf(k=k, mu=mean_value)))
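# Cross-check sketch for assignment 1: the cdf used above should equal the
# pmf summed over k = 0..5 at the same mean (mu = 200 * 0.02 = 4).
from scipy.stats import poisson

print(sum(poisson.pmf(k=k, mu=4) for k in range(6)))  # == poisson.cdf(5, 4)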
import pandas as pd
import numpy as np
import pickle
from scipy.stats import poisson
from matplotlib import pyplot as plt
import utils

with open('2010-2018_patched_df.p', 'rb') as f:
    df = pickle.load(f)

# Restrict to matches where both players' pre-match Elo scores fall in a
# narrow band, then pool the scores of both players
low = 1550
high = 1600
mask = (df.Elo_Score_Before_1 > low) & (df.Elo_Score_Before_1 < high) & (
    df.Elo_Score_Before_2 > low) & (df.Elo_Score_Before_2 < high)
scores = np.concatenate(
    (df[mask]['Score_1'].values, df[mask]['Score_2'].values))
mean = np.average(scores)
print(mean)

# plt.hist(df.Elo_Score_Before_1)
# plt.show()

# Compare the empirical frequency of each score with the Poisson pmf
# evaluated at the sample mean
for i in range(10):
    print(i, np.sum(scores == i) / len(scores), poisson.pmf(i, mean))
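# A plotting sketch building on the loop above: bar-chart the empirical
# frequencies against the fitted Poisson pmf (reuses the already imported
# plt, np, and poisson).
xs = np.arange(10)
emp = [np.sum(scores == i) / len(scores) for i in xs]
plt.bar(xs, emp, alpha=0.5, label='empirical')
plt.plot(xs, poisson.pmf(xs, mean), 'ro-', label='Poisson fit')
plt.legend()
plt.show()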