def plot_distribution(self):
    """Plot the distribution of estimated coronavirus cases in Dhaka."""
    p = self.calculate_pro_detected_overseas()
    n = self.international.cases
    fig, ax = plt.subplots(1, 1)
    x = np.arange(nbinom.ppf(0.025, n, p),
                  nbinom.ppf(0.975, n, p))
    ax.vlines(x, 0, nbinom.pmf(x, n, p), color='lightblue', lw=5, alpha=0.5)
    ax.set_title("pmf of coronavirus cases in Dhaka " + self.date)
def qNBI(q: float, location: np.ndarray, scale: np.ndarray):
    """Quantile function."""
    n = 1 / scale
    p = n / (n + location)
    if len(scale) > 1:
        # fall back to the Poisson limit where the dispersion vanishes
        quant = np.where(scale > 1e-04,
                         nbinom.ppf(q=q, n=n, p=p),
                         poisson.ppf(q=q, mu=location))
    else:
        quant = (poisson.ppf(q=q, mu=location)
                 if scale < 1e-04
                 else nbinom.ppf(q=q, n=n, p=p))
    return quant
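# A minimal check of the parameterisation used by qNBI above (a sketch,
# assuming the GAMLSS-style NBI convention where `location` is the mean and
# `scale` the dispersion): with n = 1/scale and p = n/(n + location), the
# negative binomial mean n*(1-p)/p reduces to `location`, and the Poisson
# branch is its scale -> 0 limit.
import numpy as np
from scipy.stats import nbinom

mu, sigma = np.array([4.0]), np.array([0.5])   # illustrative values
n = 1 / sigma
p = n / (n + mu)
assert np.allclose(nbinom.mean(n, p), mu)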
def calc_coverage_threshold(cov_dict):
    '''
    Calculate the minimum coverage threshold for each key in cov_dict.
    See the end of the 'alternative parameterization' section of the
    negative binomial Wikipedia page and the scipy negative binomial
    documentation for details of the calculation.
    '''
    threshold_dict = {}
    for g in cov_dict:
        mean = float(cov_dict[g]['mean'])
        var = float(cov_dict[g]['variance'])
        q = (var - mean) / var
        n = mean**2 / (var - mean)
        p = 1 - q
        ## assert that the (n, p) conversion preserves the moments.
        assert isclose(nbinom.mean(n, p), mean)
        assert isclose(nbinom.var(n, p), var)
        ## find the integer threshold that includes ~95% of the REL606
        ## distribution, excluding 5% on the left-hand side.
        my_threshold = nbinom.ppf(0.05, n, p)
        my_threshold_p = nbinom.cdf(my_threshold, n, p)
        threshold_dict[g] = {'threshold': str(my_threshold),
                             'threshold_p': str(my_threshold_p)}
    return threshold_dict
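# A quick illustration of calling calc_coverage_threshold (hypothetical
# numbers; `isclose` is assumed to come from math): a sample with mean
# coverage 100 and variance 400 gives n = 100**2/300 and p = 100/400.
from math import isclose
from scipy.stats import nbinom

cov_dict = {'REL606': {'mean': '100.0', 'variance': '400.0'}}  # hypothetical
print(calc_coverage_threshold(cov_dict))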
def _ppf(self, q, mu, alpha, p, w):
    s, p = self.convert_params(mu, alpha, p)
    # we just translated and stretched q to remove zi
    q_mod = (q - w) / (1 - w)
    x = nbinom.ppf(q_mod, s, p)
    # set to zero if in the zi range
    x[q < w] = 0
    return x
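# A numeric sanity check of the zero-inflation mapping in _ppf above (a
# sketch with illustrative parameters): the zero-inflated cdf is
# w + (1 - w) * cdf_nb, so translating and stretching q inverts it.
import numpy as np
from scipy.stats import nbinom

w, s, p = 0.3, 4, 0.5                  # illustrative zi weight and nbinom params
q = np.array([0.1, 0.6, 0.95])
q_mod = (q - w) / (1 - w)
x = nbinom.ppf(q_mod, s, p)
x[q < w] = 0
assert (w + (1 - w) * nbinom.cdf(x, s, p) >= q).all()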
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import nbinom


def plot_nbinom(r, p):
    left = nbinom.ppf(0.01, r, p)
    right = nbinom.ppf(0.99, r, p)
    # guard against a zero step when the central interval spans < 10 integers
    x = np.arange(left, right, max(1, int((right - left) / 10)))
    plt.plot(x, nbinom.pmf(x, r, p), alpha=0.6, color='gray')
    plt.plot(x, nbinom.pmf(x, r, p), 'o', label='$r=%s, p = %s$' % (r, p))
async def challenge(
    self,
    bot,
    event: Message,
    successes: int,
    chance: str,
):
    if successes < SUCCESSES_MIN or successes > SUCCESSES_MAX:
        await bot.say(
            event.channel,
            f'Please enter a success count of at least {SUCCESSES_MIN}'
            f' and at most {SUCCESSES_MAX:,}!',
        )
        return

    try:
        if chance.endswith('%'):
            p = Decimal(chance[:-1]) / 100
        else:
            p = Decimal(chance)
    except InvalidOperation:
        await bot.say(event.channel, 'Please enter a valid probability!')
        return

    if p < CHANCE_MIN or p > CHANCE_MAX:
        await bot.say(
            event.channel,
            f'Please enter a probability of at least {to_percent(CHANCE_MIN)}%'
            f' and at most {to_percent(CHANCE_MAX)}%!',
        )
        return

    if p / successes < CHANCE_MIN:
        await bot.say(
            event.channel,
            'The success count is too large for the given probability!',
        )
        return

    counts = {
        int(math.ceil(nbinom.ppf(float(q), successes, float(p))))
        for q in filter(lambda x: x >= p, CHANCES + [p])
    }
    results = [
        (x, Decimal(str(nbinom.cdf(x, successes, float(p)))))
        for x in sorted(counts)
    ]

    text = '\n'.join(
        f'- With {tries+successes:,} attempts, you reach the target number'
        f' of successes with {to_percent(ch, D001)}% probability!'
        for tries, ch in results
    )

    await bot.say(
        event.channel,
        f'Here is how many attempts you need to succeed {successes:,}'
        f' time(s) at a {to_percent(p)}% chance!\n{text}',
    )
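# Note on the ppf call above: scipy's nbinom counts the number of *failures*
# before the target number of successes, so the total number of attempts
# reported to the user is ppf + successes (hence `tries + successes`).
# A minimal illustration with assumed numbers:
from scipy.stats import nbinom

failures = int(nbinom.ppf(0.95, 3, 0.5))   # 3 successes at a 50% chance
print(failures + 3)                        # attempts needed at the 95% level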
def neg_binom_demand_distribution(C, t, r=10, p=.5):
    n = int(C.shape[0])
    U = np.linalg.cholesky(C)
    raw_demand = np.random.normal(size=(t, n))
    shifted_demand = np.dot(raw_demand, U.T)
    flat_demand = flatten_matrix(shifted_demand)
    true_std = np.std(flat_demand)
    true_mean = np.mean(flat_demand)
    normalized_demand = (shifted_demand - true_mean) / true_std
    # map the correlated normals through their CDF onto negative binomial
    # quantiles (a Gaussian copula with nbinom marginals)
    new_demand = nbinom.ppf(norm.cdf(normalized_demand), r, p)
    return new_demand.T
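# A sketch of calling the generator above, assuming `flatten_matrix` simply
# flattens to 1-D (it is not defined in the snippet). The function draws
# correlated normals via a Cholesky factor and pushes them through
# norm.cdf -> nbinom.ppf, i.e. a Gaussian copula with nbinom marginals.
import numpy as np
from scipy.stats import nbinom, norm

def flatten_matrix(m):                    # hypothetical stand-in
    return np.asarray(m).ravel()

C = np.array([[1.0, 0.6],
              [0.6, 1.0]])                # illustrative correlation matrix
demand = neg_binom_demand_distribution(C, t=1000)
print(demand.shape)                       # (2, 1000): one row per product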
def gen_ztnegbinom(n, mu, size):
    """Zero-truncated negative binomial distribution.

    input:
        n, int
            number of draws to generate
        mu, array of float
            nbinom shape parameter (number of successes) for each draw
        size, float
            probability of success

    output:
        ztnb, array of int
            draws from a zero-truncated negative binomial distribution
    """
    # P(X = 0) for each draw; sampling a uniform on [pmf(0), 1] and then
    # inverting the CDF guarantees a strictly positive draw.
    temp = nbinom.pmf(0, mu, size)
    p = [uniform.rvs(loc=temp[i], scale=1 - temp[i]) for i in range(n)]
    ztnb = [int(nbinom.ppf(p[i], mu[i], size)) for i in range(n)]
    return np.array(ztnb)
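# Usage sketch for gen_ztnegbinom with illustrative parameters: every draw
# is strictly positive because the uniform variate is confined to
# [P(X = 0), 1] before the CDF is inverted.
import numpy as np
from scipy.stats import nbinom, uniform

mus = np.full(5, 3.0)                 # illustrative per-draw shape parameters
draws = gen_ztnegbinom(5, mus, 0.4)
assert (draws > 0).all()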
def generate_graph_data(self):
    ageGroup = self.tableModel.data[self.selected_item_index.row()][0]
    parameter = self.tableModel.data[self.selected_item_index.row()][1]
    p1 = self.temporaryParametersDict[ageGroup][parameter]["p1"]
    p2 = self.temporaryParametersDict[ageGroup][parameter]["p2"]
    distributionType = self.temporaryParametersDict[ageGroup][parameter][
        "distributionType"]
    xyDict = {"x": [], "y": []}
    try:
        if distributionType == 'Binomial':
            xyDict["x"] = np.arange(binom.ppf(0.01, int(p1), p2 / 100),
                                    binom.ppf(0.99, int(p1), p2 / 100))
            xyDict["y"] = binom.pmf(xyDict["x"], int(p1), p2 / 100)
        elif distributionType == 'Geometric':
            xyDict["x"] = np.arange(geom.ppf(0.01, p1 / 100),
                                    geom.ppf(0.99, p1 / 100))
            xyDict["y"] = geom.pmf(xyDict["x"], p1 / 100)
            if p2 != 0:
                self.tableModel.setData(
                    self.selected_item_index.sibling(
                        self.selected_item_index.row(), 3), 0, Qt.EditRole)
        elif distributionType == 'Laplacian':
            xyDict["x"] = np.arange(dlaplace.ppf(0.01, p1 / 100),
                                    dlaplace.ppf(0.99, p1 / 100))
            xyDict["y"] = dlaplace.pmf(xyDict["x"], p1 / 100)
            if p2 != 0:
                self.tableModel.setData(
                    self.selected_item_index.sibling(
                        self.selected_item_index.row(), 3), 0, Qt.EditRole)
        elif distributionType == 'Logarithmic':
            xyDict["x"] = np.arange(logser.ppf(0.01, p1 / 100),
                                    logser.ppf(0.99, p1 / 100))
            xyDict["y"] = logser.pmf(xyDict["x"], p1 / 100)
            if p2 != 0:
                self.tableModel.setData(
                    self.selected_item_index.sibling(
                        self.selected_item_index.row(), 3), 0, Qt.EditRole)
        elif distributionType == 'Neg. binomial':
            xyDict["x"] = np.arange(nbinom.ppf(0.01, p1, p2 / 100),
                                    nbinom.ppf(0.99, p1, p2 / 100))
            xyDict["y"] = nbinom.pmf(xyDict["x"], p1, p2 / 100)
        elif distributionType == 'Planck':
            xyDict["x"] = np.arange(planck.ppf(0.01, p1 / 100),
                                    planck.ppf(0.99, p1 / 100))
            xyDict["y"] = planck.pmf(xyDict["x"], p1 / 100)
            if p2 != 0:
                self.tableModel.setData(
                    self.selected_item_index.sibling(
                        self.selected_item_index.row(), 3), 0, Qt.EditRole)
        elif distributionType == 'Poisson':
            xyDict["x"] = np.arange(poisson.ppf(0.01, p1),
                                    poisson.ppf(0.99, p1))
            xyDict["y"] = poisson.pmf(xyDict["x"], p1)
            if p2 != 0:
                self.tableModel.setData(
                    self.selected_item_index.sibling(
                        self.selected_item_index.row(), 3), 0, Qt.EditRole)
        elif distributionType == 'Uniform':
            if p1 - 0.5 * p2 < 0:
                p2 = p1
            low = p1 - 0.5 * p2   # avoid shadowing the built-ins min/max
            high = p1 + 0.5 * p2
            xyDict["x"] = np.arange(randint.ppf(0.01, low, high),
                                    randint.ppf(0.99, low, high))
            xyDict["y"] = randint.pmf(xyDict["x"], low, high)
        elif distributionType == 'Zipf (Zeta)':
            xyDict["x"] = np.arange(zipf.ppf(0.01, p1),
                                    zipf.ppf(0.99, p1))
            xyDict["y"] = zipf.pmf(xyDict["x"], p1)
            if p2 != 0:
                self.tableModel.setData(
                    self.selected_item_index.sibling(
                        self.selected_item_index.row(), 3), 0, Qt.EditRole)
        self.update_graph(xyDict)
    except Exception as E:
        log.error(E)
pstRRM.append(testRRM)
testRRm = 1. + infperiod * ln(gamma.ppf(0.01, a=alpha, scale=1. / beta))
if (testRRm < 0.):
    testRRm = 0.
pstRRm.append(testRRm)
# print('estimated RR=', RRest, testRRm, testRRM)  # to see the numbers for the evolution of Rt

if (new_cases > 0. and old_new_cases > 0.):
    NewCases.append(new_cases)

    # Using a Negative Binomial as the Posterior Predictor of New Cases,
    # given the old one. This takes parameters r, p which are functions of
    # the new alpha, beta from the Gamma
    r, p = alpha, beta / (old_new_cases + beta)
    mean, var, skew, kurt = nbinom.stats(r, p, moments='mvsk')
    pred.append(mean)  # the expected value of new cases

    # these are the boundaries of the 99% confidence interval for new cases
    testciM = nbinom.ppf(0.99, r, p)
    pstdM.append(testciM)
    testcim = nbinom.ppf(0.01, r, p)
    pstdm.append(testcim)

    newp = p
    newr = r
    flag = 0
    while (new_cases > testciM or new_cases < testcim):
        if (flag == 0):
            anomalyday.append(dates[i + 1])  # the first new cases are at i=2
            anomalypred.append(new_cases)
        # print("anomaly", testcim, new_cases, testciM, nr, np)
        # new cases falling outside the 99% CI

        # annealing: increase the variance so as to encompass the anomalous
        # observation: allow the Bayesian code to recover
        # mean of negbinomial = r*(1-p)/p ; variance = r*(1-p)/p**2
def zero_truncated_NB(size, n, p, poissonLimit=False, quantile=0.999,
                      MHSteps=100):
    """
    Returns a sample of size "size" from the negative binomial distribution
    with parameters n, p under the condition that at least one element in
    the sample is nonzero.

    MHSteps denotes the number of Metropolis-Hastings iterations.
    """
    if p == 1:
        poissonLimit = True

    # if obtaining a random sample with total count 0 is sufficiently
    # unlikely, sample until a suitable sample is found.
    if poissonLimit:
        zeroP = np.exp(-size * n)
    else:
        zeroP = p**(size * n)
    if zeroP < 0.7:
        while not poissonLimit:
            result = np.random.negative_binomial(n, p, size)
            if result.any():
                return result
        while poissonLimit:
            result = np.random.poisson(n, size)
            if result.any():
                return result

    # pmf of the truncated negative binomial for the total count
    q = min(quantile * (1 - zeroP) + zeroP, 0.999999)
    if poissonLimit:
        maxbin = poisson.ppf(q, size * n)
    else:
        dist = nbinom(n, p)
        maxbin = nbinom.ppf(q, size * n, p)
    maxbin = max(maxbin, 5)
    x = np.arange(1, maxbin + 1)
    if poissonLimit:
        trunc_pmf = poisson.pmf(x, size * n)
    else:
        trunc_pmf = nbinom.pmf(x, size * n, p)
    trunc_pmf /= np.sum(trunc_pmf)

    # sampling the total count value
    totalCount = np.random.choice(x, p=trunc_pmf)

    if poissonLimit:
        return np.random.multinomial(totalCount, np.full(size, 1 / size))
    elif totalCount == 1:
        # if only one observation has been made, it does not matter where
        result = np.zeros(size)
        result[0] = 1
        return result
    elif totalCount == 2:
        # if two observations have been made, we have to decide whether they
        # occurred in the same sample or in distinct samples.
        # when computing the joint probabilities of the possible events, I
        # neglect factors that appear in all probabilities.
        # p11 = (size choose 2) * pmf(1)**2
        p11 = (size - 1) / 2 * dist.pmf(1)**2
        # p20 = size * pmf(2) * pmf(0)
        p20 = dist.pmf(2) * dist.pmf(0)
        norm = p11 + p20
        p11 /= norm
        p20 /= norm
        result = np.zeros(size)
        if np.random.choice([True, False], p=[p11, p20]):
            result[:2] = 1
        else:
            result[0] = 2
        return result
    elif totalCount == 3:
        # p111 = (size choose 3) * pmf(1, n, p)**3
        p111 = (size - 1) * (size - 2) / 6 * dist.pmf(1)**3
        p210 = (size - 1) * dist.pmf(2) * dist.pmf(1) * dist.pmf(0)
        p300 = dist.pmf(3) * dist.pmf(0)**2
        ps = np.array([p111, p210, p300])
        ps /= np.sum(ps)
        result = np.zeros(size)
        choice = np.random.choice(np.arange(3), p=ps)
        if choice == 0:
            result[:3] = 1
        elif choice == 1:
            result[0] = 2
            result[1] = 1
        elif choice == 2:
            result[0] = 3
        return result
    else:
        return _dist_bins_MH(size, totalCount, dist, MHSteps)
def run_luis_model(df: pd.DataFrame, filepath: Path) -> None:

    infperiod = 4.5  # length of infectious period, adjust as needed

    def smooth(y, box_pts):
        box = np.ones(box_pts) / box_pts
        y_smooth = np.convolve(y, box, mode='same')
        return y_smooth

    # Loop through states
    states = df['state'].unique()
    returndf = pd.DataFrame()
    for state in states:
        # note: the per-state re-imports guard against the original `np = p`
        # assignment below (renamed `newp` here), which shadowed numpy and
        # caused a type exception on the next state
        from scipy.stats import gamma
        import numpy as np

        statedf = df[df['state'] == state].sort_values('date')

        confirmed = list(statedf['positive'])
        dates = list(statedf['date'])
        day = list(range(1, len(statedf['date']) + 1))

        if (confirmed[-1] < 10.):
            continue  # skip the Rt analysis for states with fewer than 10 total cases

        ##### estimation and prediction
        dconfirmed = np.diff(confirmed)
        for ii in range(len(dconfirmed)):
            if dconfirmed[ii] < 0.:
                dconfirmed[ii] = 0.
        xd = dates[1:]

        # smoothing over a moving window of sdays; averages out large
        # chunking of reports on consecutive days
        sdays = 15
        yy = smooth(dconfirmed, sdays)
        # these 2 last lines should not be necessary, but the data tend to be
        # initially underreported and the smoother struggles at the boundary
        yy[-2] = (dconfirmed[-4] + dconfirmed[-3] + dconfirmed[-2]) / 3.
        yy[-1] = (dconfirmed[-3] + dconfirmed[-2] + dconfirmed[-1]) / 3.

        # confirmed cases after smoothing: a lowess smoother was also tried,
        # but was a bit more parameter-dependent from place to place
        TotalCases = np.cumsum(yy)

        alpha = 3.  # shape parameter of gamma distribution
        beta = 2.   # rate parameter of gamma distribution, see https://en.wikipedia.org/wiki/Gamma_distribution

        valpha = []
        vbeta = []

        pred = []
        pstdM = []
        pstdm = []

        xx = []
        NewCases = []

        predR = []
        pstRRM = []
        pstRRm = []

        anomalyday = []
        anomalypred = []

        for i in range(2, len(TotalCases)):
            new_cases = float(TotalCases[i] - TotalCases[i - 1])
            old_new_cases = float(TotalCases[i - 1] - TotalCases[i - 2])

            # This uses a conjugate prior as a Gamma distribution for b_t,
            # with parameters alpha and beta
            alpha = alpha + new_cases
            beta = beta + old_new_cases
            valpha.append(alpha)
            vbeta.append(beta)

            mean = gamma.stats(a=alpha, scale=1 / beta, moments='m')
            RRest = 1. + infperiod * ln(mean)
            if (RRest < 0.):
                RRest = 0.
            predR.append(RRest)

            # these are the boundaries of the 99% confidence interval for Rt
            testRRM = 1. + infperiod * ln(gamma.ppf(0.99, a=alpha, scale=1. / beta))
            if (testRRM < 0.):
                testRRM = 0.
            pstRRM.append(testRRM)
            testRRm = 1. + infperiod * ln(gamma.ppf(0.01, a=alpha, scale=1. / beta))
            if (testRRm < 0.):
                testRRm = 0.
            pstRRm.append(testRRm)

            if (new_cases == 0. or old_new_cases == 0.):
                pred.append(0.)
                pstdM.append(10.)
                pstdm.append(0.)
                NewCases.append(0.)

            if (new_cases > 0. and old_new_cases > 0.):
                NewCases.append(new_cases)

                # Using a Negative Binomial as the Posterior Predictor of New
                # Cases, given the old one. This takes parameters r, p which
                # are functions of the new alpha, beta from the Gamma
                r, p = alpha, beta / (old_new_cases + beta)
                mean, var, skew, kurt = nbinom.stats(r, p, moments='mvsk')
                pred.append(mean)  # the expected value of new cases

                # these are the boundaries of the 99% confidence interval for new cases
                testciM = nbinom.ppf(0.99, r, p)
                pstdM.append(testciM)
                testcim = nbinom.ppf(0.01, r, p)
                pstdm.append(testcim)

                newp = p  # renamed from `np`/`nr` to stop shadowing numpy
                newr = r
                flag = 0
                while (new_cases > testciM or new_cases < testcim):
                    if (flag == 0):
                        anomalypred.append(new_cases)
                        anomalyday.append(dates[i + 1])  # the first new cases are at i=2

                    # annealing: increase the variance so as to encompass the
                    # anomalous observation: allow the Bayesian code to recover
                    # mean of negbinomial = r*(1-p)/p ; variance = r*(1-p)/p**2
                    # preserve the mean, increase the variance -->
                    # newp = 0.95*p (smaller), newr = newr*(nnp/newp)*((1.-newp)/(1.-nnp))
                    # test anomaly
                    nnp = 0.95 * newp  # this doubles the variance, which tends to be small after many Bayesian steps
                    newr = newr * (nnp / newp) * ((1. - newp) / (1. - nnp))  # this assignment preserves the mean of expected cases
                    newp = nnp
                    mean, var, skew, kurt = nbinom.stats(newr, newp, moments='mvsk')
                    testciM = nbinom.ppf(0.99, newr, newp)
                    testcim = nbinom.ppf(0.01, newr, newp)

                    flag = 1
                else:
                    if (flag == 1):
                        # update the R distribution with the new parameters
                        # that enclose the anomaly
                        alpha = newr
                        beta = newp / (1. - newp) * old_new_cases

                        testciM = nbinom.ppf(0.99, newr, newp)
                        testcim = nbinom.ppf(0.01, newr, newp)

                        # annealing leaves the RR mean unchanged, but we need
                        # to adjust its widened CI:
                        testRRM = 1. + infperiod * ln(gamma.ppf(0.99, a=alpha, scale=1. / beta))
                        if (testRRM < 0.):
                            testRRM = 0.
                        testRRm = 1. + infperiod * ln(gamma.ppf(0.01, a=alpha, scale=1. / beta))
                        if (testRRm < 0.):
                            testRRm = 0.

                        # remove the last element and replace it by the
                        # expanded CI for RRest
                        pstRRM = pstRRM[:-1]
                        pstRRm = pstRRm[:-1]
                        pstRRM.append(testRRM)
                        pstRRm.append(testRRm)

        # visualization of the time evolution of R_t with confidence intervals
        x = []
        for i in range(len(predR)):
            x.append(i)

        days = dates[3:]
        xd = days
        dstr = []
        for xdd in xd:
            dstr.append(xdd.strftime("%Y-%m-%d"))

        appenddf = pd.DataFrame({
            'state': state,
            'date': days,
            'RR_pred_luis': predR,
            'RR_CI_lower_luis': pstRRm,
            'RR_CI_upper_luis': pstRRM
        })
        returndf = pd.concat([returndf, appenddf], axis=0)

    returndf.to_csv(filepath / "luis_code_estimates.csv", index=False)
def test_ppf(self):
    n, p = sm.distributions.zinegbin.convert_params(5, 1, 1)
    nbinom_ppf = nbinom.ppf(0.71, n, p)
    zinbinom_ppf = sm.distributions.zinegbin.ppf(0.71, 5, 1, 1, 0)
    assert_allclose(nbinom_ppf, zinbinom_ppf, rtol=1e-12, atol=1e-12)
import numpy as np
from scipy.stats import nbinom
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
n, p = 0.4, 0.4
mean, var, skew, kurt = nbinom.stats(n, p, moments='mvsk')

# Display the probability mass function (``pmf``):
x = np.arange(nbinom.ppf(0.01, n, p),
              nbinom.ppf(0.99, n, p))
ax.plot(x, nbinom.pmf(x, n, p), 'bo', ms=8, label='nbinom pmf')
ax.vlines(x, 0, nbinom.pmf(x, n, p), colors='b', lw=5, alpha=0.5)

# Alternatively, the distribution object can be called (as a function)
# to fix the shape and location. This returns a "frozen" RV object holding
# the given parameters fixed.

# Freeze the distribution and display the frozen ``pmf``:
rv = nbinom(n, p)
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()
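# A round-trip check of cdf against ppf, in the style of the scipy docs:
# for a discrete distribution, ppf inverts cdf exactly on the integers.
import numpy as np
from scipy.stats import nbinom

n, p = 0.4, 0.4
x = np.arange(nbinom.ppf(0.01, n, p), nbinom.ppf(0.99, n, p))
prob = nbinom.cdf(x, n, p)
print(np.allclose(x, nbinom.ppf(prob, n, p)))   # True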
def collect_and_plot_passes_nb(teams_list=None, teams_dict=None,
                               plot_output=['single', 'all'],
                               teams_col_dict=None):
    team_sequences = {}
    dict_of_passing_stats = {}
    all_sequences = []
    for tm in teams_list:
        passing_stats = {}
        df = teams_dict[tm]
        list_of_dates = set(df['Date/Time'])
        date_sequences = {}
        for d in list_of_dates:
            df_filter = df[df['Date/Time'] == d]
            df_filter = df_filter[df_filter['Event Type'] != 'Cessation']
            opponent = df_filter['Opponent'].iloc[0]
            kee = str(d) + ' | ' + opponent
            date_sequences[kee] = get_sequences(df_filter)
        team_sequences[tm] = date_sequences
        counts = convert_date_sequences_to_list_and_count(date_sequences)
        all_sequences.extend(counts)
        x_values_for_barplot = [key for key, group in groupby(counts)]
        y_values_for_barplot = [
            i / sum([len(list(group)) for key, group in groupby(counts)])
            for i in [len(list(group)) for key, group in groupby(counts)]
        ]

        ## (GP) NB Estimation
        mu = sum(counts) / len(counts)
        sigma = math.sqrt(
            sum([(mu - float(i))**2 for i in counts]) /
            (len([(mu - float(i))**2 for i in counts]) - 1))
        r = (mu**2) / (sigma**2 - mu)
        p = (mu) / (sigma**2)
        mean, var, skew, kurt = nbinom.stats(r, p, moments='mvsk')
        passing_stats['nb_probability'] = p
        passing_stats['nb_r'] = r
        passing_stats['avg_passes'] = mean
        passing_stats['var_passes'] = sigma**2
        passing_stats['nb_skew'] = skew
        passing_stats['nb_kurtosis'] = kurt
        dict_of_passing_stats[tm] = passing_stats

        if plot_output == 'single':
            x_values_for_nb = np.arange(nbinom.ppf(0.01, r, p),
                                        nbinom.ppf(0.9999, r, p))
            y_values_for_nb = nbinom.pmf(x_values_for_nb, r, p)
            fig = go.Figure(data=[
                go.Bar(x=x_values_for_barplot,
                       y=y_values_for_barplot,
                       marker_color=teams_col_dict[tm],
                       marker_line_color="black",
                       name="Passes Completed")
            ])
            fig.add_trace(
                go.Scatter(x=x_values_for_nb,
                           y=y_values_for_nb,
                           marker_color="black",
                           mode='lines',
                           name='Negative Binomial Approximation'))
            fig.update_layout(
                title="{}: Catch Counts, with Negative Binomial Estimation".format(tm),
                xaxis_title="n Number of Catches",
                yaxis_title="Frequency",
                boxmode='group',
                plot_bgcolor='rgb(220,220,220)')
            iplot(fig)

    all_sequences.sort()
    if plot_output == 'all':
        mu_a = sum(all_sequences) / len(all_sequences)
        sigma_a = math.sqrt(
            sum([(mu_a - float(i))**2 for i in all_sequences]) /
            (len([(mu_a - float(i))**2 for i in all_sequences]) - 1))
        r_a = (mu_a**2) / (sigma_a**2 - mu_a)
        p_a = (mu_a) / (sigma_a**2)
        mean_a, var_a, skew_a, kurt_a = nbinom.stats(r_a, p_a, moments='mvsk')
        x_values_for_barplot_a = [key for key, group in groupby(all_sequences)]
        y_values_for_barplot_a = [
            i / sum([len(list(group)) for key, group in groupby(all_sequences)])
            for i in [len(list(group)) for key, group in groupby(all_sequences)]
        ]
        x_values_for_nb_a = np.arange(nbinom.ppf(0.01, r_a, p_a),
                                      nbinom.ppf(0.9999, r_a, p_a))
        y_values_for_nb_a = nbinom.pmf(x_values_for_nb_a, r_a, p_a)
        fig = go.Figure(data=[
            go.Bar(x=x_values_for_barplot_a,
                   y=y_values_for_barplot_a,
                   marker_color="oldlace",
                   marker_line_color="black",
                   name="Passes Completed")
        ])
        fig.add_trace(
            go.Scatter(x=x_values_for_nb_a,
                       y=y_values_for_nb_a,
                       marker_color="black",
                       mode='lines',
                       name='Negative Binomial Approximation'))
        fig.update_layout(
            title="League Wide Catch Counts Per Possession, with Negative Binomial Estimation",
            xaxis_title="n Number of Catches in a Possession",
            yaxis_title="Frequency",
            boxmode='group',
            plot_bgcolor='rgb(220,220,220)')
        iplot(fig)
    return (dict_of_passing_stats, team_sequences, all_sequences)
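# A quick check of the method-of-moments conversion used above (illustrative
# moments): with sample mean m and variance v > m, r = m**2/(v - m) and
# p = m/v recover exactly those moments under scipy's nbinom.
import numpy as np
from scipy.stats import nbinom

m, v = 4.0, 10.0
r, p = m**2 / (v - m), m / v
assert np.isclose(nbinom.mean(r, p), m)
assert np.isclose(nbinom.var(r, p), v)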
def qzinegbin(p, size, pstr0, prob=None, munb=None, nVariables=None):
    """
    Percent point function of a zero-inflated negative binomial distribution.
    """
    # Same nomenclature as the R function.
    # 1. Requirements:
    # size, munb, prob and pstr0 must have the same length as p.
    # Given that each species is a variable and its mean differs from the
    # others, the best approach is to pass a vector of means and a vector of
    # sizes. This function does NOT work with single values of size, prob,
    # munb, and pstr0: it needs lists.
    nSpecies = p.shape[1]
    nSamples = p.shape[0]
    if isinstance(size, (float, int)):
        # Need a list with one value per variable. If the nVariables argument
        # is provided, the value is repeated nVariables times.
        size = [size] * nVariables
    if isinstance(prob, (float, int)):
        prob = [prob] * nVariables
    if isinstance(munb, (float, int)):
        munb = [munb] * nVariables
    if isinstance(pstr0, (float, int)):
        pstr0 = [pstr0] * nVariables

    # 2. Repeat munb, size, prob and pstr0 for each value of the same variable
    if len(munb):
        prob = [s / (s + m) for s, m in zip(size, munb)]
    # Number of values
    LLL = max(len(p.flatten()), len(prob), len(pstr0), len(size), len(munb))
    p = np_rep_len(p.flatten(), LLL)
    if len(pstr0) != LLL:
        pstr0 = np_rep_len(pstr0, LLL)
    if len(prob) != LLL:
        prob = np_rep_len(prob, LLL)
    if len(size) != LLL:
        size = np_rep_len(size, LLL)
    if len(munb) != LLL:
        munb = np_rep_len(munb, LLL)

    # 3. Now everything has the proper length (same values -> same
    # distribution for each variable).
    # 3.1 Create an empty list (it must be a list to mix 'NA' strings with
    # floats; a numpy array allows only one dtype)
    ans = list(np.repeat(float('nan'), LLL))
    prob0 = [pr**s for pr, s in zip(prob, size)]
    deflat_limit = []
    for i in range(len(prob0)):
        if (1 - prob0[i]) == 0:  # 1 - prob0[i] = 0 only when prob0[i] = 1
            deflat_limit.append(float('-inf'))
        # elif prob0[i] < 0:
        #     deflat_limit[i] = float('inf')
        elif ((1 - prob0[i]) == 0 and prob0[i] == 0):
            deflat_limit.append(float('nan'))
        else:
            deflat_limit.append(-prob0[i] / (1 - prob0[i]))

    for i in range(len(ans)):
        if p[i] <= pstr0[i]:
            ans[i] = 0

    ind4 = [(pstr0[i] < p[i]) and (deflat_limit[i] <= pstr0[i])
            for i in range(len(p))]
    q = [(p[i] - pstr0[i]) / (1 - pstr0[i]) for i in range(len(p)) if ind4[i]]
    n = [size[i] for i in range(len(size)) if ind4[i]]
    pr = [prob[i] for i in range(len(prob)) if ind4[i]]
    j = 0
    for i in range(len(ind4)):
        if ind4[i]:
            # Not exactly equal to the R function: R returns 0 with a warning
            # in some cases where this returns nan
            ans[i] = nbinom.ppf(q=q[j], n=n[j], p=pr[j], loc=0)
            j = j + 1

    for i in range(len(ans)):
        if pstr0[i] < deflat_limit[i]:
            ans[i] = float('nan')
        if 1 < pstr0[i]:
            ans[i] = float('nan')
        if p[i] < 0:
            ans[i] = float('nan')
        if 1 < p[i]:
            ans[i] = float('nan')

    return np.array(ans).reshape(nSamples, nSpecies)
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma, nbinom, poisson

plt.clf()
fig, ax = plt.subplots(1, 1)

a = 2
b = 3
mean, var, skew, kurt = gamma.stats(a, moments='mvsk')
print(mean, var, skew, kurt)
mean, var, skew, kurt = gamma.stats(a, scale=b, moments='mvsk')
print(mean, var, skew, kurt)

x = np.linspace(gamma.ppf(0.01, a), gamma.ppf(0.99, a), 100)
ax.plot(x, gamma.pdf(x, a), 'r-', lw=5, alpha=0.6, label='gamma a=2, b=1 pdf')
ax.plot(x, gamma.pdf(x, a, scale=b), 'r-', lw=5, alpha=0.6,
        label='gamma a=2, b=3 pdf')
ax.legend(loc='best', frameon=False)
plt.show()

fig, ax = plt.subplots(1, 1)
n, p = 5, 0.5
mean, var, skew, kurt = nbinom.stats(n, p, moments='mvsk')
print(mean, var, skew, kurt)
start = nbinom.ppf(0.01, n, p)
stop = nbinom.ppf(0.99, n, p)
x = np.linspace(start, stop, num=int(stop - start + 1))
print(x)
ax.plot(x, nbinom.pmf(x, n, p), 'bo', ms=8, label='nbinom pmf')
ax.plot(x, poisson.pmf(x, 5), 'ro', ms=8, label='poisson pmf')
ax.legend(loc='best', frameon=False)
plt.show()
def analytical_MPVS(
    infection_ts: pd.DataFrame,
    smoothing: Callable,
    alpha: float = 3.0,                 # shape
    beta: float = 2.0,                  # rate
    CI: float = 0.95,                   # confidence interval
    infectious_period: int = 5 * days,  # inf period = 1/gamma,
    variance_shift: float = 0.99,       # how much to scale variance parameters by when anomaly detected
    totals: bool = True                 # are these case totals or daily new cases?
):
    """Estimates Rt ~ Gamma(alpha, 1/beta), and implements an analytical
    expression for a mean-preserving variance increase whenever case counts
    fall outside the CI defined by a negative binomial distribution"""
    # infection_ts = infection_ts.copy(deep = True)
    dates = infection_ts.index
    if totals:
        # daily_cases = np.diff(infection_ts.clip(lower = 0)).clip(min = 0)
        # infection_ts clipped because the COVID19India API does weird stuff
        daily_cases = infection_ts.clip(lower=0).diff().clip(lower=0).iloc[1:]
    else:
        daily_cases = infection_ts
    total_cases = np.cumsum(smoothing(np.squeeze(daily_cases)))

    v_alpha, v_beta = [], []

    RR_pred, RR_CI_upper, RR_CI_lower = [], [], []
    T_pred, T_CI_upper, T_CI_lower = [], [], []

    new_cases_ts = []

    anomalies = []
    anomaly_dates = []

    for i in range(2, len(total_cases)):
        new_cases = max(0, total_cases[i] - total_cases[i - 1])
        old_new_cases = max(0, total_cases[i - 1] - total_cases[i - 2])

        alpha += new_cases
        beta += old_new_cases
        v_alpha.append(alpha)
        v_beta.append(beta)

        RR_est = max(0, 1 + infectious_period * np.log(Gamma.mean(a=alpha, scale=1/beta)))
        RR_upper = max(0, 1 + infectious_period * np.log(Gamma.ppf(CI, a=alpha, scale=1/beta)))
        RR_lower = max(0, 1 + infectious_period * np.log(Gamma.ppf(1 - CI, a=alpha, scale=1/beta)))
        RR_pred.append(RR_est)
        RR_CI_upper.append(RR_upper)
        RR_CI_lower.append(RR_lower)

        if (new_cases == 0 or old_new_cases == 0):
            if new_cases == 0:
                logger.debug("new_cases at time %s: 0", i)
            if old_new_cases == 0:
                logger.debug("old_new_cases at time %s: 0", i)
            T_pred.append(0)
            T_CI_upper.append(10)  # <- where does this come from?
            T_CI_lower.append(0)
            new_cases_ts.append(0)

        if (new_cases > 0 and old_new_cases > 0):
            new_cases_ts.append(new_cases)

            r, p = alpha, beta / (old_new_cases + beta)
            T_pred.append(nbinom.mean(r, p))
            T_upper = nbinom.ppf(CI, r, p)
            T_lower = nbinom.ppf(1 - CI, r, p)
            T_CI_upper.append(T_upper)
            T_CI_lower.append(T_lower)

            _np = p
            _nr = r
            anomaly_noted = False
            counter = 0
            while not (T_lower < new_cases < T_upper):
                if not anomaly_noted:
                    anomalies.append(new_cases)
                    anomaly_dates.append(dates[i])
                # logger.debug("anomaly identified at time %s: %s < %s < %s, r: %s, p: %s, annealing iteration: %s", i, T_lower, new_cases, T_upper, _nr, _np, counter+1)
                # nnp = 0.95 * _np # <- where does this come from
                _nr = variance_shift * _nr * ((1 - _np) / (1 - variance_shift * _np))
                _np = variance_shift * _np
                T_upper = nbinom.ppf(CI, _nr, _np)
                T_lower = nbinom.ppf(1 - CI, _nr, _np)
                T_lower, T_upper = sorted((T_lower, T_upper))
                if T_lower == T_upper == 0:
                    T_upper = 1
                    logger.debug("CI collapse, setting T_upper -> 1")
                anomaly_noted = True

                counter += 1
                if counter >= 10000:
                    raise ValueError("Number of iterations exceeded")
            else:
                if anomaly_noted:
                    alpha = _nr  # update distribution on R with new parameters that enclose the anomaly
                    beta = _np / (1 - _np) * old_new_cases

                    # lower/upper bounds from the (1-CI) and CI quantiles respectively
                    T_pred[-1] = nbinom.mean(_nr, _np)
                    T_CI_lower[-1] = nbinom.ppf(1 - CI, _nr, _np)
                    T_CI_upper[-1] = nbinom.ppf(CI, _nr, _np)

                    # annealing leaves the RR mean unchanged, but we need to adjust its widened CI
                    RR_upper = max(0, 1 + infectious_period * np.log(Gamma.ppf(CI,     a=alpha, scale=1/beta)))
                    RR_lower = max(0, 1 + infectious_period * np.log(Gamma.ppf(1 - CI, a=alpha, scale=1/beta)))

                    # replace latest CI time series entries with adjusted CI
                    RR_CI_upper[-1] = RR_upper
                    RR_CI_lower[-1] = RR_lower
    return (
        dates[2:],
        RR_pred, RR_CI_upper, RR_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        anomalies, anomaly_dates
    )
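# A check that the annealing step above is mean-preserving (illustrative
# numbers): with p' = c*p and r' = c*r*(1 - p)/(1 - c*p), the negative
# binomial mean r*(1 - p)/p is unchanged while the variance r*(1 - p)/p**2
# grows, widening the CI until it encloses the anomaly.
from scipy.stats import nbinom

r, p, c = 12.0, 0.6, 0.95
r2 = c * r * (1 - p) / (1 - c * p)
p2 = c * p
assert abs(nbinom.mean(r, p) - nbinom.mean(r2, p2)) < 1e-9
assert nbinom.var(r2, p2) > nbinom.var(r, p)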
import numpy as np
from scipy.stats import nbinom


def ssq(obs, n, p):
    # `q` was undefined in the original snippet; assumed here to be
    # plotting-position quantiles for the (sorted) observations
    q = (np.arange(len(obs)) + 0.5) / len(obs)
    exp = nbinom.ppf(q, n, p)
    ssq = np.sum([(x - exp[i]) ** 2 for i, x in enumerate(obs)])
    return ssq
    :param neg_binom_r_param: int (could be a float too), parameter of the negative binomial distribution
    :param count_data: np array, count values we model with the negative binomial
    :return: float, log likelihood
    """
    num_counts = len(count_data)
    p = 1 - sum(count_data) / (num_counts * neg_binom_r_param + sum(count_data))
    llh = sum(nbinom.logpmf(count_data, neg_binom_r_param, p))
    return llh


# set parameters
n, p = 10, 0.4

# generate x values and get pmf
x = np.arange(nbinom.ppf(0.01, n, p),
              nbinom.ppf(0.99, n, p))
pmf = nbinom.pmf(x, n, p)

# plot
plt.plot(pmf)
plt.axvline(x=n)
plt.show()

# check whether the peak occurs at the correct value for n (r).
r_vals = np.arange(5, 20)
llh = np.zeros(len(r_vals))
counts = np.random.negative_binomial(n, p, size=100000)
for ii, r in enumerate(r_vals):
    llh[ii] = max_llh_given_r_param(r, counts)

# plot
def test_ppf_p2(self):
    n, p = sm.distributions.zinegbin.convert_params(100, 1, 2)
    nbinom_ppf = nbinom.ppf(0.27, n, p)
    zinbinom_ppf = sm.distributions.zinegbin.ppf(0.27, 100, 1, 2, 0)
    assert_allclose(nbinom_ppf, zinbinom_ppf, rtol=1e-12, atol=1e-12)
def _rvs(self, n, p):
    # inverse-CDF sampling with u confined to [P(X=0), 1) yields a
    # zero-truncated draw (`uniform` is presumably numpy.random.uniform)
    return nbinom.ppf(uniform(low=nbinom.pmf(0, n, p)), n, p)
def _ppf(self, q, n, p):
    # map q from [0, 1] onto [P(X=0), 1] so the untruncated ppf never
    # returns zero
    return nbinom.ppf(nbinom.sf(0, n, p) * q + nbinom.pmf(0, n, p), n, p)
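# A sanity check of the truncation identity used by _rvs and _ppf above (a
# sketch; the class they belong to is not shown): remapping q onto
# [pmf(0), 1] via sf(0)*q + pmf(0) means the untruncated ppf never returns 0.
import numpy as np
from scipy.stats import nbinom

n, p = 5, 0.3
q = np.linspace(0.001, 0.999, 7)
x = nbinom.ppf(nbinom.sf(0, n, p) * q + nbinom.pmf(0, n, p), n, p)
assert (x >= 1).all()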
def get_ui(self, params: List[ndarray],
           bounds: Tuple[float, float]) -> np.ndarray:
    n = params[0]
    p = params[1]
    # return an array to match the annotated np.ndarray return type
    return np.array([nbinom.ppf(bounds[0], n=n, p=p),
                     nbinom.ppf(bounds[1], n=n, p=p)])