def calculate_gender(cls, country="uk_plus"):
    """ apply transformation function to source names """
    types = {"name": "string", "F": "int", "M": "int", "source": "string"}
    df = pd.read_csv(cls.tidy_data_file(country), dtype=types)

    # calculate relative proportions
    df["total"] = df.M + df.F
    df["f_proportion"] = df.F / df.total
    df["m_proportion"] = df.M / df.total

    # find winner
    df["winner"] = df[["M", "F"]].max(axis="columns")
    df["winner_proportion"] = df.winner / df.total

    # set predicted value
    df["predicted"] = "M"
    df.loc[df["F"] == df["winner"], "predicted"] = "F"

    # calculate lower bar
    variance = df.apply(lambda x: binom.var(x.total, x.winner_proportion),
                        axis="columns")
    df["lower"] = (df.winner - (np.sqrt(variance) * cls.z)) / df.total

    df.to_csv(cls.calc_data_file(country), index=False)
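# Illustrative sketch (not part of the function above) of its "lower bar"
# step: binom.var(n, p) = n * p * (1 - p), so the lower bound is a normal
# approximation confidence bound on the winning proportion. The counts and
# the z value below are made-up assumptions.
import numpy as np
from scipy.stats import binom

total, winner = 1000, 900            # e.g. 900 "F" rows out of 1000
winner_proportion = winner / total
z = 2                                # stand-in for cls.z

variance = binom.var(total, winner_proportion)   # 1000 * 0.9 * 0.1 = 90
lower = (winner - np.sqrt(variance) * z) / total
print(lower)                                     # ~0.881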
def variance(self):
    """
    Compute the variance of the distribution

    Returns:
    --------
    variance : float
    """
    return binom.var(self.__n, self.__p)
def aggregate():
    # Group Polls by State
    polls_by_state = {}
    for state in State.states:
        state_name = state.name
        polls_by_state[state] = []
        for poll in Poll.polls:
            poll_state = poll.state.name
            if state_name == poll_state:
                polls_by_state[state].append(poll)

    # Aggregating Polls into a singular rating for each State
    for key in polls_by_state.keys():
        rating = {}
        d_sum = 0
        r_sum = 0
        error_sq_sum = 0
        n = len(polls_by_state[key])
        if n > 0:
            for poll in polls_by_state[key]:
                d_sum += poll.d
                r_sum += poll.r
                error_sq_sum += poll.error**2
            rating['D'] = round(d_sum / n, 1)
            rating['R'] = round(r_sum / n, 1)
            rating['error'] = round(math.sqrt(error_sq_sum), 2)
        else:
            n = key.election16['Total']
            d = key.election16['D']
            r = key.election16['R']
            prob_d = d / n
            prob_r = r / n
            var_d = binom.var(n, prob_d)
            var_r = binom.var(n, prob_r)
            se_d = math.sqrt(var_d / n)
            se_r = math.sqrt(var_r / n)
            rating['D'] = round(100 * prob_d, 1)
            rating['R'] = round(100 * prob_r, 1)
            rating['error'] = round(math.sqrt(se_d**2 + se_r**2), 2)
        key.poll_rating = rating
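# Illustrative sketch (not part of aggregate() above) of its no-polls
# fallback: the 2016 vote shares are treated as binomial proportions and the
# rating error combines their standard errors. The vote totals below are
# made-up assumptions.
import math
from scipy.stats import binom

total, d_votes, r_votes = 4_000_000, 2_100_000, 1_900_000
prob_d, prob_r = d_votes / total, r_votes / total

se_d = math.sqrt(binom.var(total, prob_d) / total)    # = sqrt(p * (1 - p))
se_r = math.sqrt(binom.var(total, prob_r) / total)

rating = {
    'D': round(100 * prob_d, 1),                      # 52.5
    'R': round(100 * prob_r, 1),                      # 47.5
    'error': round(math.sqrt(se_d**2 + se_r**2), 2),  # ~0.71
}
print(rating)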
def test_inversion_diffs(self):
    cfg = AppSettings()
    reps = 1000
    deltas = []  # observed number of differences
    for _ in range(0, reps):
        dna = Chromosome()
        old_seq = dna.sequence
        dna.inversion()
        deltas.append(
            sum(1 for a, b in zip(old_seq, dna.sequence) if a != b))

    pmfs = []
    expected_deltas = []  # expected differences
    # Assumes the length of an inversion is drawn from a negative binomial
    # distribution. Calculates the probability of each length until
    # 99.99% of the distribution is accounted for. The expected number of
    # differences for each length is multiplied by the probability of that
    # length, and the sum of those products gives the overall expected
    # number of differences.
    k = 0
    while sum(pmfs) <= 0.9999:
        pmf = nbinom.pmf(k, 1, (1 - cfg.genetics.mutation_length /
                                (1 + cfg.genetics.mutation_length)))
        pmfs.append(pmf)
        diffs = math.floor(
            k / 2) * (1 - 1 / len(Chromosome.nucleotides())) * 2
        expected_deltas.append(pmf * diffs)
        k += 1
    expected_delta = sum(expected_deltas)

    # Since we are multiplying a binomial distribution (probability of
    # differences at a given length) by a negative binomial distribution
    # (probability of a length), we need the variance of a product of two
    # independent random variables:
    # Var(X * Y) = var(X)*var(Y) + var(X)*mean(Y)**2 + mean(X)**2*var(Y)
    # http://www.odelama.com/data-analysis/Commonly-Used-Math-Formulas/
    mean_binom = cfg.genetics.mutation_length
    var_binom = binom.var(mean_binom, 1 / (len(Chromosome.nucleotides())))
    mean_nbinom = cfg.genetics.mutation_length
    var_nbinom = nbinom.var(cfg.genetics.mutation_length,
                            mean_nbinom / (1 + mean_nbinom))
    var = var_binom * var_nbinom + \
        var_binom * mean_nbinom**2 + \
        mean_binom**2 * var_nbinom

    observed_delta = sum(deltas) / reps
    conf_99 = ((var / reps)**(1 / 2)) * 5
    assert expected_delta - conf_99 < observed_delta < expected_delta + conf_99
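# Quick Monte Carlo sanity check (not part of the test above) of the
# product-variance identity used there, for independent X ~ Binomial and
# Y ~ Negative Binomial:
#   Var(X * Y) = var(X)*var(Y) + var(X)*mean(Y)**2 + mean(X)**2*var(Y)
# The parameters below are arbitrary illustrative choices.
import numpy as np
from scipy.stats import binom, nbinom

n, p = 12, 0.25                      # arbitrary binomial parameters
r, q = 3, 0.4                        # arbitrary negative binomial parameters

rng = np.random.default_rng(0)
x = binom.rvs(n, p, size=200_000, random_state=rng)
y = nbinom.rvs(r, q, size=200_000, random_state=rng)

analytic = (binom.var(n, p) * nbinom.var(r, q)
            + binom.var(n, p) * nbinom.mean(r, q) ** 2
            + binom.mean(n, p) ** 2 * nbinom.var(r, q))
print(analytic, np.var(x * y))       # the two values should agree closely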
def demo13():
    n = 100
    p = 0.25
    x = np.array(range(0, n + 1))
    prob = np.array([binom.pmf(k, n, p) for k in x])

    print(binom.mean(n, p))
    print(binom.var(n, p))
    print(binom.std(n, p))

    plt.xlabel('x')
    plt.ylabel('Probability')
    plt.bar(x, prob)
    plt.show()
def var(self, dist):
    return binom.var(*self._get_params(dist))
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import binom

a = 4
b = 33

fig, ax = plt.subplots(1, 1)

n = 400
step = 1
p = float(1) / float(1 + b)

mean, var, skew, kurt = binom.stats(n, p, moments='mvsk')
print(binom.var(n, p))
print(binom.expect(lambda x: x, args=(n, p)))
print(binom.expect(lambda x: x ** 2, args=(n, p)))

# x = np.arange(binom.ppf(0.00001, n, p), binom.ppf(0.99999, n, p))
# x = np.arange(binom.ppf(0.01, n, p), binom.ppf(0.99, n, p))
x = np.arange(binom.ppf(0.001, n, p), binom.ppf(0.999, n, p), step)
y = np.array(binom.pmf(x, n, p), dtype=float)


def squarer(pos1=1, pos2=len(x)):
    square = 0
    if pos2 > len(x):
        pos2 -= len(x)
    for i in range(pos1, pos2):
        square += (float(y[i - 1] + y[i]) / float(2)) * (x[i] - x[i - 1])
    return square
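# Illustrative usage of squarer() (assuming the script above has run, so
# x, y, n and p are in scope). With step = 1 the trapezoidal sum roughly
# approximates the probability mass covered by the grid, so it should land
# close to the CDF difference between the grid endpoints.
print(squarer(1, len(x)))
print(binom.cdf(x[-1], n, p) - binom.cdf(x[0], n, p))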
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 22 13:46:12 2020

@author: Shaurya Prakash
"""
from scipy.stats import binom

"""
4 coins were tossed simultaneously; what is the probability of getting 2 heads?
"""
n = 4
p = 0.5
x = 2

probability_of_getting_2_heads = binom.pmf(x, n, p)
probability_of_getting_at_most_2_heads = binom.cdf(x, n, p)
mean = binom.mean(n, p)
variance = binom.var(n, p)
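# Cross-check (an illustrative addition) of the values above against the
# closed-form binomial formula C(n, x) * p**x * (1 - p)**(n - x).
from math import comb
from scipy.stats import binom

n, p, x = 4, 0.5, 2
pmf_by_hand = comb(n, x) * p**x * (1 - p)**(n - x)            # 6/16 = 0.375
cdf_by_hand = sum(comb(n, k) * p**k * (1 - p)**(n - k)
                  for k in range(x + 1))                      # 11/16 = 0.6875

print(pmf_by_hand, binom.pmf(x, n, p))                        # both 0.375
print(cdf_by_hand, binom.cdf(x, n, p))                        # both 0.6875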
from scipy.stats import binom
import numpy as np

# Binomial distribution
n = 8
k = 4
p = 0.5
q = 1 - p

expect = binom.expect(args=(n, p))
mean = binom.mean(n, p)
var = binom.var(n, p)
sigma = binom.std(n, p)
mode = np.floor((n + 1) * p)
pmf = binom.pmf(k, n, p)
cdf = binom.cdf(k, n, p)
ppf = binom.ppf(q, n, p)

print('expected value = ', expect)
print('mean = ', mean)
print('variance = ', var)
print('std. dev. = ', sigma)
print('mode = ', mode)
print('pmf = ', pmf)
print('cdf = ', cdf)
print('ppf = ', ppf)
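# Closed-form check (an illustrative addition): for the example above,
# mean = n*p, variance = n*p*q and std = sqrt(n*p*q).
import math
from scipy.stats import binom

n, p = 8, 0.5
q = 1 - p
assert math.isclose(binom.mean(n, p), n * p)                  # 4.0
assert math.isclose(binom.var(n, p), n * p * q)               # 2.0
assert math.isclose(binom.std(n, p), math.sqrt(n * p * q))    # ~1.414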