示例#1
0
    def calculate_gender(cls, country="uk_plus"):
        """
        Build the per-name gender prediction table for ``country``.

        Reads the name counts from ``cls.tidy_data_file(country)`` (columns
        ``name``, ``F``, ``M``, ``source``), derives the columns listed below
        and writes the result to ``cls.calc_data_file(country)`` without the
        index column.

        Derived columns:
            total, f_proportion, m_proportion: combined count and each
                gender's share of it.
            winner, winner_proportion: the larger of the two counts and its
                share of the total.
            predicted: "F" when the F count equals the winner (so a tie
                resolves to "F"), otherwise "M".
            lower: ``(winner - cls.z * sqrt(variance)) / total`` where
                ``variance`` is the binomial variance for ``total`` trials
                with success probability ``winner_proportion``.

        Args:
            country: key handed to ``cls.tidy_data_file`` and
                ``cls.calc_data_file`` to locate the input and output files.
        """
        types = {"name": "string", "F": "int", "M": "int", "source": "string"}
        df = pd.read_csv(cls.tidy_data_file(country), dtype=types)

        # calculate relative proportions
        df["total"] = df["M"] + df["F"]
        df["f_proportion"] = df["F"] / df["total"]
        df["m_proportion"] = df["M"] / df["total"]

        # find winner
        df["winner"] = df[["M", "F"]].max(axis="columns")
        df["winner_proportion"] = df["winner"] / df["total"]

        # set predicted value
        df["predicted"] = "M"
        df.loc[df["F"] == df["winner"], "predicted"] = "F"

        # calculate lower bar
        # binom.var broadcasts over arrays, so every row's variance comes from
        # a single call instead of one Python-level call per row.
        variance = binom.var(df["total"].to_numpy(),
                             df["winner_proportion"].to_numpy())
        df["lower"] = (df["winner"] - (np.sqrt(variance) * cls.z)) / df["total"]

        df.to_csv(cls.calc_data_file(country), index=False)
示例#2
0
    def variance(self):
        """
        Return the variance of the distribution.

        Delegates to ``binom.var``, passing the instance's private ``__n``
        and ``__p`` attributes as its two arguments.

        Returns:
        --------

        variance : float
        """
        trials = self.__n
        success_prob = self.__p
        return binom.var(trials, success_prob)
示例#3
0
def aggregate():
    """
    Attach an aggregated ``poll_rating`` dict to every state in ``State.states``.

    Polls from ``Poll.polls`` are matched to a state by comparing
    ``poll.state.name`` with the state's ``name``. A state with at least one
    poll gets the average of its polls; a state without polls falls back to
    its ``election16`` result. In both cases ``poll_rating`` has the keys
    ``'D'``, ``'R'`` and ``'error'``.
    """
    # Bucket the polls by state name in one pass instead of rescanning the
    # full poll list once per state.
    polls_by_name = {}
    for poll in Poll.polls:
        polls_by_name.setdefault(poll.state.name, []).append(poll)

    for state in State.states:
        polls = polls_by_name.get(state.name)
        if polls:
            state.poll_rating = _rating_from_polls(polls)
        else:
            state.poll_rating = _rating_from_election(state.election16)


def _rating_from_polls(polls):
    """Average a non-empty list of polls into a {'D', 'R', 'error'} rating."""
    d_sum = 0
    r_sum = 0
    error_sq_sum = 0
    for poll in polls:
        d_sum += poll.d
        r_sum += poll.r
        error_sq_sum += poll.error ** 2
    count = len(polls)
    return {
        'D': round(d_sum / count, 1),
        'R': round(r_sum / count, 1),
        # NOTE(review): this is the root-sum-of-squares of the individual
        # poll errors and is not divided by the number of polls -- confirm
        # that is the intended error for an averaged rating.
        'error': round(math.sqrt(error_sq_sum), 2),
    }


def _rating_from_election(result):
    """Derive a {'D', 'R', 'error'} rating from a 'Total'/'D'/'R' result mapping."""
    total = result['Total']
    d_votes = result['D']
    r_votes = result['R']
    prob_d = d_votes / total
    prob_r = r_votes / total
    # Binomial variance of each party's count, scaled by the total.
    se_d = math.sqrt(binom.var(total, prob_d) / total)
    se_r = math.sqrt(binom.var(total, prob_r) / total)
    return {
        # Vote shares are reported as percentages.
        'D': round(100 * prob_d, 1),
        'R': round(100 * prob_r, 1),
        'error': round(math.sqrt(se_d ** 2 + se_r ** 2), 2),
    }
示例#4
0
    def test_inversion_diffs(self):
        """
        Check that one inversion changes about as many positions as expected.

        Runs ``reps`` inversions, each on a freshly created ``Chromosome``,
        averages the number of sequence positions that differ afterwards and
        asserts that the average lies within a tolerance band around the
        expected value computed below.
        """
        cfg = AppSettings()

        reps = 1000

        def positions_changed():
            # Positions that differ between a new chromosome's sequence
            # before and after a single inversion.
            dna = Chromosome()
            before = dna.sequence
            dna.inversion()
            return sum(1 for old, new in zip(before, dna.sequence)
                       if old != new)

        # observed number of differences, one entry per repetition
        observed = [positions_changed() for _ in range(0, reps)]

        length_probs = []
        weighted_diffs = []  # expected differences, weighted by length probability

        # The length of an inversion is assumed to follow a negative binomial
        # distribution. Lengths are enumerated from 0 upwards until more than
        # 99.99% of the probability mass is covered; for each length the
        # expected number of differences is weighted by the probability of
        # that length, and the weighted values are summed afterwards.
        length = 0
        while sum(length_probs) <= 0.9999:
            success_prob = (1 - cfg.genetics.mutation_length /
                            (1 + cfg.genetics.mutation_length))
            prob = nbinom.pmf(length, 1, success_prob)
            length_probs.append(prob)

            same_odds = 1 / len(Chromosome.nucleotides())
            diffs_at_length = math.floor(length / 2) * (1 - same_odds) * 2
            weighted_diffs.append(prob * diffs_at_length)
            length += 1

        expected = sum(weighted_diffs)

        # The difference count is treated as the product of a binomial
        # variable (differences at a given length) and a negative binomial
        # variable (the length), taken to be independent. The variance is
        # combined as
        #   var(x) * var(y) + var(x) * mean(y) + mean(x) * var(y)
        # http://www.odelama.com/data-analysis/Commonly-Used-Math-Formulas/
        # NOTE(review): the usual identity for independent variables squares
        # the means (var(x) * mean(y)**2 + mean(x)**2 * var(y)) -- confirm
        # the unsquared form used here is intentional.

        binom_mean = cfg.genetics.mutation_length
        binom_var = binom.var(binom_mean, 1 / (len(Chromosome.nucleotides())))

        nbinom_mean = cfg.genetics.mutation_length
        nbinom_var = nbinom.var(cfg.genetics.mutation_length,
                                nbinom_mean / (1 + nbinom_mean))

        product_var = binom_var * nbinom_var + \
            binom_var * nbinom_mean + \
            binom_mean * nbinom_var

        mean_observed = sum(observed) / reps
        # Tolerance is five standard errors of the mean over `reps` samples.
        tolerance = ((product_var / reps)**(1 / 2)) * 5
        lower_bound = expected - tolerance
        upper_bound = expected + tolerance
        assert lower_bound < mean_observed < upper_bound
def demo13(n=100, p=0.25):
    """
    Print summary statistics of a Binomial(n, p) distribution and plot its pmf.

    Prints the mean, variance and standard deviation (one per line), then
    shows a bar chart of the probability mass for every outcome 0..n.

    Args:
        n: number of trials; defaults to 100.
        p: success probability of a single trial; defaults to 0.25.
    """
    x = np.arange(n + 1)

    # binom.pmf broadcasts over the outcome array, so one call yields the
    # probability of every outcome.
    prob = binom.pmf(x, n, p)

    print(binom.mean(n, p))
    print(binom.var(n, p))
    print(binom.std(n, p))

    plt.xlabel('x')
    plt.ylabel('Possibility')
    plt.bar(x, prob)
    plt.show()
 def var(self, dist):
     """Return the binomial variance for ``dist``.

     The arguments for ``binom.var`` come from ``self._get_params(dist)``
     and are passed on positionally.
     """
     params = self._get_params(dist)
     return binom.var(*params)
示例#7
0
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import binom

a = 4
b = 33

fig, ax = plt.subplots(1, 1)
n = 400
step = 1


# Success probability of a single trial: 1 / (1 + b).
p = float(1) / float(1 + b)
mean, var, skew, kurt = binom.stats(n, p, moments='mvsk')
# Single-argument print(...) calls behave the same on Python 2 and Python 3;
# the former print statements were a SyntaxError on Python 3.
print(binom.var(n, p))
# First and second raw moments of the distribution, E[X] and E[X^2].
print(binom.expect(lambda x: x, args=(n, p)))
print(binom.expect(lambda x: x ** 2, args=(n, p)))

# Sample the pmf between the 0.1% and 99.9% quantiles; wider and narrower
# alternatives are kept below for reference.
# x = np.arange(binom.ppf(0.00001, n, p), binom.ppf(0.99999, n, p))
# x = np.arange(binom.ppf(0.01, n, p), binom.ppf(0.99, n, p))
x = np.arange(binom.ppf(0.001, n, p), binom.ppf(0.999, n, p), step)
y = np.array(binom.pmf(x, n, p), dtype=float)


def squarer(pos1=1, pos2=len(x)):
    """
    Trapezoidal area under the sampled curve given by module-level ``x``, ``y``.

    Sums one trapezoid per index ``i`` in ``range(pos1, pos2)``, each spanning
    the samples ``i - 1`` and ``i``. The default for ``pos2`` is ``len(x)`` as
    evaluated when the function is defined. A ``pos2`` larger than ``len(x)``
    is reduced by ``len(x)`` before the loop.

    Returns 0 when the range is empty.
    """
    sample_count = len(x)
    if pos2 > sample_count:
        pos2 -= sample_count

    area = 0
    for right in range(pos1, pos2):
        left = right - 1
        mean_height = float(y[left] + y[right]) / float(2)
        width = x[right] - x[left]
        area += mean_height * width
    return area
示例#8
0
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 22 13:46:12 2020

@author:Shaurya Prakash
"""
from scipy.stats import binom

# Worked example: 4 coins are tossed simultaneously; what is the probability
# of getting 2 heads?

# n tosses, each landing heads with probability p; x is the head count asked about.
n, p, x = 4, 0.5, 2

# Freeze the distribution once so every statistic below uses the same n and p.
_tosses = binom(n, p)

probablity_of_getting_2_heads = _tosses.pmf(x)  # P(X == x)
probablity_of_getting_atmost_2_heads = _tosses.cdf(x)  # P(X <= x)
mean = _tosses.mean()
variance = _tosses.var()
from scipy.stats import binom
import numpy as np

# Binomial distribution: n trials, success probability p, evaluated at k successes.
n, k, p = 8, 4, 0.5
q = 1 - p

expect = binom.expect(args=(n, p))
mean, var, sigma = binom.mean(n, p), binom.var(n, p), binom.std(n, p)
# Mode via the closed form floor((n + 1) * p).
mode = np.floor((n + 1) * p)
pmf, cdf = binom.pmf(k, n, p), binom.cdf(k, n, p)
# Quantile of the distribution at probability q.
ppf = binom.ppf(q, n, p)

# Print each statistic on its own line as "<label> <value>".
for _label, _value in (
    ('expected value = ', expect),
    ('mean = ', mean),
    ('variance = ', var),
    ('std. dev. = ', sigma),
    ('mode = ', mode),
    ('pmf = ', pmf),
    ('cdf = ', cdf),
    ('ppf = ', ppf),
):
    print(_label, _value)