def bayesian_log_normal_with_0_test(A_data, B_data, m0, k0, s_sq0, v0, mean_lift=0):

    # modeling zero vs. non-zero
    non_zeros_A = sum(A_data > 0)
    total_A = len(A_data)
    non_zeros_B = sum(B_data > 0)
    total_B = len(B_data)
    alpha = 1 # uniform prior
    beta = 1

    n_samples = 10000 # number of samples to draw
    A_conv_samps = beta_dist(non_zeros_A+alpha, total_A-non_zeros_A+beta, n_samples)
    B_conv_samps = beta_dist(non_zeros_B+alpha, total_B-non_zeros_B+beta, n_samples)

    # modeling the non-zeros with a log-normal
    A_non_zero_data = A_data[A_data > 0]
    B_non_zero_data = B_data[B_data > 0]

    A_order_samps = draw_log_normal_means(A_non_zero_data,m0,k0,s_sq0,v0)
    B_order_samps = draw_log_normal_means(B_non_zero_data,m0,k0,s_sq0,v0)

    # combining the two
    A_rps_samps = A_conv_samps*A_order_samps
    B_rps_samps = B_conv_samps*B_order_samps

    # the result: P(A beats B by at least mean_lift)
    print(mean(A_rps_samps > B_rps_samps * (1 + mean_lift)))
def bayesian_log_normal_with_0_test(A_data,
                                    B_data,
                                    m0,
                                    k0,
                                    s_sq0,
                                    v0,
                                    mean_lift=0):

    # modeling zero vs. non-zero
    non_zeros_A = sum(A_data > 0)
    total_A = len(A_data)
    non_zeros_B = sum(B_data > 0)
    total_B = len(B_data)
    alpha = 1  # uniform prior
    beta = 1

    n_samples = 10000  # number of samples to draw
    A_conv_samps = beta_dist(non_zeros_A + alpha, total_A - non_zeros_A + beta,
                             n_samples)
    B_conv_samps = beta_dist(non_zeros_B + alpha, total_B - non_zeros_B + beta,
                             n_samples)

    # modeling the non-zeros with a log-normal
    A_non_zero_data = A_data[A_data > 0]
    B_non_zero_data = B_data[B_data > 0]

    A_order_samps = draw_log_normal_means(A_non_zero_data, m0, k0, s_sq0, v0)
    B_order_samps = draw_log_normal_means(B_non_zero_data, m0, k0, s_sq0, v0)

    # combining the two
    A_rps_samps = A_conv_samps * A_order_samps
    B_rps_samps = B_conv_samps * B_order_samps

    # the result: P(A beats B by at least mean_lift)
    print(mean(A_rps_samps > B_rps_samps * (1 + mean_lift)))
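Both versions above call draw_log_normal_means, which is not defined anywhere on this page. A minimal sketch, assuming a Normal-Inverse-Chi-squared prior (m0, k0, s_sq0, v0) on the mean and variance of the log-transformed data, could look like this (the original helper may differ in its details):

import numpy as np
from scipy.stats import invgamma, norm

def draw_log_normal_means(data, m0, k0, s_sq0, v0, n_samples=10000):
    # work on the log scale
    log_data = np.log(data)
    n = len(log_data)
    y_bar = np.mean(log_data)
    ssd = np.sum((log_data - y_bar) ** 2)

    # conjugate Normal-Inverse-Chi-squared update of the prior with the data
    k_n = k0 + n
    m_n = (k0 * m0 + n * y_bar) / k_n
    v_n = v0 + n
    v_n_s_sq_n = v0 * s_sq0 + ssd + (n * k0 * (m0 - y_bar) ** 2) / k_n

    # draw sigma^2 from an inverse-gamma, then mu | sigma^2 from a normal
    sig_sq_samples = (v_n_s_sq_n / 2.0) * invgamma.rvs(v_n / 2.0, size=n_samples)
    mu_samples = norm.rvs(m_n, scale=np.sqrt(sig_sq_samples / k_n), size=n_samples)

    # the mean of a log-normal is exp(mu + sigma^2 / 2)
    return np.exp(mu_samples + sig_sq_samples / 2.0)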
def calc_beta_prob(df_grouped, num_samples=100000):
    # Bayesian test: simulate posterior conversion-rate distributions for both groups
    clicks_new = df_grouped[df_grouped['optimum'] == 1]['clicked']
    view_new = df_grouped[df_grouped['optimum'] == 1]['counter']

    clicks_old = df_grouped[df_grouped['optimum'] == 0]['clicked']
    view_old = df_grouped[df_grouped['optimum'] == 0]['counter']

    new_samples = beta_dist(1 + clicks_new, 1 + view_new - clicks_new,
                            num_samples)
    old_samples = beta_dist(1 + clicks_old, 1 + view_old - clicks_old,
                            num_samples)

    # probability the new variant's rate beats the old by more than 1.5 percentage points
    return np.mean(new_samples - old_samples > .015)
Example #4
File: models.py Project: dyspop/baywatch
def calculate_statistics(pool, events, samples_to_draw):
    # Identifies which quantities can be pulled out for scaling or swapped for
    # different distributions, even though they look like arbitrary abstractions
    c = 1  # scalar multiplier used to vary the effective sample size
    alpha = 1  # Beta prior (alternative: 30)
    beta = 1  # Beta prior (alternative: 70)
    views = pool * c
    clicks = events * c

    return beta_dist(clicks + alpha, views - clicks + beta, samples_to_draw)
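A hypothetical usage, comparing two pools with counts borrowed from Example #13 further down (beta_dist is assumed to be numpy.random.beta, as in that example):

a_samples = calculate_statistics(pool=9610, events=44, samples_to_draw=100000)
b_samples = calculate_statistics(pool=83617, events=426, samples_to_draw=100000)
print((a_samples > b_samples).mean())  # posterior probability that pool A's rate beats pool B's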
Example #5
    def sample(self, data=None, n=1):
        """Return n samples from distribution"""

        if data is None:
            data = self.data

        successes = count_nonzero(data)
        total = len(data)
        samples = beta_dist(self.alpha + successes,
                            self.beta + total - successes, n)
        return samples
Example #6
 def p_donate_ci(self, a=5, alpha=1, beta=1):
     """
     Returns a (100 - a)% credible interval
     for the donation rate.
     """
     ones = self.counts[1:]
     zeros = self.counts[0]
     dist = beta_dist(ones + alpha, zeros + beta, 10000)
     lower_bound = np.percentile(dist, a / 2.0)
     upper_bound = np.percentile(dist, 100 - a / 2.0)
     mean = np.mean(dist)
     return (lower_bound, self.p_donate, upper_bound)
Example #7
def calculate_clickthrough_prob(clicks_A, views_A, clicks_B, views_B):
    '''
    INPUT: INT, INT, INT, INT
    OUTPUT: FLOAT

    Calculate and return an estimated probability that SiteA performs better
    (has a higher click-through rate) than SiteB.

    Hint: Use Bayesian A/B Testing (multi-armed-bandit repo)
    '''

    samp = 10000
    Aa = clicks_A + 1
    Ab = clicks_B + 1
    Ba = views_A - clicks_A + 1
    Bb = views_B - clicks_B + 1

    A_prob = beta_dist(Aa, Ba, samp)
    B_prob = beta_dist(Ab, Bb, samp)

    return np.sum(A_prob > B_prob) / float(samp)
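For example, with the click/view counts that appear in Example #13 further down, the call would be:

p = calculate_clickthrough_prob(clicks_A=44, views_A=9610, clicks_B=426, views_B=83617)
print(p)  # estimated probability that SiteA has the higher click-through rate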
Example #8
 def p_donate_ci(self, a=5, alpha=1, beta=1):
     """
     Rretuns a 100-a credible interval
     for the donation rate
     """
     ones = self.counts[1:]
     zeros = self.counts[0]
     dist = beta_dist(ones + alpha, zeros + beta, 10000)
     lower_bound = np.percentile(dist, a / 2.0)
     upper_bound = np.percentile(dist, 100 - a / 2.0)
     mean = np.mean(dist)
     return (lower_bound, self.p_donate, upper_bound)
Example #9
def _sampled_based(vuln_function, gmf_set, epsilon_provider, asset):
    """Compute the set of loss ratios when at least one CV
    (Coefficient of Variation) defined in the vulnerability function
    is greater than zero.

    :param vuln_function: the vulnerability function used to
        compute the loss ratios.
    :type vuln_function: :py:class:`openquake.shapes.VulnerabilityFunction`
    :param gmf_set: ground motion fields used to compute the loss ratios
    :type gmf_set: :py:class:`dict` with the following
        keys:
        **IMLs** - tuple of ground motion fields (float)
        **TimeSpan** - time span parameter (float)
        **TSES** - time representative of the Stochastic Event Set (float)
    :param epsilon_provider: service used to get the epsilon when
        using the sampled based algorithm.
    :type epsilon_provider: object that defines an :py:meth:`epsilon` method
    :param asset: the asset used to compute the loss ratios.
    :type asset: an :py:class:`openquake.db.model.ExposureData` instance
    """

    loss_ratios = []

    for ground_motion_field in gmf_set["IMLs"]:
        if ground_motion_field < vuln_function.imls[0]:
            loss_ratios.append(0.0)
        else:
            if ground_motion_field > vuln_function.imls[-1]:
                ground_motion_field = vuln_function.imls[-1]

            mean_ratio = vuln_function.loss_ratio_for(ground_motion_field)
            cov = vuln_function.cov_for(ground_motion_field)

            if vuln_function.is_beta:
                stddev = cov * mean_ratio
                alpha = compute_alpha(mean_ratio, stddev)
                beta = compute_beta(mean_ratio, stddev)
                loss_ratios.append(beta_dist(alpha, beta, size=None))
            else:
                variance = (mean_ratio * cov) ** 2.0
                epsilon = epsilon_provider.epsilon(asset)

                sigma = math.sqrt(
                    math.log((variance / mean_ratio ** 2.0) + 1.0))

                mu = math.log(mean_ratio ** 2.0 / math.sqrt(
                    variance + mean_ratio ** 2.0))

                loss_ratios.append(math.exp(mu + (epsilon * sigma)))

    return array(loss_ratios)
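The function above depends on compute_alpha and compute_beta, which are not shown here. A minimal sketch using the standard method-of-moments fit of a Beta distribution to a given mean and standard deviation (the actual openquake helpers may be implemented differently):

def compute_alpha(mean_ratio, stddev):
    # nu = mu * (1 - mu) / sigma^2 - 1, alpha = mu * nu
    nu = mean_ratio * (1.0 - mean_ratio) / stddev ** 2.0 - 1.0
    return mean_ratio * nu

def compute_beta(mean_ratio, stddev):
    # beta = (1 - mu) * nu
    nu = mean_ratio * (1.0 - mean_ratio) / stddev ** 2.0 - 1.0
    return (1.0 - mean_ratio) * nu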
 def __init__(self, alpha: float, beta: float, labels_1: Set[str], labels_2: Set[str], equalities: Mapping,
              threshold: float = 0.):
     """
     :param alpha: alpha value for beta distribution
     :param beta: beta value for beta distribution
     :param labels_1: source space
     :param labels_2: target space
     :param equalities: labels that are considered equal (1 - beta)
     :param threshold: every similarity below is considered to be zero
     """
     s = ((a, b, beta_dist(alpha, beta)) for a, b in product(labels_1, labels_2))
     s = ((a, b, 1 - w if (a, b) in equalities else w) for a, b, w in s)
     s = ((a, b, w if w > threshold else 0.) for a, b, w in s)
     super().__init__(s)
Example #11
def _sampled_based(vuln_function, gmf_set, epsilon_provider, asset):
    """Compute the set of loss ratios when at least one CV
    (Coefficient of Variation) defined in the vulnerability function
    is greater than zero.

    :param vuln_function: the vulnerability function used to
        compute the loss ratios.
    :type vuln_function: :py:class:`openquake.shapes.VulnerabilityFunction`
    :param gmf_set: ground motion fields used to compute the loss ratios
    :type gmf_set: :py:class:`dict` with the following
        keys:
        **IMLs** - tuple of ground motion fields (float)
        **TimeSpan** - time span parameter (float)
        **TSES** - time representative of the Stochastic Event Set (float)
    :param epsilon_provider: service used to get the epsilon when
        using the sampled based algorithm.
    :type epsilon_provider: object that defines an :py:meth:`epsilon` method
    :param asset: the asset used to compute the loss ratios.
    :type asset: an :py:class:`openquake.db.model.ExposureData` instance
    """

    loss_ratios = []

    for ground_motion_field in gmf_set["IMLs"]:
        if ground_motion_field < vuln_function.imls[0]:
            loss_ratios.append(0.0)
        else:
            if ground_motion_field > vuln_function.imls[-1]:
                ground_motion_field = vuln_function.imls[-1]

            mean_ratio = vuln_function.loss_ratio_for(ground_motion_field)
            cov = vuln_function.cov_for(ground_motion_field)

            if vuln_function.is_beta:
                stddev = cov * mean_ratio
                alpha = compute_alpha(mean_ratio, stddev)
                beta = compute_beta(mean_ratio, stddev)
                loss_ratios.append(beta_dist(alpha, beta, size=None))
            else:
                variance = (mean_ratio * cov)**2.0
                epsilon = epsilon_provider.epsilon(asset)

                sigma = math.sqrt(math.log((variance / mean_ratio**2.0) + 1.0))

                mu = math.log(mean_ratio**2.0 /
                              math.sqrt(variance + mean_ratio**2.0))

                loss_ratios.append(math.exp(mu + (epsilon * sigma)))

    return array(loss_ratios)
def sampleSuccessRateForBinomialDataAndBetaPriori(data_n, data_k, alpha=1, beta=1, samples=10000):
    return beta_dist(data_k + alpha, data_n - data_k + beta, samples)
Example #13
from numpy.random import beta as beta_dist
from numpy import percentile

import numpy as np

N_samp = 10000000  # number of samples to draw
c = 1  #used to vary sample size by a scalar multiplier

## INSERT YOUR OWN DATA HERE
clicks_A = (44) * c
views_A = (9610) * c
clicks_B = (426) * c
views_B = (83617) * c
alpha = 1  # Beta prior (alternative: 30)
beta = 1  # Beta prior (alternative: 70)
A_samples = beta_dist(clicks_A + alpha, views_A - clicks_A + beta, N_samp)
B_samples = beta_dist(clicks_B + alpha, views_B - clicks_B + beta, N_samp)

# credible intervals for the relative lift (B - A) / B:
# percentiles 2.5/97.5 give a 95% interval, 10/90 give an 80% interval
print([
    round(np.percentile((B_samples - A_samples) / B_samples, 2.5), 4),
    round(np.percentile((B_samples - A_samples) / B_samples, 97.5), 4)
])
print([
    round(np.percentile((B_samples - A_samples) / B_samples, 10), 4),
    round(np.percentile((B_samples - A_samples) / B_samples, 90), 4)
])

# percent lift needed
# base lift 1,
Example #14
v_conversions = 1799 * c
v_visits = 207434 * c
#lift_perc = .03
c_mean = 80.03
c_var = 503950.69
v_mean = 78.64
v_var = 493547.51

N_samp = 75000
clicks_A = c_conversions
views_A = c_visits
clicks_B = v_conversions
views_B = v_visits
alpha = 1
beta = 1
A_conv_samps = beta_dist(clicks_A + alpha, views_A - clicks_A + beta, N_samp)
B_conv_samps = beta_dist(clicks_B + alpha, views_B - clicks_B + beta, N_samp)

A_order_samps = draw_mus(c_conversions, c_mean, c_var, 0, 1, 1, 1, N_samp)
B_order_samps = draw_mus(v_conversions, v_mean, v_var, 0, 1, 1, 1, N_samp)

A_rps_samps = A_conv_samps * A_order_samps
B_rps_samps = B_conv_samps * B_order_samps

# set current winner
if (mean(A_rps_samps) >= mean(B_rps_samps)):
    Current_Winner_rps_samps = A_rps_samps
    Current_Loser_rps_samps = B_rps_samps
    current_winner_str = "CHOOSE CONTROL"
else:
    Current_Winner_rps_samps = B_rps_samps
Example #15
def get_samples(success, population, alpha, beta, sample_size):
    return beta_dist(success + alpha, population - success + beta, sample_size)
Example #16
A_log_norm_data = lognormal(mean=4.20, sigma=1.0, size=100)
B_log_norm_data = lognormal(mean=4.00, sigma=1.0, size=100)
# appending many many zeros
A_data = concatenate([A_log_norm_data,zeros((10000))])
B_data = concatenate([B_log_norm_data,zeros((10000))])

# modeling zero vs. non-zero
non_zeros_A = sum(A_data > 0)
total_A = len(A_data)
non_zeros_B = sum(B_data > 0)
total_B = len(B_data)
alpha = 1 # uniform prior
beta = 1

n_samples = 100000 # number of samples to draw
A_conv_samps = beta_dist(non_zeros_A+alpha, total_A-non_zeros_A+beta, n_samples)
B_conv_samps = beta_dist(non_zeros_B+alpha, total_B-non_zeros_B+beta, n_samples)

# modeling the non-zeros with a log-normal
A_non_zero_data = A_data[A_data > 0]
B_non_zero_data = B_data[B_data > 0]

m0 = 4.      # prior mean for the log-scale mean
k0 = 1.      # prior pseudo-observation count for the mean
s_sq0 = 1.   # prior guess for the log-scale variance
v0 = 1.      # prior degrees of freedom for the variance

A_order_samps = draw_log_normal_means(A_non_zero_data,m0,k0,s_sq0,v0,n_samples)
B_order_samps = draw_log_normal_means(B_non_zero_data,m0,k0,s_sq0,v0,n_samples)

# combining the two
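The example is cut off here; following the pattern of the first two examples on this page, the combining and comparison step would be:

A_rps_samps = A_conv_samps * A_order_samps
B_rps_samps = B_conv_samps * B_order_samps
print(mean(A_rps_samps > B_rps_samps))  # P(A beats B); mean is assumed to be numpy.mean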
Example #17
 def init_user(self, graph):
     dist = beta_dist(self.alpha, self.beta, len(graph._workers._nodes))
     for i, (worker, _, _, _, _) in enumerate(graph.workers()):
         worker.p = dist[i]
Example #18
 def init_user(self, graph):
     dist = beta_dist(self.alpha, self.beta, len(graph._workers._nodes))
     for i, (worker, _, _, _, _) in enumerate(graph.workers()):
         worker.p = dist[i]
Example #19
def get_samples(success, population, alpha, beta, sample_size):
    return beta_dist(success + alpha, population - success + beta, sample_size)