Example #1
def gen_plot_bernoulli():

    # evaluate the Bernoulli pmf and cdf
    print("\nBernoulli pmf w/ scipy.stats.bernoulli.pmf(0, p=0.3): {:.3f}".
          format(bernoulli.pmf(0, p=0.3)))
    print("\nBernoulli pmf w/ scipy.stats.bernoulli.pmf(range(3), p=0.3): {}".
          format(bernoulli.pmf(range(3), p=0.3)))
    print(
        "\nBernoulli cdf w/ scipy.stats.bernoulli.cdf([0, 0.5, 1, 1.5], p=0.3): {}"
        .format(bernoulli.cdf([0, 0.5, 1, 1.5], p=0.3)))

    # plot Bernoulli
    plt.stem([-0.2, 0, 1, 1.2], bernoulli.pmf([-0.2, 0, 1, 1.2], p=.3))
    plt.plot(np.linspace(-0.1, 1.1, 1200),
             bernoulli.cdf(np.linspace(-0.1, 1.1, 1200), p=0.3), 'g')
    plt.xlim([-0.1, 1.1])
    plt.ylim([-0.2, 1.1])
    plt.show()

    # generate Bernoulli samples
    print(
        "\nBernoulli samples w/ scipy.stats.bernoulli.rvs(size=10, p=.3): {}".
        format(bernoulli.rvs(size=10, p=.3)))

    plt.hist(bernoulli.rvs(size=10, p=.3), density=True)
    plt.show()

    return None
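A quick way to sanity-check the values printed above: the Bernoulli pmf has the closed form p**k * (1 - p)**(1 - k) for k in {0, 1}. A minimal standalone sketch:

from scipy.stats import bernoulli

p = 0.3
for k in (0, 1):
    closed_form = p**k * (1 - p)**(1 - k)
    assert abs(bernoulli.pmf(k, p) - closed_form) < 1e-12
print("pmf matches the closed form for p = 0.3")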
Example #2
def compute_vote_likelihood_ratio(name,
                                  true_ideal_points,
                                  votes,
                                  senator_indices,
                                  bill_indices,
                                  voter_map,
                                  popularity_mean,
                                  polarity_mean,
                                  null_ideal_point=0.,
                                  query_size=10):
    """Compute top votes based on likelihood ratio statistic.
  
  Args:
    name: Name of Senator to calculate ratio for.
    true_ideal_points: A vector of length [num_voters] containing the learned
      vote ideal points.
    votes: An array with shape [num_votes], containing the binary vote cast
      for all recorded votes.
    senator_indices: An array with shape [num_votes], where each entry is an
      integer in {0, 1, ..., num_voters - 1}, containing the index for the 
      Senator corresponding to the vote in `votes`.
    bill_indices: An array with shape [num_votes], where each entry is an
      integer in {0, 1, ..., num_bills - 1}, containing the index for the 
      bill corresponding to the vote in `votes`.
    voter_map: A string array with length [num_voters] containing the
      name for each voter in the data set.
    popularity_mean: The learned variational mean for the bill popularities
      (alpha). A vector with shape [num_bills].
    polarity_mean: The learned variational mean for the bill polarities (eta).
      A vector with shape [num_bills].
    null_ideal_point: The null ideal point for the likelihood ratio test. For
      example, a null ideal point of 0 would capture which bills drive this
      Senator's ideal point away from 0 (which votes make this Senator
      extreme?).
    query_size: Number of bills to query.
  
  Returns:
    top_indices: A vector of ints with shape [query_size], representing the 
      indices for the top bills identified by the likelihood ratio statistic.
  """
    voter_index = np.where(voter_map == name)[0][0]
    relevant_indices = np.where(senator_indices == voter_index)[0]
    fitted_logit = (true_ideal_points[voter_index] *
                    polarity_mean[bill_indices[relevant_indices]] +
                    popularity_mean[bill_indices[relevant_indices]])
    null_logit = (
        null_ideal_point * polarity_mean[bill_indices[relevant_indices]] +
        popularity_mean[bill_indices[relevant_indices]])
    # Unlike the TBIP likelihood ratio statistic, the output distribution for
    # vote ideal points is Bernoulli. Here we pass each logit through a
    # sigmoid and evaluate the log-likelihood under each hypothesis.
    fitted_mean = 1. / (1. + np.exp(-fitted_logit))
    null_mean = 1. / (1. + np.exp(-null_logit))
    fitted_log_likelihood = bernoulli.logpmf(votes[relevant_indices],
                                             fitted_mean)
    null_log_likelihood = bernoulli.logpmf(votes[relevant_indices], null_mean)
    log_likelihood_differences = fitted_log_likelihood - null_log_likelihood
    top_indices = bill_indices[relevant_indices[np.argsort(
        -log_likelihood_differences)[:query_size]]]
    return top_indices
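A hypothetical smoke test for compute_vote_likelihood_ratio on synthetic data; every array below is made up for illustration and only respects the shapes documented in the docstring:

import numpy as np

num_voters, num_bills = 3, 5
voter_map = np.array(["Smith", "Jones", "Lee"])
true_ideal_points = np.array([-1.2, 0.1, 1.5])
senator_indices = np.repeat(np.arange(num_voters), num_bills)
bill_indices = np.tile(np.arange(num_bills), num_voters)
votes = np.random.binomial(1, 0.5, size=num_voters * num_bills)
popularity_mean = np.zeros(num_bills)
polarity_mean = np.random.normal(size=num_bills)

top = compute_vote_likelihood_ratio("Lee", true_ideal_points, votes,
                                    senator_indices, bill_indices, voter_map,
                                    popularity_mean, polarity_mean,
                                    query_size=3)
print(top)  # indices of the bills where "Lee" most departs from the null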
Example #3
    def predict(self, X_test):

        probas_xi = bernoulli.pmf(X_test, self.means[0])
        probas_xi[probas_xi==0] = 0.01
        probas_y0 = np.sum(np.log(probas_xi),1)+np.log(self.probas_y[0])

        probas_xi = bernoulli.pmf(X_test, self.means[1])
        probas_xi[probas_xi==0] = 0.01
        probas_y1 = np.sum(np.log(probas_xi),1)+np.log(self.probas_y[1])

        return (probas_y1 > probas_y0).astype(int)
Example #4
def probability_of_signal_given_state(signal_strength, closest_z, base_position, max_range):
    closest_x, closest_y = closest_z
    #Idea: Use measurement position instead of closest_point_in_state
    #Problem: Missing data will cascade through algorithm
    #Solution: Use measurement position if available. Otherwise current solution
    distance_to_state = np.linalg.norm(base_position - np.array([closest_x, closest_y]))
    if np.isnan(signal_strength):
        return 1
    elif signal_strength == 0:
        return bernoulli.pmf(0, max(0, 1 - distance_to_state/max_range))
    else:
        return bernoulli.pmf(1, max(0, 1 - distance_to_state/max_range))*beta.pdf(signal_strength, 2, 5*distance_to_state/max_range)
Example #5
    def _negative_log_likelihood(self, w, y, X, mask=None):
        """
        Returns logistic regression negative log likelihood
        :param w: the parameters at their current estimates of shape (n_features,)
        :param y: the response vector of shape (n_obs,)
        :param X: the design matrix of shape (n_features, n_obs)
        :param mask: the binary mask vector of shape (n_obs,). 1 if observed, 0 o/w

        :returns: negative log likelihood value
        :rtype: float
        """
        sigm = _sigmoid(w.dot(X))
        if mask is not None:
            return -np.sum(np.log(bernoulli.pmf(y, sigm) * mask + 1e-5))
        else:
            return -np.sum(np.log(bernoulli.pmf(y, sigm) + 1e-5))
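A standalone sketch of how an objective like _negative_log_likelihood might be minimized; the module-level _sigmoid helper and all data below are illustrative assumptions, not part of the original class:

import numpy as np
from scipy.optimize import minimize
from scipy.stats import bernoulli

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def negative_log_likelihood(w, y, X):
    # unmasked version of the objective above
    return -np.sum(np.log(bernoulli.pmf(y, _sigmoid(w.dot(X))) + 1e-5))

rng = np.random.default_rng(0)
X = rng.normal(size=(2, 200))  # (n_features, n_obs), matching the docstring
w_true = np.array([1.5, -2.0])
y = rng.binomial(1, _sigmoid(w_true.dot(X)))
result = minimize(negative_log_likelihood, x0=np.zeros(2), args=(y, X))
print(result.x)  # should land near w_true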
Example #6
def sim(samples):
    n = 201 * 91
    sims = {
        "x": empty(n),
        "y": empty(n),
    }
    i = 0
    for y in range(-45, 46, 1):
        for x in range(0, 201, 1):
            sims["x"][i] = x
            sims["y"][i] = y
            i += 1
    mean = samples.mean()
    sims["shot_prob"] = expit(
        norm.logpdf(sims["x"], mean.shot_mu_x, mean.shot_sigma_x) +
        norm.logpdf(sims["y"], mean.shot_mu_y, mean.shot_sigma_y), )
    m = sims["shot_prob"].min()
    sims["shot_prob"] = (sims["shot_prob"] - m) / (sims["shot_prob"].max() - m)
    sims["goal_prob"] = bernoulli.pmf(
        1,
        expit(
            norm.logpdf(sims["x"], mean.goal_mu_x, mean.goal_sigma_x) +
            norm.logpdf(sims["y"], mean.goal_mu_y, mean.goal_sigma_y) +
            mean.goal_offset, ))
    return sims
Example #7
    def pdf(self, x: float):
        """Find the probability mass at a certain x value.

        The Bernoulli distribution is discrete, so this is a PMF rather
        than a true PDF.

        Args:
            x (float): The value for which the probability mass is needed.
        """
        return bernoulli.pmf(x, self.p)
Example #8
    def plot(self, n, p, x):
        # bernoulli.pmf takes only (k, p); n is shown in the title but is not
        # a parameter of the Bernoulli distribution
        pmf = bernoulli.pmf(x, p)
        plt.plot(x, pmf, 'o-')
        plt.title('Bernoulli: n=%i, p=%.2f' % (n, p), fontsize=14)
        plt.xlabel('Number of successes')
        plt.ylabel('Probability of successes', fontsize=14)
        plt.show()
Example #10
    def pmf(self):
        """
        Compute the probability mass function of the distribution

        Returns:
        --------

        pmf : float
        """
        return bernoulli.pmf(self.__r, self.__p)
Example #11
def SampleMethod2(numSamples):
    ys = []
    thetas = []
    for i in range(numSamples):
        y = []
        p = 1.0 / 3.0
        for j in range(5):
            theta = bernoulli.rvs(p, size=1)
            if theta == 0:
                sample = bernoulli.rvs(1.0 / 2.0, size=1)
            elif theta == 1:
                sample = bernoulli.rvs(3.0 / 4.0, size=1)
            y.append(sample)
            p = p * bernoulli.pmf(sample, 3.0 / 4.0) / (
                p * bernoulli.pmf(sample, 3.0 / 4.0) +
                (1 - p) * bernoulli.pmf(sample, 1.0 / 2.0))
        ys.append(np.sum(y))
        thetas.append(bernoulli.rvs(p, size=1))
    return (thetas, ys)
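A small check (with illustrative values) that the in-loop update of p is just Bayes' rule for P(theta = 1 | samples): applying it sequentially gives the same number as the batch posterior computed from products of pmfs:

import numpy as np
from scipy.stats import bernoulli

samples = [1, 0, 1, 1]
p = 1.0 / 3.0
for s in samples:
    p = p * bernoulli.pmf(s, 0.75) / (
        p * bernoulli.pmf(s, 0.75) + (1 - p) * bernoulli.pmf(s, 0.5))

prior = 1.0 / 3.0
lik1 = np.prod(bernoulli.pmf(samples, 0.75))
lik0 = np.prod(bernoulli.pmf(samples, 0.5))
batch = prior * lik1 / (prior * lik1 + (1 - prior) * lik0)
assert np.isclose(p, batch)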
Example #12
    def pmfs(self):
        """
        Compute the probability mass function of the distribution the
        success and failure in one trial p, 1-p

        Returns:
        --------

        pmf : numpy.narray
        """
        return bernoulli.pmf(self.__all_r, self.__p)
Example #13
    def likelihood_ber(self, Z, i, k):
        # the original inner loop reused `k`, shadowing the method argument;
        # the loop variable is renamed to `t` here
        result = 1
        num = 1
        den1 = 1
        for d in range(self.D):
            for t in range(self.K):  # compute theta_d, equation (7)
                num = num * self.theta[t, d] ** Z[i, t]
                den1 = den1 * (1 - self.theta[t, d]) ** Z[i, t]
                theta_d = num / (den1 + num)
            result = result * bernoulli.pmf(k=self.X[i, d], p=theta_d)  # likelihood
        return result
Example #14
    def test_bernoulli(self):
        fig, ax = plt.subplots(1, 1)

        p = 0.3
        mean, var, skew, kurt = bernoulli.stats(p, moments='mvsk')

        x = np.arange(bernoulli.ppf(0.01, p), bernoulli.ppf(0.99, p))

        ax.plot(x, bernoulli.pmf(x, p), 'bo', ms=8, label='bernoulli pmf')
        ax.vlines(x, 0, bernoulli.pmf(x, p), colors='b', lw=5, alpha=0.5)

        rv = bernoulli(p)

        ax.vlines(x,
                  0,
                  rv.pmf(x),
                  colors='k',
                  linestyles='-',
                  lw=1,
                  label='frozen pmf')
        ax.legend(loc='best', frameon=False)

        self.assertEqual("AxesSubplot(0.125,0.11;0.775x0.77)", str(ax))
Example #15
    def predict(self, test_data):
        self.p_digit_class = []
        self.predicted = []
        if self.p == [] or self.digits_p_ink == []:
            print("Fit your model to training data first")
            return []

        for i in range(10):
            berpmf = bernoulli.pmf(test_data, self.digits_p_ink[i])
            p_post = np.sum(np.log(berpmf), axis=1) + np.log(self.p[i])
            self.p_digit_class.append(p_post)

        self.p_digit_class = np.array(self.p_digit_class)
        self.predicted = np.argmax(self.p_digit_class, axis=0)
        return self.predicted
Example #16
def bernoulli_mixture_pmf(data, means, K):
    '''Compute the probability of x under each Bernoulli mixture component.
    data = N x D matrix
    means = K x D matrix
    prob (result) = N x K matrix
    '''
    N = len(data)
    D = len(data[0])
    # compute prob(x | mean)
    # prob[i, k] for the ith data point and the kth cluster/mixture component
    prob = np.zeros((N, K))
    for k in range(K):
        b = lambda row: np.prod(bern.pmf(row, means[k]))
        prob[:, k] = np.apply_along_axis(b, 1, data)
    return prob
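An equivalent vectorized form (a sketch under the same N x D / K x D input assumptions), replacing the per-row apply_along_axis with one broadcasted pmf call per cluster:

import numpy as np
from scipy.stats import bernoulli as bern

def bernoulli_mixture_pmf_vectorized(data, means, K):
    data = np.asarray(data)
    prob = np.zeros((data.shape[0], K))
    for k in range(K):
        # bern.pmf broadcasts the length-D means[k] across all N rows
        prob[:, k] = np.prod(bern.pmf(data, means[k]), axis=1)
    return prob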
Example #17
def predict(img, paras, classParas, bern=False):
    resultSet = []
    for i in range(0, len(classParas)):
        resultSet.append(0)
    for classIndex in range(0, len(classParas)):
        if (bern):
            resultSet[classIndex] = np.nansum(
                np.log(bernoulli.pmf(img, paras[classIndex])))
        else:
            resultSet[classIndex] = np.nansum(
                np.log(
                    norm.pdf(img, paras[classIndex][0], paras[classIndex][1])))
    for i in range(0, len(classParas)):
        resultSet[i] += np.log(classParas[i])
    return np.argmax(resultSet)
Example #18
def expected_log_likelihood(data, weights, means, K):
    '''Compute the expected log likelihood of a mixture of Bernoulli
    distributions. Since computing E(LL) requires computing the
    responsibilities, this function does double duty and returns the
    responsibilities too.
    '''
    N = len(data)
    responsibilities = compute_responsibilities(data, weights, means, K)
    ll = 0
    sumK = np.zeros(N)
    for k in range(K):
        b = lambda row: np.log(bern.pmf(row, np.absolute(means[k])))
        temp1 = np.apply_along_axis(b, 1, data)
        sumK += responsibilities[:, k] * (np.log(np.absolute(weights[k])) +
                                          np.sum(temp1, axis=1))
        sumK = np.nan_to_num(sumK)
    ll += np.sum(sumK)
    return (ll, responsibilities)
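One numerical caveat in the lambda above: np.log(bern.pmf(...)) underflows to -inf once a row is long enough, while bern.logpmf computes the same quantity stably in log space. A short demonstration:

import numpy as np
from scipy.stats import bernoulli as bern

row = np.ones(2000, dtype=int)
mean = np.full(2000, 0.3)
print(np.log(np.prod(bern.pmf(row, mean))))  # -inf: the product underflows
print(np.sum(bern.logpmf(row, mean)))        # finite: 2000 * log(0.3)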
Example #19
    def _compute_total_log_likelihood(self):
        log_likelihood = 0

        theta = self.get_theta()
        log_theta = np.log(theta)
        phi = self.get_phi()
        log_phi = np.log(phi)

        ALPHA = self.alpha_0 * np.ones(self.n_topics)

        for document_index in range(self.n_documents):
            # theta
            # log_likelihood += np.log(dirichlet.pdf(theta[document_index], ALPHA))
            log_likelihood += self.dirichlet_pdf_log(theta[document_index],
                                                     ALPHA)

            for token_index in range(len(self.Z[document_index])):
                word_index, topic_index = self.Z[document_index][token_index]
                if topic_index != WEIFTM.NO_TOPIC:
                    # w
                    log_likelihood += log_phi[topic_index, word_index]
                    # z
                    log_likelihood += log_theta[document_index, topic_index]

        log_likelihood += np.sum(
            np.log(bernoulli.pmf(self.b, sigmoid(self.pi))))

        for k in range(self.n_topics):
            # phi
            b_k_nonzero = self.b[k].nonzero()[0]
            BETA = self.beta_0 * np.ones(b_k_nonzero.shape[0])
            # log_likelihood += np.log(dirichlet.pdf(phi[k][b_k_nonzero], BETA))
            log_likelihood += self.dirichlet_pdf_log(phi[k][b_k_nonzero], BETA)
            # c
            log_likelihood += np.log(norm.pdf(self.c[k], 0, self.sig_0))

            for l in range(self.embedding_size):
                # lamb
                log_likelihood += np.log(
                    norm.pdf(self.lamb[k, l], 0, self.sig_0))

        return log_likelihood
Example #20
from scipy.stats import bernoulli
import numpy as np
import matplotlib.pyplot as plt

p = 0.8
k1 = 0
k2 = 1
k = np.linspace(k1, k2, 100)
temp1 = bernoulli.cdf(k, p)
temp2 = bernoulli.pmf(k1, p)
temp3 = bernoulli.pmf(k2, p)
print(temp1)
print(temp2)
print(temp3)
plt.plot(k, temp1, 'o-', color='orange')
plt.plot(k1, temp2, 'o', color='crimson')  # pmf at k = 0
plt.plot(k2, temp3, 'o', color='crimson')  # pmf at k = 1
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.grid()
plt.show()
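The orange curve above is a step function: the Bernoulli cdf jumps by 1 - p at k = 0 and by p at k = 1. A quick check of the plateaus (p = 0.8 as in the snippet):

from scipy.stats import bernoulli

p = 0.8
print(bernoulli.cdf(-0.5, p))  # 0.0, below the support
print(bernoulli.cdf(0.5, p))   # 0.2, i.e. 1 - p after the mass at k = 0
print(bernoulli.cdf(1.0, p))   # 1.0, all mass accumulated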
Example #21
# In[19]:

from scipy.stats import bernoulli
import matplotlib.pyplot as plt
import seaborn as sns  # needed below for sns.color_palette()
brv = bernoulli(p=0.3)
brv.rvs(size=20)

# In[20]:

event_space = [0, 1]
plt.figure(figsize=(12, 8))
colors = sns.color_palette()
for i, p in enumerate([0.1, 0.2, 0.5, 0.7]):
    ax = plt.subplot(1, 4, i + 1)
    plt.bar(event_space,
            bernoulli.pmf(event_space, p),
            label=p,
            color=colors[i],
            alpha=0.5)
    plt.plot(event_space,
             bernoulli.cdf(event_space, p),
             color=colors[i],
             alpha=0.5)

    ax.xaxis.set_ticks(event_space)

    plt.ylim((0, 1))
    plt.legend(loc=0)
    if i == 0:
        plt.ylabel("PDF at $k$")
plt.tight_layout()
Example #22
import matplotlib.pyplot as plt
from scipy.stats import bernoulli
import numpy as np
import pandas as pd
# %matplotlib inline

plt.style.use("ggplot")

p_a = 3.0 / 10.0
p_b = 5.0 / 9.0
p_prior = 0.5
#0:blue, 1:red
data = [0,1,0,0,1,1,1]

N_data = 7
likelihood_a = bernoulli.pmf(data[:N_data], p_a)
likelihood_b = bernoulli.pmf(data[:N_data], p_b)

pa_posterior = p_prior  # prior
pb_posterior = p_prior
pa_posterior *= np.prod(likelihood_a)  # product of per-observation likelihoods
pb_posterior *= np.prod(likelihood_b)
norm = pa_posterior + pb_posterior  # evidence (normalizing constant)
df = pd.DataFrame([pa_posterior/norm, pb_posterior/norm], columns=["post"])  # posterior distribution
x = np.arange(df.shape[0])
plt.bar(x, df["post"])
plt.xticks(x, ["a", "b"])

Example #23
    def score(self, X, y):
        # if predict() returns hard 0/1 labels, any misclassified point makes
        # bernoulli.pmf zero and the summed log-score -inf
        prediction = self.predict(X)
        return np.log(bernoulli.pmf(y, prediction)).sum()
Example #24
    plt.xlabel("Obama Electoral College Votes")
    plt.ylabel("Probability")
    sns.despine()

plot_simulation(result)

from scipy.stats import bernoulli
brv = bernoulli(p=0.3)
brv.rvs(size=20)

event_space=[0,1]
plt.figure(figsize=(12,8))
colors=sns.color_palette()
for i, p in enumerate([0.1, 0.2, 0.5, 0.7]):
    ax = plt.subplot(1, 4, i+1)
    plt.bar(event_space, bernoulli.pmf(event_space, p), label=p, color = colors[i], alpha = 0.5)
    plt.plot(event_space, bernoulli.cdf(event_space, p), color = colors[i], alpha=0.5)

    ax.xaxis.set_ticks(event_space)

    plt.ylim((0,1))
    plt.legend(loc=0)
    if i == 0:
        plt.ylabel("PDF at $k$")
plt.tight_layout()

CDF = lambda x: float(np.sum(result < x)) / result.shape[0]
for votes in [200, 300, 320, 340, 360, 400, 500]:
    print("Obama Win CDF at votes =", votes, "is", CDF(votes))

votelist = np.arange(0,540, 5)
Example #25
def common_dists():
    """Show some commonly used distributions."""
    # prep the subplots
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.flatten()

    # gaussian
    mu, sigma = 0, 1
    x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
    axes[0].plot(x, norm.pdf(x, mu, sigma))
    axes[0].set_title('Gaussian PDF')
    axes[0].set_ylabel('density')
    axes[0].set_xlabel('x')
    axes[0].annotate(r'$\mu$',
                     xy=(mu, 0.4),
                     xytext=(mu - 0.09, 0.3),
                     arrowprops=dict(arrowstyle='->'))
    axes[0].annotate('',
                     xy=(mu - sigma, 0.25),
                     xytext=(mu + sigma, 0.25),
                     arrowprops=dict(arrowstyle='|-|, widthB=0.5, widthA=0.5'))
    axes[0].annotate(r'$2\sigma$', xy=(mu - 0.15, 0.22))

    # uniform distribution defined by min (a) and max (b)
    a, b = 0, 1
    peak = 1 / (b - a)
    axes[1].plot([a, a, b, b], [0, peak, peak, 0])
    axes[1].set_title('Uniform PDF')
    axes[1].set_ylabel('density')
    axes[1].set_xlabel('x')
    axes[1].annotate('min',
                     xy=(a, peak),
                     xytext=(a + 0.2, peak - 0.2),
                     arrowprops=dict(arrowstyle='->'))
    axes[1].annotate('max',
                     xy=(b, peak),
                     xytext=(b - 0.3, peak - 0.2),
                     arrowprops=dict(arrowstyle='->'))
    axes[1].set_ylim(0, 1.5)

    # exponential
    x = np.linspace(0, 5, 100)
    axes[2].plot(x, expon.pdf(x, scale=1 / 3))
    axes[2].set_title('Exponential PDF')
    axes[2].set_ylabel('density')
    axes[2].set_xlabel('x')
    axes[2].annotate(r'$\lambda$ = 3',
                     xy=(0, 3),
                     xytext=(0.5, 2.8),
                     arrowprops=dict(arrowstyle='->'))

    # Bernoulli of coin toss
    axes[3].bar(['heads', 'tails'], bernoulli.pmf([0, 1], p=0.5))
    axes[3].set_title('Bernoulli with fair coin toss (p = 0.5)')
    axes[3].set_ylabel('probability')
    axes[3].set_xlabel('coin toss result')
    axes[3].set_ylim(0, 1)

    # Binomial of tossing a fair coin many times
    x = np.arange(0, 10)
    axes[4].plot(x, binom.pmf(x, n=x.shape[0], p=0.5), linestyle='--', marker='o')
    axes[4].set_title('Binomial PMF - many Bernoulli trials')
    axes[4].set_ylabel('probability')
    axes[4].set_xlabel('number of heads')

    # Poisson PMF (probability mass function) because this is a discrete random variable
    x = np.arange(0, 10)
    axes[5].plot(x, poisson.pmf(x, mu=3), linestyle='--', marker='o')
    axes[5].set_title('Poisson PMF')
    axes[5].set_ylabel('mass')
    axes[5].set_xlabel('x')
    axes[5].annotate(r'$\lambda$ = 3',
                     xy=(3, 0.225),
                     xytext=(1.9, 0.2),
                     arrowprops=dict(arrowstyle='->'))

    # add a title
    plt.suptitle('Some commonly used distributions', fontsize=15, y=0.95)

    return axes
Example #26
plt.style.use('seaborn')
import seaborn as sns

# ### Bernoulli Distribution

# In[2]:

#Bernoulli Distribution
from scipy.stats import bernoulli
p = 0.7
x = np.arange(bernoulli.ppf(0.01, p), bernoulli.ppf(
    0.99, p))  #Percent Point Function (inverse of cdf — percentiles)

print("Mean              : ", bernoulli.stats(p, moments='m'))
print("Variance          : ", bernoulli.stats(p, moments='v'))
print("Prob. Mass Func.  : ", bernoulli.pmf(x, p).item())
print("Cum. Density Func.: ", bernoulli.cdf(x, p).item())

fig = plt.figure(figsize=(20, 10))
plt.subplot(221)
plt.plot(x, bernoulli.pmf(x, p), 'ro', ms=8, label='PMF=(1-p)')
plt.plot(1 - x, 1 - bernoulli.pmf(x, p), 'go', ms=8, label='PMF=p')
plt.vlines(x, 0, bernoulli.pmf(x, p), colors='r', lw=5, alpha=0.5)
plt.vlines(1 - x, 0, 1 - bernoulli.pmf(x, p), colors='g', lw=5, alpha=0.5)
plt.xlabel("Sample Space of Bernoulli Distribution", fontsize=14)
plt.ylabel("PMF", fontsize=14)
plt.title("Probability Distribution of Bernoulli(p=0.7) Distribution",
          fontsize=16)
plt.xticks(np.arange(0, 2, 1))
plt.yticks(np.arange(0, 1.1, 0.1))
plt.legend(loc='best', shadow=True)
Example #27
from scipy.stats import bernoulli, poisson, uniform, expon, erlang, norm, triang

# Bernoulli
p = 0.5
x = 1
# Generate a probability using the PMF
print(bernoulli.pmf(x, p))  # 0.5
# Generate a probability using the CDF
print(bernoulli.cdf(x, p))  # 1.0
# Generate three Bernoulli random numbers
print(bernoulli.rvs(p, size=3))  # [0 1 0]

# Poisson
lmda = 2
x = 5
print(poisson.pmf(x, lmda))
print(poisson.cdf(x, lmda))
print(poisson.rvs(lmda, size=3))

# Uniform
a = 3
b = 10
x = 10
print(uniform.pdf(x, loc=a, scale=(b - a)))  # = 1 / 7
print(uniform.cdf(x, loc=a, scale=(b - a)))  # = 1.0
print(uniform.rvs(loc=3, scale=(b - a), size=3))

# Exponential
mu = 2
x = 2
print(expon.pdf(x, scale=(1 / mu)))
Example #28

if __name__ == '__main__':
    filename = 'iris.data'
    data, classLabels = readData(filename)
    #print(data[1], classLabels)
    p = 0.3
    mean, var, skew, kurt = bernoulli.stats(p, moments='mvsk')
    print(mean, var, skew, kurt)
    fig, ax = plt.subplots(1, 1)
    # x = np.arange(bernoulli.ppf(0.01, p), bernoulli.ppf(0.99, p))
    #for line in range(len(data)-1):
    dataL1 = data[:, 1]

    # bernoulli.pmf is nonzero only at k = 0 or 1, so for the continuous iris
    # feature this mostly plots zeros
    ax.plot(dataL1,
            bernoulli.pmf(dataL1, p),
            'bo',
            ms=8,
            label='bernoulli pmf')
    plt.show()
    fig2, ax2 = plt.subplots(1, 1)
    ax2.scatter(range(150), dataL1)
    plt.show()

    fig3, ax3 = plt.subplots(1, 1)
    y = multivariate_normal.pdf(dataL1, mean=None, cov=1)
    ax3.scatter(dataL1, y)
    plt.show()

    iris = load_iris()
    X = iris.data
Example #29

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats

from scipy.stats import bernoulli, poisson, binom

a = np.arange(2)

colors = plt.rcParams['axes.prop_cycle'].by_key()['color']  # 'axes.color_cycle' was removed in matplotlib 2.0
plt.figure(figsize=(12,8))
for i, p in enumerate([.1, .2, .6, .7]):
    ax = plt.subplot(1, 4, i+1)
    plt.bar(a, bernoulli.pmf(a, p), label=p, color=colors[i], alpha=.5)
    ax.xaxis.set_ticks(a)

    plt.legend(loc=0)
    if i == 0:
        plt.ylabel("PDF at $k$")

plt.suptitle("Bernoulli probabability")
plt.show()


k = np.arange(20)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
plt.figure(figsize=(12,8))
for i, lambda_ in enumerate([1, 4, 6, 12]):
    plt.bar(k, poisson.pmf(k, lambda_), label=lambda_, color=colors[i], alpha=0.4, edgecolor=colors[i], lw=3)
Example #30
    def p(t):
        prior_t = uniform.pdf(t, loc=0, scale=1)
        likl = np.prod(bernoulli.pmf(x, p=t))
        return prior_t * likl
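A sketch of how p(t) might be used: evaluating it on a grid of t values and normalizing approximates the posterior over the Bernoulli parameter (the 0/1 array x is assumed from the surrounding code; the data here are illustrative):

import numpy as np
from scipy.stats import bernoulli, uniform

x = np.array([1, 0, 1, 1, 0, 1])  # illustrative observations

def p(t):
    prior_t = uniform.pdf(t, loc=0, scale=1)
    likl = np.prod(bernoulli.pmf(x, p=t))
    return prior_t * likl

grid = np.linspace(0.001, 0.999, 999)
dens = np.array([p(t) for t in grid])
posterior = dens / (dens.sum() * (grid[1] - grid[0]))  # numerical normalization
print(grid[np.argmax(posterior)])  # posterior mode, near the sample mean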
Example #31
# E\left[x\right]=\mu
# $
#
# #### Variance
# $
# V\left[x\right]=\mu\left(1-\mu\right)
# $
# #### Probability mass function

# In[6]:

from scipy.stats import bernoulli

mu = 0.3

print(bernoulli.pmf(0, mu))
print(bernoulli.pmf(1, mu))

# #### Sampling
# Draw 1000 samples and display them as a histogram.

# In[7]:

from scipy.stats import bernoulli

mu = 0.3
size = 1000

x = bernoulli.rvs(mu, size=size)
plt.hist(x, bins=3)
Example #32
def data_prob(pm, y):
    return bernoulli.pmf(y, pm['p'])
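An illustrative call, assuming pm is a dict-like with entry 'p':

print(data_prob({'p': 0.3}, 1))  # 0.3
print(data_prob({'p': 0.3}, 0))  # 0.7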
Example #33
    def __call__(self, sample):
        # flip the sample horizontally with probability self.p; the original
        # compared bernoulli.pmf(...) == 0.3, which only behaved as intended
        # for p == 0.3
        if bernoulli.rvs(self.p) == 1:
            sample = np.fliplr(sample)

        return sample
Example #34
from scipy.stats import bernoulli
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
p = 0.3
mean, var, skew, kurt = bernoulli.stats(p, moments='mvsk')

# Display the probability mass function (pmf):
x = np.arange(bernoulli.ppf(0.01, p),
              bernoulli.ppf(0.99, p))
ax.plot(x, bernoulli.pmf(x, p), 'bo', ms=8, label='bernoulli pmf')
ax.vlines(x, 0, bernoulli.pmf(x, p), colors='b', lw=5, alpha=0.5)

# Freeze the distribution and display the frozen pmf:
rv = bernoulli(p)
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
        label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()

# Check accuracy of cdf and ppf:
prob = bernoulli.cdf(x, p)
np.allclose(x, bernoulli.ppf(prob, p))

# Generate random numbers:
r = bernoulli.rvs(p, size=1000)
Example #35
        fit = model.sampling(data=data)
        toc('Model fitting')

        means = get_posterior_means(fit)
        
        log_l = 0
        for subject, question in zip(*np.where(holdout == 0)):
            r_subject = means['r_subject'][subject]
            if subject < len(ctrl):
                r_q = means['r_q'][question]
            if subject >= len(ctrl):
                r_q = means['r_q_pd'][question]
            is_correct = correct[subject][question]
            # `logit` here must return a probability (an inverse logit /
            # expit), since bernoulli.pmf expects p in [0, 1]
            prob_correct = logit(r_subject + r_q)
            likelihood = bernoulli.pmf(is_correct, prob_correct)
            log_l += np.log(likelihood)
        num_not_held_out = len(np.where(holdout==0)[0])
        print "Log-likelihood per sample in sample is %.2f" % (log_l/num_not_held_out)
        log_ls_in.append(log_l/num_not_held_out)

        log_l = 0
        for subject,question in zip(*np.where(holdout!=0)):
            r_subject = means['r_subject'][subject]
            if subject < len(ctrl):
                r_q = means['r_q'][question]
            if subject >= len(ctrl):
                r_q = means['r_q_pd'][question]
            is_correct = correct[subject][question]
            prob_correct = logit(r_subject + r_q)
            likelihood = bernoulli.pmf(is_correct, prob_correct)