示例#1
0
def generate_doc(i, phi, theta, num_words_in_doc=WORDS_IN_DOC):
    """Generate a bag-of-words count vector for document ``i``.

    The number of words assigned to each topic is drawn from a multinomial
    over column ``i`` of ``theta``. For every topic, that many words are then
    drawn from a multinomial over the matching column of ``phi`` and the
    per-topic word counts are summed.

    Args:
        i: Index of the document to generate. Must not exceed the number of
            columns of ``theta``.
        phi: 2-D array indexed as ``phi[:, topic]``; each column is used as a
            probability vector over words.
        theta: 2-D array indexed as ``theta[:, i]``; each column is used as a
            probability vector over topics.
        num_words_in_doc: Total number of words to draw for the document.

    Returns:
        1-D array of length ``len(phi)`` with the word counts.
    """
    # NOTE(review): every draw below is seeded with ``random_state=1``, so the
    # output is fully deterministic and each call restarts the same random
    # stream -- confirm this is intended.
    topicvec = multinomial.rvs(num_words_in_doc, theta[:, i], size=1, random_state=1)
    words_in_doc = np.zeros(len(phi))
    for j, words_for_topic in enumerate(topicvec[0]):
        # Each draw has shape (1, n_words); broadcasting turns the
        # accumulator into a (1, n_words) array as well.
        words_in_doc = words_in_doc + multinomial.rvs(
            words_for_topic, phi[:, j], size=1, random_state=1)
    return words_in_doc[0]
示例#2
0
def markov_sequence(p_init: np.array, p_transition: np.array, sequence_length: int) -> List[int]:
    """
    Simulate a chain of discrete states from a Markov model.

    The first state is sampled from ``p_init`` (or from
    ``equilibrium_distribution(p_transition)`` when ``p_init`` is None); each
    following state is sampled from the row of ``p_transition`` selected by
    the previous state. The returned list always holds at least the initial
    state.
    """

    def _draw_state(probabilities):
        # A single multinomial trial is one-hot; the position of the 1 is
        # the sampled state.
        return list(multinomial.rvs(1, probabilities)).index(1)

    start_probs = p_init
    if start_probs is None:
        start_probs = equilibrium_distribution(p_transition)

    chain = [_draw_state(start_probs)]
    steps_left = sequence_length - 1
    while steps_left > 0:
        chain.append(_draw_state(p_transition[chain[-1]]))
        steps_left -= 1
    return chain
示例#3
0
文件: count.py 项目: ynshen/k-seq
def multinomial(p, N, seed=None):
    """Draw multinomial counts for probabilities ``p`` and draw total(s) ``N``.

    ``p`` is rescaled to sum to one when it does not already. ``N`` may be a
    single number (one sample is returned) or a list / array / Series of
    totals (one sample per total, stacked into an array). When ``seed`` is
    given, NumPy's global random generator is seeded first.
    """
    # Imported under an alias because this function shares its name with the
    # scipy distribution object.
    from scipy.stats import multinomial as _multinomial_dist

    if seed is not None:
        np.random.seed(seed)

    total = np.sum(p)
    if total != 1:
        p = np.array(p) / total

    if isinstance(N, (list, np.ndarray, pd.Series)):
        samples = []
        for n in N:
            samples.append(_multinomial_dist.rvs(n=int(n), p=p))
        return np.array(samples)
    if is_numeric(N):
        return _multinomial_dist.rvs(n=int(N), p=p)
    logging.error("Unknown N type", error_type=TypeError)
示例#4
0
def _upsample_mortality(years=None, regions=None):
    """Expand aggregated mortality rows into one record per sampled case.

    For each row, a tenth of the deaths (truncated to an integer) is spread
    uniformly at random over the single ages between ``age_start`` and
    ``age_end`` and one record is emitted per sampled death.

    Args:
        years (list, optional): Years to keep. All by default.
        regions (list, optional): Regions to keep. All by default.
    Returns:
        (pandas.DataFrame): Per-case records with columns ``sex``, ``age``,
            ``country``, ``year`` and an empty ``date`` column, sorted by
            ``sex`` in descending order.
    """
    table = data()
    if regions is not None:
        table = table[table.region.isin(regions)]
    if years is not None:
        table = table[table.year.isin(years)]

    cases = {'sex': [], 'age': [], 'country': [], 'year': []}
    for row in table.itertuples():
        n_ages = row.age_end - row.age_start + 1
        uniform_probs = [1 / n_ages] * n_ages
        per_age = multinomial.rvs(int(row.deaths / 10), uniform_probs)
        for age, n_cases in zip(range(row.age_start, row.age_end + 1), per_age):
            n_cases = int(n_cases)
            cases['country'].extend([row.region] * n_cases)
            cases['year'].extend([row.year] * n_cases)
            cases['sex'].extend([row.sex] * n_cases)
            cases['age'].extend([age] * n_cases)

    cases = pd.DataFrame(cases).sort_values(by='sex', ascending=False)
    cases['date'] = None
    return cases
示例#5
0
    def generate(self, N=1):
        """Draw one multinomial sample of ``N`` trials.

        The category probabilities are ``self.alpha`` rescaled to sum to one.
        The result is an array with one integer count per category.
        """
        alpha_total = sum(self.alpha)
        probabilities = []
        for weight in self.alpha:
            probabilities.append(weight / alpha_total)
        return multinomial.rvs(N, probabilities)
示例#6
0
def compute_z_galaxies(q, N):
    """Sample ``N`` category indices with probabilities proportional to ``q``.

    Negative entries of ``q`` are set to zero **in place** (the caller's
    array is modified) before normalising.

    Returns a tuple ``(z, draws)``: ``z`` holds the sampled category index of
    each draw and ``draws`` is the matching one-hot array of shape
    ``(N, len(q))``.
    """
    # Upstream values can come out slightly negative; clamp them so the
    # normalised vector is a valid probability vector.
    negative = q < 0
    q[negative] = 0
    probabilities = q / q.sum()
    one_hot = multinomial.rvs(n=1, p=probabilities, size=N)
    _, z = np.where(one_hot == 1)
    return z, np.stack(one_hot)
示例#7
0
 def rvs(self, size=None):
     """Sample ages and sort them into status groups.

     ``size`` ages are drawn (defaulting to
     ``self.parameters.initial_infectious``) and each age is assigned one
     status by a single multinomial trial over its row of
     ``self._proportion(ages)``. The whole sample is redrawn until at least
     ``initial_infectious`` ages are susceptible; that many are then moved
     from ``'susceptible'`` to ``'infectious'``.

     Returns a dict mapping each status name to its list of ages.
     """
     required = self.parameters.initial_infectious
     if size is None:
         size = required
     assert (size >= required)
     while True:
         ages = self.age_structureRV.rvs(size=size)
         proportions = self._proportion(ages)
         status = proportions.columns
         status_ages = {name: [] for name in status}
         # `scipy.stats.multinomial.rvs()` takes a single `p`, so statuses
         # are drawn one age at a time.
         for age, probabilities in proportions.iterrows():
             one_hot = multinomial.rvs(1, probabilities)
             # The position of the `1` selects the status name.
             chosen = status[one_hot == 1][0]
             status_ages[chosen].append(age)
         # Accept the sample only if enough susceptibles are available.
         if len(status_ages['susceptible']) >= required:
             break
     # Convert a few susceptibles to infectious.
     for _ in range(required):
         status_ages['infectious'].append(status_ages['susceptible'].pop())
     return status_ages
示例#8
0
def test_accumulator():
    """
    Check that the posterior probability obtained by accumulating
    observations one at a time matches the one computed from the summed
    counts in a single batch.
    """
    theta = np.array([1 / 3, 1 / 3, 1 / 3])
    dirichlet_probability = np.array([1, 3, 2])
    dirichlet_concentration = 1
    sample_size = 40

    observations = multinomial.rvs(1, theta, size=sample_size)

    # Sequential route: take the last posterior and convert it to a
    # posterior probability via the Bayes factor.
    posteriors = sequential_posteriors(
        observations,
        theta,
        dirichlet_probability=dirichlet_probability,
        dirichlet_concentration=dirichlet_concentration,
    )
    post_prob = posterior_probability(bayes_factor(posteriors[-1]))

    # Batch route: compare the two marginal likelihoods on the total counts.
    observations_sum = observations.sum(axis=0)
    dirichlet_alpha = dirichlet_probability * dirichlet_concentration
    log_marginal_likelihood_M1 = log_posterior_predictive(
        observations_sum, dirichlet_alpha)
    log_marginal_likelihood_M0 = multinomial.logpmf(
        observations_sum, observations_sum.sum(), theta)
    odds = np.exp(log_marginal_likelihood_M1 - log_marginal_likelihood_M0)

    assert post_prob == approx(odds / (1 + odds))
示例#9
0
def multinomial_sample(X, lam, rng=None):
    """
    Draw one multinomial sample per row of `X`, with Poisson-distributed
    sampling depths.

    Parameters
    ----------
    X: array_like
       Matrix with `n` rows (samples) and `m` columns (species). Row `i`
       is passed to the multinomial as its probability vector.
    lam : float
       Poisson parameter used to draw the sampling depth of every row.
    rng: np.random.RandomState
       Random state shared by the Poisson and multinomial draws.
       Defaults to ``RandomState(0)``.

    Returns
    -------
    np.array:
       Matrix of counts with `n` rows and `m` columns.
    """
    if rng is None:
        rng = RandomState(0)
    n_rows = X.shape[0]
    seq_depths = poisson.rvs(lam, size=n_rows, random_state=rng)
    rows = []
    for i, depth in enumerate(seq_depths):
        rows.append(multinomial.rvs(depth, X[i, :], random_state=rng))
    return np.vstack(rows)
示例#10
0
def plot_violin(save = False, name = 'img/demographic/population.png'):
    """Draw a violin plot of age by country and sex (a demographic curve).

    Each population row is upsampled: one hundredth of the population
    (truncated to an integer) is spread uniformly at random over the single
    ages of the row's age bracket, one record per sampled person.

    Args:
        save (bool, optional): Whether to write the figure to ``name``.
        name (str, optional): Output path used when ``save`` is true.
    """
    populations = _populations_data()

    cases = {'sex': [], 'age': [], 'country': []}
    for row in populations.itertuples():
        n_ages = row.age_end - row.age_start + 1
        uniform_probs = [1/n_ages]*n_ages
        per_age = multinomial.rvs(int(row.population / 100), uniform_probs)
        for age, n_people in zip(range(row.age_start, row.age_end + 1), per_age):
            n_people = int(n_people)
            cases['country'].extend([row.region] * n_people)
            cases['sex'].extend([row.sex] * n_people)
            cases['age'].extend([age] * n_people)

    cases = pd.DataFrame(cases).sort_values(by = 'sex', ascending = False)
    cases['date'] = None

    fig1, ax1 = plt.subplots()
    sns.violinplot(x="country", y="age", hue="sex", data = cases, ax=ax1)
    if save:
        fig1.savefig(name)
示例#11
0
    def sample(self, point, n_samples=1):
        """Draw category indices from categorical distributions.

        Every parameter vector in ``point`` defines one categorical
        distribution; ``n_samples`` single-trial multinomial draws are taken
        from it and reduced to the index of the drawn outcome.

        Parameters
        ----------
        point : array-like, shape=[..., dim + 1]
            Parameters of a categorical distribution, i.e. probabilities
            associated to dim + 1 outcomes.
        n_samples : int
            Number of draws per parameter vector.
            Optional, default: 1.

        Returns
        -------
        samples : array-like, shape=[..., n_samples]
            Sampled outcome indices.
        """
        geomstats.errors.check_belongs(point, self)
        point = gs.to_ndarray(point, to_ndim=2)
        samples = [
            gs.argmax(multinomial.rvs(1, param, size=n_samples), axis=-1)
            for param in point
        ]
        if len(point) == 1:
            return samples[0]
        return gs.stack(samples)
示例#12
0
def multinomial_sample(X, depths, rng=None):
    """
    Draw one multinomial sample per entry of `depths`.

    Parameters
    ----------
    X: array_like
       Matrix with `n` rows (samples) and `m` columns (species). Row `i`
       is passed to the multinomial as its probability vector.
    depths : np.array
       Sampling depth (number of trials) for each multinomial sample.
    rng: np.random.RandomState
       Random state used for every draw. Defaults to ``RandomState(0)``.

    Returns
    -------
    np.array:
       Matrix of counts with one row per entry of `depths` and `m`
       columns.
    """
    if rng is None:
        rng = RandomState(0)
    rows = []
    for i in range(len(depths)):
        rows.append(multinomial.rvs(depths[i], X[i, :], random_state=rng))
    return np.vstack(rows)
示例#13
0
 def draw(self, K = 10, N = 1*10**5, m = 3, gaussian = False):
     """Draw N K-dimensional points from a randomly parameterised m-component mixture.

     K: dimension of each drawn point.
     N: number of points to draw.
     m: number of mixture components.
     gaussian: if True, each component uses its diagonal ``Sigma`` as
         covariance; otherwise the covariance is a Wishart draw with
         ``nu`` degrees of freedom and scale ``Sigma``.

     All intermediate quantities (weights, memberships, hyperparameters,
     component parameters, covariances, raw draws) are stored on ``self``.
     Returns ``self.x_draws``, an (N, K) array.
     """
     
     # Seed NumPy's global generator only when a seed was configured.
     if self.seed is not None:
         np.random.seed(self.seed)
  
     alphas = gamma.rvs(5, size=m)               # shape parameter
     #print(sum(alphas))                              # equivalent sample size
     # Mixture weights: a single Dirichlet draw with concentration `alphas`.
     self.p = dirichlet.rvs(alpha = alphas, size = 1)[0]
     # One-hot component membership for each of the N points, shape (N, m).
     self.phi_is = multinomial.rvs(1, self.p, size=N)       # draw from categorical p.m.f
     
     self.x_draws = np.zeros((N,K))
     self.hyper_loc, self.hyper_scale, self.thetas, self.var, self.covs, self.rdraws = dict(), dict(), dict(), tuple(), tuple(), tuple()
     
     for i in range(m):
     
           # Component-level hyperparameters; dictionary keys are 1-based.
           self.hyper_loc["mean"+str(i+1)] = norm.rvs(size = 1, loc = 0, scale = 5)
           self.hyper_scale["scale"+str(i+1)] = 1/gamma.rvs(5, size=1)
           
           # Component mean vector (length K) drawn around the hyper location.
           self.thetas["mean"+str(i+1)] = norm.rvs(size = K, loc = self.hyper_loc["mean"+str(i+1)], 
                       scale = self.hyper_scale["scale"+str(i+1)])
           # Diagonal K x K matrix whose diagonal entries are 1 / gamma draws.
           self.thetas["Sigma"+str(i+1)] = np.eye(K)*(1/gamma.rvs(5, size=K))
           self.thetas["nu"+str(i+1)] = randint.rvs(K+2, K+10, size=1)[0]
     
           if gaussian:
              self.covs += (self.thetas['Sigma'+str(i+1)], )
           else:
              self.covs += (wishart.rvs(df = self.thetas['nu'+str(i+1)], scale = self.thetas['Sigma'+str(i+1)], size=1),)
              # nu / (nu - 2) times the covariance; only filled in the non-gaussian branch.
              self.var += (self.thetas["nu"+str(i+1)]/(self.thetas["nu"+str(i+1)]-2)*self.covs[i],)       # variance covariance matrix of first Student-t component
           # N multivariate-normal draws for this component, shape (N, K).
           self.rdraws += (np.random.multivariate_normal(self.thetas["mean"+str(i+1)], self.covs[i], N),)
     
           # (N, K) mask: row n is all ones if point n belongs to component i, else zeros.
           self.Phi = np.tile(self.phi_is[:,i], K).reshape(K,N).T              # repeat phi vector to match with random matrix
           # Keep only the draws of points assigned to this component.
           self.x_draws += np.multiply(self.Phi, self.rdraws[i])                
     return self.x_draws
示例#14
0
def gen_surrogate_data(n_point, p_cat, low, high, alpha, xmin, xmax, discrete,
                       random_state):
    """
    Build a shuffled surrogate sample made of three regimes.

    The number of points per regime is drawn from a multinomial over `p_cat`;
    the `low` and `high` regimes are resampled with replacement from the
    given data and the middle regime is produced by `dispatch_rvs`.

    :param n_point: total number of data points
    :param p_cat: probability of `low`, `pareto` and `high` categories
    :param low, high: data to be subsampled (with replacement) for categories `low` and `high`
    :param alpha: exponent of the `pareto` regime
    :param xmin, xmax: boundaries of the `pareto` regime, so that all(low<xmin) and all (xmax<=high)
    :param discrete: use zipf distribution instead of pareto, bool
    :param random_state: seed or random state, normalised with `check_random_state`
    :return: surrogate sample
    """
    rng = check_random_state(random_state)
    n_low, n_mid, n_high = multinomial.rvs(n_point, p_cat, random_state=rng)
    mid_end = n_low + n_mid

    sample = np.empty(n_point, dtype=float)
    # Layout before shuffling: [low | pareto | high].
    if n_low:
        sample[0:n_low] = rng.choice(low, n_low, replace=True)
    if n_high:
        sample[mid_end:n_point] = rng.choice(high, n_high, replace=True)
    sample[n_low:mid_end] = dispatch_rvs(
        alpha, xmin, xmax, discrete, size=n_mid, random_state=rng)

    rng.shuffle(sample)
    return sample
示例#15
0
def create_dataset(n_dim,
                   n_clust,
                   n_tasks,
                   n_entities,
                   seed=None,
                   pi_samp=None,
                   Si_samp=None,
                   mu_samp=None):
    """
    Create the amortised clustering dataset
    :param n_dim: number of dimensions
    :param n_clust: pair (lo,hi) number of clusters uniformly in the range(lo,hi)
    :param n_tasks: number of tasks
    :param n_entities: pair (lo,hi) number of entities uniformly in the range(lo,hi)
    :param seed: random seed for NumPy's global generator (left untouched when None)
    :param pi_samp: optional callable ``pi_samp(n_clusters)`` returning the cluster
        probabilities; uniform probabilities are used when None
    :param Si_samp: optional callable ``Si_samp(n_dim)`` returning a cluster
        covariance; an inverse-Wishart draw is used when None
    :param mu_samp: optional callable ``mu_samp(n_dim)`` returning a cluster
        mean; a standard-normal draw is used when None
    :return: list with one tuple ``(x, idx, mu, Si)`` per task, where ``x`` is a
        float32 array of points, ``idx`` the integer cluster label of each
        point, and ``mu`` / ``Si`` the per-cluster means and covariances
    """
    if seed is not None:
        np.random.seed(seed)

    tasks = []
    for _ in range(n_tasks):

        n_clust_ = np.random.randint(*n_clust)
        Si = np.zeros((n_clust_, n_dim, n_dim))
        mu = np.zeros((n_clust_, n_dim))
        x = []
        idx = []

        n_ent = np.random.randint(*n_entities)

        if pi_samp is not None:
            pi = pi_samp(n_clust_)
        else:
            pi = np.ones(n_clust_) / n_clust_

        # One multinomial draw splits the entities among the clusters.
        cluster_sizes = multinomial.rvs(n_ent, pi, 1)[0]
        for j, n in enumerate(cluster_sizes):
            if Si_samp is not None:
                Si[j] = Si_samp(n_dim)
            else:
                Si[j] = invwishart.rvs(4, 0.05 * np.eye(n_dim))

            if mu_samp is not None:
                mu[j] = mu_samp(n_dim)
            else:
                mu[j] = np.random.randn(n_dim)
            if n > 0:
                x.append(
                    multivariate_normal.rvs(mu[j], Si[j], size=[n]).astype(
                        np.float32).reshape(n, -1))
                # np.int64 instead of the `np.long` alias, which is not
                # available in every NumPy release.
                idx.append(np.full(n, j, dtype=np.int64))

        # Shuffle points and labels with the same permutation.
        order = np.random.permutation(n_ent)
        x = np.concatenate(x, 0)[order]
        idx = np.concatenate(idx, 0)[order]

        tasks.append((x, idx, mu, Si))

    return tasks
示例#16
0
    def draw(self, K=10, N=1 * 10**5, m=3, gaussian=False):
        """
        Draw N K-dimensional points from a randomly parameterised
        m-component mixture and return them with their component labels.

        Inputs:
        -------
        N: sample size
        K: Dimension of Normal/Student distr.
        m: number of mixture components
        gaussian: if True, each component uses its diagonal ``Sigma`` as
            covariance; otherwise the covariance is a Wishart draw with
            ``nu`` degrees of freedom and scale ``Sigma``.

        Returns:
        --------
        Tuple ``(x_draws, labels)``: the (N, K) array of draws and, per
        point, the index of the component it was assigned to.

        All intermediate quantities are stored on ``self``; NumPy's global
        generator is seeded with ``self.seed`` and a progress line is printed.
        """
        np.random.seed(self.seed)
        self.st0 = np.random.get_state()  # get initial state of RNG
        #np.random.set_state(self.st0)
        print("Drawing from", m, "component mixture distribution.")
        alphas = gamma.rvs(5, size=m)  # shape parameter
        #print(sum(alphas))                              # equivalent sample size
        # Mixture weights: a single Dirichlet draw with concentration `alphas`.
        self.p = dirichlet.rvs(alpha=alphas, size=1)[0]
        # One-hot component membership for each of the N points, shape (N, m).
        self.phi_is = multinomial.rvs(1, self.p,
                                      size=N)  # draw from categorical p.m.f

        self.x_draws = np.zeros((N, K))
        self.hyper_loc, self.hyper_scale, self.thetas, self.var, self.covs, self.rdraws = dict(
        ), dict(), dict(), tuple(), tuple(), tuple()

        for i in range(m):

            # Component-level hyperparameters; dictionary keys are 1-based.
            self.hyper_loc["mean" + str(i + 1)] = norm.rvs(size=1,
                                                           loc=0,
                                                           scale=5)
            self.hyper_scale["scale" + str(i + 1)] = 1 / gamma.rvs(5, size=1)

            # Component mean vector (length K) drawn around the hyper location.
            self.thetas["mean" + str(i + 1)] = norm.rvs(
                size=K,
                loc=self.hyper_loc["mean" + str(i + 1)],
                scale=self.hyper_scale["scale" + str(i + 1)])
            # Diagonal K x K matrix whose diagonal entries are 1 / gamma draws.
            self.thetas["Sigma" +
                        str(i + 1)] = np.eye(K) * (1 / gamma.rvs(5, size=K))
            self.thetas["nu" + str(i + 1)] = randint.rvs(K + 2, K + 10,
                                                         size=1)[0]

            if gaussian:
                self.covs += (self.thetas['Sigma' + str(i + 1)], )
            else:
                self.covs += (wishart.rvs(df=self.thetas['nu' + str(i + 1)],
                                          scale=self.thetas['Sigma' +
                                                            str(i + 1)],
                                          size=1), )
                # nu / (nu - 2) times the covariance; only filled in the
                # non-gaussian branch.
                self.var += (
                    self.thetas["nu" + str(i + 1)] /
                    (self.thetas["nu" + str(i + 1)] - 2) * self.covs[i],
                )  # variance covariance matrix of first Student-t component
            # N multivariate-normal draws for this component, shape (N, K).
            self.rdraws += (np.random.multivariate_normal(
                self.thetas["mean" + str(i + 1)], self.covs[i], N), )

            # (N, K) mask: row n is all ones if point n belongs to
            # component i, else zeros.
            self.Phi = np.tile(self.phi_is[:, i], K).reshape(
                K, N).T  # repeat phi vector to match with random matrix
            # Keep only the draws of points assigned to this component.
            self.x_draws += np.multiply(self.Phi, self.rdraws[i])

        return self.x_draws, np.argmax(self.phi_is, 1)  # X, latent
示例#17
0
def multinomial_robust(NN, p, size=None):
    """Draw multinomial-like counts for ``NN`` trials with probabilities ``p``.

    For ``NN < 1000`` the counts come straight from
    ``scipy.stats.multinomial``. For larger ``NN`` every category is drawn
    separately with ``binomial_robust`` and the draw is repeated until ``NN``
    minus the sum of all but the last category is non-negative.

    Args:
        NN: Number of trials.
        p: Sequence of category probabilities.
        size: Optional sample shape, honoured by both branches. With the
            default ``None`` a single sample is returned.

    Returns:
        Array of counts whose last axis indexes the categories.
    """
    if NN < 1000:
        # Forward `size` so both branches agree on the output shape.
        return multinomial.rvs(NN, p, size=size)

    results = np.array([binomial_robust(NN, pi, size) for pi in p])
    last_entry = int(NN) - results[:-1].sum(0)
    # `last_entry` is an array when `size` is given, so reduce it with
    # np.any instead of relying on the truth value of an array.
    while np.any(last_entry < 0):
        results = np.array([binomial_robust(NN, pi, size) for pi in p])
        last_entry = int(NN) - results[:-1].sum(0)
    # NOTE(review): `last_entry` is only used for the rejection test and is
    # never written back into `results[-1]`, so the returned counts need not
    # sum to NN -- confirm whether that is intended.
    return np.rollaxis(results, 0, results.ndim)
示例#18
0
def test_p_values_decreasing_and_in_range():
    """Sequential p-values must never increase and must stay inside [0, 1]."""
    null_probs = np.array([1 / 3, 1 / 3, 1 / 3])
    true_probs = np.array([2 / 9, 4 / 9, 3 / 9])
    sample_size = 40
    data = multinomial.rvs(1, true_probs, size=sample_size)
    pvals = sequential_p_values(data, null_probs)
    # Each p-value is compared with its predecessor.
    for step in range(1, sample_size):
        previous, current = pvals[step - 1], pvals[step]
        assert current <= previous
    for pval in pvals:
        assert 0.0 <= pval and pval <= 1.0
示例#19
0
 def samples(self, F, num_samples,Y_metadata=None):
     """Draw one categorical sample per row of ``F``.

     The rows of ``F`` are turned into probabilities over
     ``F.shape[1] + 1`` classes: the exponentiated entries divided by one
     plus their row sum, with the extra last class receiving the
     reciprocal of that denominator. The probabilities are clipped away
     from 0 and 1 and renormalised, then a single multinomial trial is
     drawn per row. The one-hot rows are converted with
     ``self.invonehot``. ``num_samples`` and ``Y_metadata`` are not used.
     """
     exp_f = safe_exp(F)
     denominator = 1 + exp_f.sum(1)[:, None]
     # Broadcasting the (n, 1) denominator replaces the explicit tiling.
     probs = np.hstack((exp_f / denominator, 1 / denominator))
     probs = np.clip(probs, 1e-9, 1 - 1e-9)
     probs = probs / probs.sum(1)[:, None]
     n_rows = F.shape[0]
     one_hot = np.empty((n_rows, self.K))
     for row in range(n_rows):
         one_hot[row, :] = multinomial.rvs(n=1, p=probs[row, :], size=1)
     return self.invonehot(Y=one_hot)
示例#20
0
文件: utils.py 项目: celsiustx/solo
def create_multinomial_doublet(X: np.ndarray, i: int, j: int, **kwargs):
    '''make a multinomial combination of 2 cells

    The counts of cells ``i`` and ``j`` are added, restricted to the genes
    listed for either cell, normalised to probabilities and used to draw a
    multinomial sample of the chosen depth.

    Parameters
    ----------
    X : np.array
        cell by genes matrix (dense or scipy sparse)
    i : int,
        randomly chosen ith cell
    j : int,
        randomly chosen jth cell
    kwargs : dict,
        dict with doublet_depth, cell_depths, cells_ids and
        randomize_doublet_size as keys
        doublet_depth is a number scaling the mean depth of the two cells
        cell_depths is an list of all cells total UMI counts as ints
        cell_ids list of lists with genes with counts for each cell
        randomize_doublet_size is a bool; when true the scale factor is
        drawn uniformly between 1 and doublet_depth instead of being
        doublet_depth itself
    Returns
    -------
    np.ndarray or csr_matrix
        multinomial expression vector of two cells, of length X.shape[1]
        (sparse when X is sparse)
    '''
    doublet_depth = kwargs["doublet_depth"]
    cell_depths = kwargs["cell_depths"]
    cells_ids = kwargs["cells_ids"]
    randomize_doublet_size = kwargs["randomize_doublet_size"]

    # add their counts
    dp = X[i] + X[j]
    non_zero_indexes = np.unique(cells_ids[i] + cells_ids[j])
    if issparse(X):
        dp = dp.data
    else:
        dp = np.ravel(dp)
        dp = dp[non_zero_indexes]

    # Normalise in float64 and out of place: an in-place `/=` raises on
    # integer count matrices, and low-precision probabilities are what
    # trigger https://github.com/numpy/numpy/issues/8317.
    dp = np.asarray(dp, dtype=np.float64)
    dp = dp / dp.sum()

    if randomize_doublet_size:
        scale_factor = np.random.uniform(1., doublet_depth)
    else:
        scale_factor = doublet_depth
    # choose depth
    dd = int(scale_factor * (cell_depths[i] + cell_depths[j]) / 2)

    # sample counts from multinomial and scatter them back to gene positions
    non_zero_counts = multinomial.rvs(n=dd, p=dp)
    counts = np.zeros(X.shape[1])
    counts[non_zero_indexes] = non_zero_counts
    return csr_matrix(counts) if issparse(X) else counts
        def transform_single(index):
            """Return a copy of column ``index`` of ``X`` with nulls imputed.

            Null entries are replaced by values sampled from
            ``self.statistics_[index]``, a ``(values, probabilities)`` pair:
            one single-trial multinomial draw per null entry picks the
            position of the replacement in ``values``. ``X`` and ``self``
            come from the enclosing scope.
            """
            column = X[:, index].copy()
            mask = pd.isnull(column)
            values, probabilities = self.statistics_[index]

            n_missing = mask.sum()
            if n_missing == 0:
                # Nothing to impute. Returning early also avoids calling
                # np.vectorize on an empty array, which raises when
                # `otypes` is not set.
                return column

            sample = np.argmax(multinomial.rvs(p=probabilities,
                                               n=1,
                                               size=n_missing,
                                               random_state=self.random_state),
                               axis=1)
            column[mask] = np.vectorize(lambda pick: values[pick])(sample)
            return column
示例#22
0
    def sample_from_multinomial(self, sampletimes=1):
        """Draw ``sampletimes`` categorical samples and store them on ``self``.

        When ``self.valuenumber`` is None the numbers and probabilities are
        first generated with ``set_random_numbers`` and
        ``set_random_probabilities``. Each sample is the 1-based position of
        the category picked by a single multinomial trial over
        ``self.probabilities``. The result is stored as a float array in
        ``self.temporary_sample``; nothing is returned.
        """
        if self.valuenumber is None:
            self.set_random_numbers()
            self.set_random_probabilities()

        sample = np.zeros(sampletimes)

        # `range` instead of the Python-2-only `xrange`.
        for x in range(sampletimes):
            one_sample = multinomial.rvs(1, self.probabilities)
            # Take the scalar index explicitly rather than assigning a
            # one-element array into a scalar slot.
            sample[x] = np.flatnonzero(one_sample == 1)[0] + 1

        self.temporary_sample = sample
示例#23
0
 def sample(self, size=1):
     """Draw samples from this model, one per input index.

     An integer ``size`` is expanded to that many indices: zeros when no
     input model is attached, otherwise ``self.input.sample(size)``. A
     non-integer ``size`` is used as the indices directly and requires an
     input model. Each index selects parameters through
     ``self.param(self.get_beta(idx))``. For ``kind == 'con'`` a normal
     draw with that location and scale ``self.sigma`` is returned per
     index; otherwise the index of a single categorical draw.
     """
     import numpy as np
     has_no_input = self.input is None
     if isinstance(size, int):
         if has_no_input:
             size = [0]*size
         else:
             size = self.input.sample(size)
     elif has_no_input:
         raise ValueError('no input model provided to index into')

     params = []
     for idx in size:
         params.append(self.param(self.get_beta(idx)))

     if self.kind=='con':
         from scipy.stats import norm
         draws = [norm.rvs(loc=p, scale=self.sigma, size=1) for p in params]
     else:
         from scipy.stats import multinomial
         draws = [multinomial.rvs(n=1, p=p, size=1).argmax() for p in params]
     return np.array(draws)
示例#24
0
    def __getitem__(self, index):
        """Return batch number ``index`` as a tuple ``(X, y)``.

        ``X`` holds the rows of ``self.X_train`` selected by the batch's
        slice of ``self.indexes``. Outside validation, and when augmentation
        is enabled, noise scaled by 0.001 is added to ``X``.

        ``y`` is taken from ``self.y_train`` in validation mode. Otherwise a
        label case is picked at random for every item from
        ``self.list_label_case``:

        * ``'soft_label'``: the row of ``self.y_train``;
        * ``'sample_label_dist'``: a one-hot draw from the normalised row of
          ``self.y_train``;
        * ``'mix_label_original'``: the row of ``self.y_original``.

        Raises:
            ValueError: for an unknown noise type or label case.
            AssertionError: if the labels of the batch do not sum to one.
        """
        indexes = self.indexes[index * self.batch_size:(index + 1) *
                               self.batch_size]

        X = self.X_train[indexes, :, :, :]
        if self.is_data_augment and not self.is_validate:
            if self.data_augment_noise_type == 'normal':
                noises = np.random.normal(loc=self.normal_loc,
                                          scale=1.,
                                          size=X.shape)
            elif self.data_augment_noise_type == 'uniform':
                noises = np.random.uniform(low=-1., high=1., size=X.shape)
            else:
                # Fail with a clear message instead of an unbound `noises`,
                # mirroring the handling of unknown label cases below.
                raise ValueError('not existing noise type: {}'.format(
                    self.data_augment_noise_type))
            X += 0.001 * noises

        if self.is_validate:
            y = self.y_train[indexes, :]
        else:
            # Every enabled label flag must have a matching entry in
            # list_label_case.
            num_label_case = np.sum([
                self.is_soft_label, self.is_sample_label_dist,
                self.is_mix_label_original
            ])
            assert num_label_case == len(self.list_label_case)

            idx_label_case = np.random.randint(num_label_case,
                                               size=len(indexes))
            y = np.zeros((len(indexes), self.y_train.shape[1]))
            for i, ind in enumerate(indexes):
                name_label_case = self.list_label_case[idx_label_case[i]]
                if name_label_case == 'soft_label':
                    y[i] = self.y_train[ind]
                elif name_label_case == 'sample_label_dist':
                    y_prob = self.y_train[ind].astype(np.float64)
                    y_prob /= np.sum(y_prob)
                    if np.sum(y_prob) > 1:  # due to numerical precision
                        y_prob += np.finfo(float).eps
                        y_prob /= np.sum(y_prob)
                    y[i] = multinomial.rvs(1, y_prob, 1)
                elif name_label_case == 'mix_label_original':
                    y[i] = self.y_original[ind]
                else:
                    raise ValueError(
                        'not existing label case: {}'.format(name_label_case))

        assert np.sum(y) == len(indexes)
        assert np.all(
            np.abs(np.sum(y, axis=1) - 1.) < 1e-5
        ), 'label probability does not sum up to 1\n{}\n{}'.format(
            np.sum(y, axis=1), y)

        return X, y
示例#25
0
 def __call__(self, NUM=None, P=None, sampletimes=1):
     """Draw ``sampletimes`` categorical samples and return them.

     ``NUM`` is stored on ``self.NUM``. When ``P`` is given it is stored
     as ``self.P``; otherwise ``self.gen_P()`` is called, preceded by
     ``self.gen_NUM()`` when ``NUM`` is None as well. Each sample is the
     0-based position of the category picked by a single multinomial
     trial over ``self.P``.

     Returns a float array of length ``sampletimes``.
     """
     self.NUM = NUM
     if P is None:
         if NUM is None:
             self.gen_NUM()
         self.gen_P()
     else:
         self.P = P
     sample = np.zeros(sampletimes)
     # `range` instead of the Python-2-only `xrange`.
     for x in range(sampletimes):
         one_sample = multinomial.rvs(1, self.P)
         # Take the scalar index explicitly rather than assigning a
         # one-element array into a scalar slot.
         sample[x] = np.flatnonzero(one_sample == 1)[0]
     return sample
示例#26
0
 def distribute_doses(self, model: SIR) -> Tuple[np.array]:
     """Distribute one day's doses and advance the model by one step.

     When ``self.exhausted(model)`` is true, three zero arrays shaped like
     ``self.age_ratios`` are returned and nothing else happens. Otherwise
     the latest ``model.S`` entry is reduced by the effective doses scaled
     by the susceptible share, the model is stepped forward, the daily
     doses are split with a multinomial over ``self.age_ratios`` and
     ``self.bin_populations`` is reduced by the integer part of the
     immunizing doses.

     Returns the tuple
     ``(distributed_doses, effective_doses, immunizing_doses)``.
     """
     if self.exhausted(model):
         shape = self.age_ratios.shape
         return (np.zeros(shape), np.zeros(shape), np.zeros(shape))

     susceptible_share = model.S[-1] / model.N[-1]
     dV = susceptible_share * self.daily_doses * self.effectiveness
     model.S[-1] -= dV
     model.parallel_forward_epi_step()

     distributed_doses = Multinomial.rvs(self.daily_doses, self.age_ratios)
     effective_doses = self.effectiveness * distributed_doses
     # Recomputed after the step, using the means of the latest entries.
     mean_susceptible_share = model.S[-1].mean() / model.N[-1].mean()
     immunizing_doses = mean_susceptible_share * effective_doses
     self.bin_populations -= immunizing_doses.astype(int)
     return (distributed_doses, effective_doses, immunizing_doses)
示例#27
0
def gen_surrogate_counts(n_point, p_cat, p_low, p_high, alpha, xmin, xmax,
                         bins, discrete, random_state):
    """
    Generate surrogate hit counts

    The number of points per category is drawn from a multinomial over
    `p_cat`. The `pareto` points are sampled with `dispatch_rvs` and binned
    with `bins`; the first `len(p_low)` and last `len(p_high)` bins are then
    overwritten with multinomial draws for the `low` and `high` categories.

    :param n_point: total number of data points
    :param p_cat: probability of `low`, `pareto` and `high` categories
    :param p_low, p_high: hit probabilities within categories `low` and `high`
    :param alpha: exponent of the `pareto` regime
    :param xmin, xmax: boundaries of the `pareto` regime, so that all(low<xmin) and all(xmax<=high)
    :param bins: bin boundaries (used for calculating cdf and or binning samples)
    :param discrete: use zipf distribution instead of pareto, bool
    :param random_state: seed or random state, normalised with `check_random_state`
    :return: surrogate hit counts
    """
    random_state = check_random_state(random_state)
    s_low, s_mid, s_high = multinomial.rvs(n_point,
                                           p_cat,
                                           random_state=random_state)
    # TODO: the same can be achieved by using the cdf and multinomial sampling, see whether it is stable enough.
    sample = dispatch_rvs(alpha,
                          xmin,
                          xmax,
                          discrete,
                          size=s_mid,
                          random_state=random_state)

    counts, _ = np.histogram(sample, bins)
    if s_low:
        counts[0:len(p_low)] = multinomial.rvs(s_low,
                                               p_low,
                                               random_state=random_state)
    if s_high:
        # The high category must distribute its own s_high points; the
        # previous code drew s_low points here.
        counts[len(counts) - len(p_high):len(counts)] = multinomial.rvs(
            s_high, p_high, random_state=random_state)

    return counts
示例#28
0
 def _resample(self, n, prob, classes, grouped_data):
     """Draw a class-stratified resample with replacement.

     The ``n`` requested samples are split among ``classes`` by one
     multinomial draw over ``prob``; each class then contributes that
     many items resampled with replacement from ``grouped_data[class]``.
     Every item is indexed as ``item[0]`` (feature part) and ``item[1]``
     (label part).

     Returns the tuple ``(features, labels)`` as arrays.
     """
     per_class_counts = multinomial.rvs(n=n,
                                        p=prob,
                                        random_state=self.random_state)
     subset_x = []
     subset_y = []
     for position, label in enumerate(classes):
         drawn = resample(grouped_data[label],
                          replace=True,
                          n_samples=per_class_counts[position],
                          random_state=self.random_state)
         for item in drawn:
             subset_x.append(item[0])
             subset_y.append(item[1])
     return np.array(subset_x), np.array(subset_y)
示例#29
0
def compute_conditional_z(q, y, mu, sigma_square):
    """Sample a mixture component for every observation.

    For observation ``l`` the weight of component ``j`` is ``q[j]`` times
    the normal density of ``y[l, 0]`` with mean ``mu[j]`` and covariance
    ``sigma_square[j]``. Negative weights are clamped to zero, the weights
    are normalised and one component is drawn per observation.

    Returns a tuple ``(z, i)``: ``z`` holds the drawn component index per
    observation and ``i`` is the matching one-hot array of shape
    ``(n_observations, n_components)``.
    """
    n_obs = np.shape(y)[0]
    n_comp = np.shape(mu)[0]
    i = np.empty(shape=(n_obs, n_comp))
    for row in range(n_obs):
        weights = np.array([
            q[comp] * multivariate_normal.pdf(
                y[row, 0], mean=mu[comp], cov=sigma_square[comp])
            for comp in range(n_comp)
        ])
        weights[weights < 0] = 0
        weights = weights / np.sum(weights)
        i[row, :] = multinomial.rvs(n=1, p=weights, size=1)[0]
    _, z = np.where(i == 1)
    return z, i
示例#30
0
def generate_data(n, seed=None, x=None):
    """Generate inputs ``x`` and sampled three-class labels ``y``.

    When ``x`` is None, ``n`` inputs are drawn uniformly with shape
    ``(n, 1)``. The class probabilities of each row come from ``etas(x)``
    stacked side by side, and the label is the class picked by a single
    multinomial trial. ``seed`` (when given) seeds NumPy's global generator
    and is also passed as ``random_state`` to the first row's draw only.

    Returns the tuple ``(x, y)``.
    """
    if seed is not None:
        np.random.seed(seed)
    if x is None:
        x = np.random.uniform(size=(n, 1))

    eta_1, eta_2, eta_3 = etas(x)
    class_probs = np.hstack((eta_1, eta_2, eta_3))

    one_hot_rows = []
    for row in range(x.shape[0]):
        # Only the first draw receives the explicit seed; the others use
        # the default random state.
        row_state = seed if row == 0 else None
        one_hot_rows.append(
            multinomial.rvs(1, class_probs[row], random_state=row_state))
    y = np.argmax(np.array(one_hot_rows), axis=1)
    return x, y