def generate_doc(i, phi, theta, num_words_in_doc=WORDS_IN_DOC):
    """i is the index of the document to generate; it must not exceed
    the number of columns of theta."""
    # Note: the hardcoded random_state makes every call deterministic.
    topicvec = multinomial.rvs(num_words_in_doc, theta[:, i], size=1, random_state=1)
    words_in_doc = np.zeros(len(phi))
    for j in range(len(topicvec[0])):
        words_in_doc = words_in_doc + multinomial.rvs(topicvec[0][j], phi[:, j], size=1, random_state=1)
    return words_in_doc[0]
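# A minimal usage sketch for generate_doc above (illustrative values, not
# from the original source): toy phi (words x topics) and theta
# (topics x documents), with numpy and scipy.stats.multinomial assumed to
# be imported as in the snippet. Each column must sum to 1.
import numpy as np
from scipy.stats import multinomial

phi = np.array([[0.7, 0.1],    # P(word | topic), one column per topic
                [0.2, 0.3],
                [0.1, 0.6]])
theta = np.array([[0.8, 0.2],  # P(topic | doc), one column per document
                  [0.2, 0.8]])
doc = generate_doc(0, phi, theta, num_words_in_doc=50)  # word-count vector of length len(phi)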
def markov_sequence(p_init: np.array, p_transition: np.array, sequence_length: int) -> List[int]:
    """Generate a Markov sequence based on p_init and p_transition."""
    if p_init is None:
        p_init = equilibrium_distribution(p_transition)
    initial_state = list(multinomial.rvs(1, p_init)).index(1)
    states = [initial_state]
    for _ in range(sequence_length - 1):
        p_tr = p_transition[states[-1]]
        new_state = list(multinomial.rvs(1, p_tr)).index(1)
        states.append(new_state)
    return states
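# A hedged usage sketch for markov_sequence above (illustrative values,
# not from the original source): a sticky two-state chain. The categorical
# draw works by taking one multinomial draw with n=1 and locating the 1.
import numpy as np
from scipy.stats import multinomial

p_init = np.array([0.5, 0.5])
p_transition = np.array([[0.9, 0.1],
                         [0.2, 0.8]])
states = markov_sequence(p_init, p_transition, sequence_length=100)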
def multinomial(p, N, seed=None):
    """Multinomial distribution for a given probability p and total number of draws."""
    if seed is not None:
        np.random.seed(seed)
    if np.sum(p) != 1:
        p = np.array(p) / np.sum(p)
    # Local import; note that it shadows this function's own name inside the body.
    from scipy.stats import multinomial
    if isinstance(N, (list, np.ndarray, pd.Series)):
        return np.array([multinomial.rvs(n=int(n), p=p) for n in N])
    elif is_numeric(N):
        return multinomial.rvs(n=int(N), p=p)
    else:
        raise TypeError("Unknown N type: {}".format(type(N)))
def _upsample_mortality(years=None, regions=None):
    """Returns deaggregated (per-case) mortality for density plots.

    Args:
        years (list, optional): List of years to include. All by default.
        regions (list, optional): List of regions to include. All by default.
    Returns:
        (pandas.DataFrame): Upsampled per-case mortality data.
    """
    # get data
    x = data()
    # filter
    if regions is not None:
        x = x[x.region.isin(regions)]
    if years is not None:
        x = x[x.year.isin(years)]
    # upsample
    cases = {'sex': [], 'age': [], 'country': [], 'year': []}
    for row in x.itertuples():
        age_cat = row.age_end - row.age_start + 1
        random_deaths = multinomial.rvs(int(row.deaths / 10), [1 / age_cat] * age_cat)  # , random_state = 12345)
        ages = list(range(row.age_start, row.age_end + 1))
        for age, deaths in zip(ages, random_deaths):
            for _ in range(deaths):
                cases['country'].append(row.region)
                cases['year'].append(row.year)
                cases['sex'].append(row.sex)
                cases['age'].append(age)
    cases = pd.DataFrame(cases)\
        .sort_values(by='sex', ascending=False)
    cases['date'] = None
    # return
    return cases
def generate(self, N=1):
    """Matches Language.generate: generate 1 sample of N draws from theta.
    The returned sample is an array of size K with the (integer) number of
    draws in each category."""
    norm_alpha = [a / sum(self.alpha) for a in self.alpha]
    return multinomial.rvs(N, norm_alpha)
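# The pattern used by generate above, in isolation (illustrative alpha,
# not from the original source): normalize a Dirichlet alpha to its mean
# and take a single multinomial draw of N trials over the K categories.
from scipy.stats import multinomial

alpha = [2.0, 1.0, 1.0]
norm_alpha = [a / sum(alpha) for a in alpha]
draw = multinomial.rvs(10, norm_alpha)  # length-3 count vector summing to 10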
def compute_z_galaxies(q, N):
    q[q < 0] = 0  # Quick fix: sometimes the number returned by the gaussian is negative and then the code crashes
    p = q / q.sum()
    draws = multinomial.rvs(n=1, p=p, size=N)
    z = np.where(draws == 1)[1]
    return z, np.stack(draws)
def rvs(self, size=None):
    if size is None:
        size = self.parameters.initial_infectious
    assert (size >= self.parameters.initial_infectious)
    # Loop until we get a satisfactory sample.
    while True:
        # Pick `size` random ages.
        ages = self.age_structureRV.rvs(size=size)
        # Determine the status for each age.
        proportions = self._proportion(ages)
        status = proportions.columns
        status_ages = {k: [] for k in status}
        # `scipy.stats.multinomial.rvs()` can't handle multiple `p`s,
        # so we need to loop.
        for (age, row) in proportions.iterrows():
            # Randomly pick a status.
            rv = multinomial.rvs(1, row)
            # `rv` is an array with `1` in the position
            # picked and `0`s in the remaining positions.
            # Convert that to the name.
            s = status[rv == 1][0]
            # Add this `age` to the status list.
            status_ages[s].append(age)
        if (len(status_ages['susceptible'])
                < self.parameters.initial_infectious):
            # We don't have enough susceptibles. Loop again.
            continue
        else:
            # Convert a few susceptibles to infectious.
            for _ in range(self.parameters.initial_infectious):
                age = status_ages['susceptible'].pop()
                status_ages['infectious'].append(age)
            # This is a satisfactory sample, so end loop.
            break
    return status_ages
def test_accumulator():
    """
    Tests that the posterior probability computed sequentially via
    accumulation is equal to the posterior probability computed in a
    batch manner.
    """
    theta = np.array([1 / 3, 1 / 3, 1 / 3])
    dirichlet_probability = np.array([1, 3, 2])
    dirichlet_concentration = 1
    dirichlet_alpha = dirichlet_probability * dirichlet_concentration
    sample_size = 40
    observations = multinomial.rvs(1, theta, size=sample_size)
    observations_sum = reduce(lambda x, y: x + y, observations)
    final_posterior = sequential_posteriors(
        observations,
        theta,
        dirichlet_probability=dirichlet_probability,
        dirichlet_concentration=dirichlet_concentration,
    )[-1]
    final_bf = bayes_factor(final_posterior)
    post_prob = posterior_probability(final_bf)
    log_marginal_likelihood_M1 = log_posterior_predictive(
        observations_sum, dirichlet_alpha)
    log_marginal_likelihood_M0 = multinomial.logpmf(
        observations_sum, observations_sum.sum(), theta)
    log_odds = log_marginal_likelihood_M1 - log_marginal_likelihood_M0
    odds = np.exp(log_odds)
    assert post_prob == approx(odds / (1 + odds))
def multinomial_sample(X, lam, rng=None):
    """
    Draws multinomial samples from an urn using a Poisson process
    with rate `lam`.

    Parameters
    ----------
    X : array_like
        A matrix of proportions with `n` rows and `m` columns, where `n`
        corresponds to the number of samples and `m` corresponds to the
        number of species. Each row is used as the multinomial
        probability vector, so it must sum to 1.
    lam : float
        Poisson parameter, which is also the mean and variance of the Poisson.
    rng : np.random.RandomState
        Numpy random state number generator.

    Returns
    -------
    np.array
        A matrix of counts with `n` rows and `m` columns, where `n`
        corresponds to the number of samples and `m` corresponds to the
        number of species.
    """
    if rng is None:
        rng = RandomState(0)
    seq_depths = poisson.rvs(lam, size=X.shape[0], random_state=rng)
    counts = [
        multinomial.rvs(seq_depths[i], X[i, :], random_state=rng)
        for i in range(len(seq_depths))
    ]
    return np.vstack(counts)
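# A minimal sketch of calling multinomial_sample above (toy values, not
# from the original source), assuming the imports used by the function
# (numpy, scipy.stats.poisson, scipy.stats.multinomial) are in scope.
# Each output row is a count vector summing to its Poisson-drawn depth.
import numpy as np
from numpy.random import RandomState

X = np.array([[0.5, 0.3, 0.2],
              [0.1, 0.1, 0.8]])
resampled = multinomial_sample(X, lam=100, rng=RandomState(42))
assert resampled.shape == X.shape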
def plot_violin(save=False, name='img/demographic/population.png'):
    """Constructs a violin plot of population (a so-called demographic curve).

    Args:
        save (bool, optional): Whether to cache or not.
        name (str, optional): Path for caching.
    """
    # fetch data
    df = _populations_data()
    # upsample
    cases = {'sex': [], 'age': [], 'country': []}
    for row in df.itertuples():
        age_cat = row.age_end - row.age_start + 1
        random_pops = multinomial.rvs(int(row.population / 100), [1 / age_cat] * age_cat)
        ages = list(range(row.age_start, row.age_end + 1))
        for age, count in zip(ages, random_pops):
            for _ in range(count):
                cases['country'].append(row.region)
                cases['sex'].append(row.sex)
                cases['age'].append(age)
    cases = pd.DataFrame(cases)\
        .sort_values(by='sex', ascending=False)
    cases['date'] = None
    # plot
    fig1, ax1 = plt.subplots()
    sns.violinplot(x="country", y="age", hue="sex", data=cases, ax=ax1)
    if save:
        fig1.savefig(name)
def sample(self, point, n_samples=1):
    """Sample from the categorical distribution.

    Sample from the categorical distribution with parameters provided by
    point. This gives samples in the simplex.

    Parameters
    ----------
    point : array-like, shape=[..., dim + 1]
        Parameters of a categorical distribution, i.e. probabilities
        associated to dim + 1 outcomes.
    n_samples : int
        Number of points to sample with each set of parameters in point.
        Optional, default: 1.

    Returns
    -------
    samples : array-like, shape=[..., n_samples]
        Samples from categorical distributions.
    """
    geomstats.errors.check_belongs(point, self)
    point = gs.to_ndarray(point, to_ndim=2)
    samples = []
    for param in point:
        counts = multinomial.rvs(1, param, size=n_samples)
        samples.append(gs.argmax(counts, axis=-1))
    return samples[0] if len(point) == 1 else gs.stack(samples)
def multinomial_sample(X, depths, rng=None):
    """
    Draws multinomial samples from an urn at the given sampling depths.

    Parameters
    ----------
    X : array_like
        A matrix of proportions with `n` rows and `m` columns, where `n`
        corresponds to the number of samples and `m` corresponds to the
        number of species.
    depths : np.array
        Sampling depths for each of the multinomial samples.
    rng : np.random.RandomState
        Numpy random state number generator.

    Returns
    -------
    np.array
        A matrix of counts with `n` rows and `m` columns, where `n`
        corresponds to the number of samples and `m` corresponds to the
        number of species.
    """
    if rng is None:
        rng = RandomState(0)
    counts = [
        multinomial.rvs(depths[i], X[i, :], random_state=rng)
        for i in range(len(depths))
    ]
    return np.vstack(counts)
def draw(self, K=10, N=10**5, m=3, gaussian=False):
    if self.seed is not None:
        np.random.seed(self.seed)
    alphas = gamma.rvs(5, size=m)  # shape parameter
    # print(sum(alphas))  # equivalent sample size
    self.p = dirichlet.rvs(alpha=alphas, size=1)[0]
    self.phi_is = multinomial.rvs(1, self.p, size=N)  # draw from categorical p.m.f.
    self.x_draws = np.zeros((N, K))
    self.hyper_loc, self.hyper_scale, self.thetas = dict(), dict(), dict()
    self.var, self.covs, self.rdraws = tuple(), tuple(), tuple()
    for i in range(m):
        self.hyper_loc["mean" + str(i + 1)] = norm.rvs(size=1, loc=0, scale=5)
        self.hyper_scale["scale" + str(i + 1)] = 1 / gamma.rvs(5, size=1)
        self.thetas["mean" + str(i + 1)] = norm.rvs(size=K,
                                                    loc=self.hyper_loc["mean" + str(i + 1)],
                                                    scale=self.hyper_scale["scale" + str(i + 1)])
        self.thetas["Sigma" + str(i + 1)] = np.eye(K) * (1 / gamma.rvs(5, size=K))
        self.thetas["nu" + str(i + 1)] = randint.rvs(K + 2, K + 10, size=1)[0]
        if gaussian:
            self.covs += (self.thetas['Sigma' + str(i + 1)],)
        else:
            self.covs += (wishart.rvs(df=self.thetas['nu' + str(i + 1)],
                                      scale=self.thetas['Sigma' + str(i + 1)],
                                      size=1),)
        # variance-covariance matrix of the i-th Student-t component
        self.var += (self.thetas["nu" + str(i + 1)] / (self.thetas["nu" + str(i + 1)] - 2) * self.covs[i],)
        self.rdraws += (np.random.multivariate_normal(self.thetas["mean" + str(i + 1)], self.covs[i], N),)
        # repeat the phi vector to match the shape of the random matrix
        self.Phi = np.tile(self.phi_is[:, i], K).reshape(K, N).T
        self.x_draws += np.multiply(self.Phi, self.rdraws[i])
    return self.x_draws
def gen_surrogate_data(n_point, p_cat, low, high, alpha, xmin, xmax, discrete, random_state):
    """
    Generate surrogate data points

    :param n_point: total number of data points
    :param p_cat: probability of `low`, `pareto` and `high` categories
    :param low, high: data to be subsampled (with replacement) for categories `low` and `high`
    :param alpha: exponent of the `pareto` regime
    :param xmin, xmax: boundaries of the `pareto` regime, so that all(low < xmin) and all(xmax <= high)
    :param discrete: use zipf distribution instead of pareto, bool
    :param random_state:
    :return: surrogate sample
    """
    random_state = check_random_state(random_state)
    s_low, s_mid, s_high = multinomial.rvs(n_point, p_cat, random_state=random_state)
    sample = np.empty(n_point, dtype=float)
    if s_low:
        sample[0:s_low] = random_state.choice(low, s_low, replace=True)
    if s_high:
        sample[s_low + s_mid:n_point] = random_state.choice(high, s_high, replace=True)
    sample[s_low:s_low + s_mid] = dispatch_rvs(alpha, xmin, xmax, discrete,
                                               size=s_mid, random_state=random_state)
    random_state.shuffle(sample)
    return sample
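# The category-splitting pattern used above, in isolation (illustrative
# numbers): a single multinomial draw partitions n_point into per-regime
# sample sizes that are guaranteed to sum back to n_point.
from scipy.stats import multinomial

s_low, s_mid, s_high = multinomial.rvs(1000, [0.1, 0.8, 0.1], random_state=0)
assert s_low + s_mid + s_high == 1000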
def create_dataset(n_dim, n_clust, n_tasks, n_entities, seed=None,
                   pi_samp=None, Si_samp=None, mu_samp=None):
    """
    Create the amortised clustering dataset

    :param n_dim: number of dimensions
    :param n_clust: pair (lo, hi); number of clusters drawn uniformly in range(lo, hi)
    :param n_tasks: number of tasks
    :param n_entities: pair (lo, hi); number of entities drawn uniformly in range(lo, hi)
    :param seed: random seed
    :return: data set
    """
    if seed is not None:
        np.random.seed(seed)
    tasks = []
    for i in range(n_tasks):
        n_clust_ = np.random.randint(*n_clust)
        Si = np.zeros((n_clust_, n_dim, n_dim))
        mu = np.zeros((n_clust_, n_dim))
        x = []
        idx = []
        n_ent = np.random.randint(*n_entities)
        if pi_samp is not None:
            pi = pi_samp(n_clust_)
        else:
            pi = np.ones(n_clust_) / n_clust_
        for j, n in enumerate(*multinomial.rvs(n_ent, pi, 1)):
            if Si_samp is not None:
                Si[j] = Si_samp(n_dim)
            else:
                Si[j] = invwishart.rvs(4, 0.05 * np.eye(n_dim))
            if mu_samp is not None:
                mu[j] = mu_samp(n_dim)
            else:
                mu[j] = np.random.randn(n_dim)
            if n > 0:
                x.append(
                    multivariate_normal.rvs(mu[j], Si[j], size=[n]).astype(
                        np.float32).reshape(n, -1))
                idx.append(j * np.ones(n, dtype=np.int64))  # np.long was removed in NumPy 1.24
        j = np.random.permutation(n_ent)
        x = np.concatenate(x, 0)[j]
        idx = np.concatenate(idx, 0)[j]
        tasks.append((x, idx, mu, Si))
    return tasks
def draw(self, K=10, N=10**5, m=3, gaussian=False):
    """
    Inputs:
    -------
    N: sample size
    K: dimension of the Normal/Student distribution
    m: number of mixture components
    """
    np.random.seed(self.seed)
    self.st0 = np.random.get_state()  # get initial state of the RNG
    # np.random.set_state(self.st0)
    print("Drawing from", m, "component mixture distribution.")
    alphas = gamma.rvs(5, size=m)  # shape parameter
    # print(sum(alphas))  # equivalent sample size
    self.p = dirichlet.rvs(alpha=alphas, size=1)[0]
    self.phi_is = multinomial.rvs(1, self.p, size=N)  # draw from categorical p.m.f.
    self.x_draws = np.zeros((N, K))
    self.hyper_loc, self.hyper_scale, self.thetas = dict(), dict(), dict()
    self.var, self.covs, self.rdraws = tuple(), tuple(), tuple()
    for i in range(m):
        self.hyper_loc["mean" + str(i + 1)] = norm.rvs(size=1, loc=0, scale=5)
        self.hyper_scale["scale" + str(i + 1)] = 1 / gamma.rvs(5, size=1)
        self.thetas["mean" + str(i + 1)] = norm.rvs(
            size=K,
            loc=self.hyper_loc["mean" + str(i + 1)],
            scale=self.hyper_scale["scale" + str(i + 1)])
        self.thetas["Sigma" + str(i + 1)] = np.eye(K) * (1 / gamma.rvs(5, size=K))
        self.thetas["nu" + str(i + 1)] = randint.rvs(K + 2, K + 10, size=1)[0]
        if gaussian:
            self.covs += (self.thetas['Sigma' + str(i + 1)],)
        else:
            self.covs += (wishart.rvs(df=self.thetas['nu' + str(i + 1)],
                                      scale=self.thetas['Sigma' + str(i + 1)],
                                      size=1),)
        # variance-covariance matrix of the i-th Student-t component
        self.var += (self.thetas["nu" + str(i + 1)] /
                     (self.thetas["nu" + str(i + 1)] - 2) * self.covs[i],)
        self.rdraws += (np.random.multivariate_normal(
            self.thetas["mean" + str(i + 1)], self.covs[i], N),)
        # repeat the phi vector to match the shape of the random matrix
        self.Phi = np.tile(self.phi_is[:, i], K).reshape(K, N).T
        self.x_draws += np.multiply(self.Phi, self.rdraws[i])
    return self.x_draws, np.argmax(self.phi_is, 1)  # X, latent
def multinomial_robust(NN, p, size=None):
    if NN < 1000:
        return multinomial.rvs(NN, p)
    else:
        # For large NN, build the draw from per-category binomials and
        # fill the last category with the residual so counts sum to NN.
        results = np.array([binomial_robust(NN, pi, size) for pi in p])
        last_entry = int(NN) - results[:-1].sum(0)
        # Redraw until the residual is non-negative (np.any also covers size > 1).
        while np.any(last_entry < 0):
            results = np.array([binomial_robust(NN, pi, size) for pi in p])
            last_entry = int(NN) - results[:-1].sum(0)
        results[-1] = last_entry
        return np.rollaxis(results, 0, results.ndim)
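# A sketch of the idea behind multinomial_robust above, under the
# assumption that binomial_robust is a numerically safe binomial sampler;
# np.random.binomial stands in for it here. Independent per-category
# binomials only approximate a true multinomial draw; the residual is
# assigned to the last category so the total sums to NN, and a negative
# residual would require redrawing, mirroring the retry loop above.
import numpy as np

NN, p = 10**7, np.array([0.5, 0.3, 0.2])
parts = np.array([np.random.binomial(NN, pi) for pi in p])
parts[-1] = NN - parts[:-1].sum()  # residual; redraw if this were negative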
def test_p_values_decreasing_and_in_range():
    p_0 = np.array([1 / 3, 1 / 3, 1 / 3])
    p_1 = np.array([2 / 9, 4 / 9, 3 / 9])
    sample_size = 40
    data = multinomial.rvs(1, p_1, size=sample_size)
    pvals = sequential_p_values(data, p_0)
    for ix in range(1, sample_size):
        assert pvals[ix] <= pvals[ix - 1]  # pvals should be non-increasing
    for pval in pvals:
        assert 0.0 <= pval <= 1.0
def samples(self, F, num_samples, Y_metadata=None):
    eF = safe_exp(F)
    den = 1 + eF.sum(1)[:, None]
    p = eF / np.tile(den, eF.shape[1])
    p = np.hstack((p, 1 / den))
    p = np.clip(p, 1e-9, 1 - 1e-9)
    p = p / np.tile(p.sum(1)[:, None], (1, p.shape[1]))
    samples = np.empty((F.shape[0], self.K))
    for i in range(F.shape[0]):
        samples[i, :] = multinomial.rvs(n=1, p=p[i, :], size=1)
    return self.invonehot(Y=samples)
def create_multinomial_doublet(X: np.ndarray, i: int, j: int, **kwargs):
    '''make a multinomial combination of 2 cells

    Parameters
    ----------
    X : np.array
        cell by genes matrix
    i : int
        randomly chosen ith cell
    j : int
        randomly chosen jth cell
    kwargs : dict
        dict with doublet_depth, cell_depths and cells_ids as keys;
        doublet_depth is an int, cell_depths is a list of all cells'
        total UMI counts as ints, and cells_ids is a list of lists of
        genes with counts for each cell

    Returns
    -------
    np.array or scipy.sparse.csr_matrix
        multinomial expression vector of two cells
    '''
    doublet_depth = kwargs["doublet_depth"]
    cell_depths = kwargs["cell_depths"]
    cells_ids = kwargs["cells_ids"]
    randomize_doublet_size = kwargs["randomize_doublet_size"]

    # add their counts
    dp = X[i] + X[j]
    non_zero_indexes = np.unique(cells_ids[i] + cells_ids[j])
    if issparse(X):
        dp = dp.data
    else:
        dp = np.ravel(dp)
    dp = dp[non_zero_indexes]
    # a huge hack caused by
    # https://github.com/numpy/numpy/issues/8317
    # fun fun fun https://stackoverflow.com/questions/23257587/how-can-i-avoid-value-errors-when-using-numpy-random-multinomial
    # okay with this hack because it only affects the probabilities
    # normalize
    dp /= dp.sum()
    if randomize_doublet_size:
        scale_factor = np.random.uniform(1., doublet_depth)
    else:
        scale_factor = doublet_depth
    # choose depth
    dd = int(scale_factor * (cell_depths[i] + cell_depths[j]) / 2)
    # sample counts from multinomial
    non_zero_probs = multinomial.rvs(n=dd, p=dp)
    probs = np.zeros(X.shape[1])
    probs[non_zero_indexes] = non_zero_probs
    return csr_matrix(probs) if issparse(X) else probs
def transform_single(index):
    column = X[:, index].copy()
    mask = pd.isnull(column)
    values, probabilities = self.statistics_[index]
    sample = np.argmax(multinomial.rvs(p=probabilities, n=1,
                                       size=mask.sum(),
                                       random_state=self.random_state),
                       axis=1)
    column[mask] = np.vectorize(lambda pick: values[pick])(sample)
    return column
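# The sampling step used in transform_single above, in isolation
# (illustrative names and values, not from the original source): draw a
# one-hot multinomial sample per missing entry and map each argmax back
# to the stored category values.
import numpy as np
from scipy.stats import multinomial

values = np.array(['a', 'b', 'c'])
probabilities = np.array([0.2, 0.5, 0.3])
n_missing = 4
picks = np.argmax(multinomial.rvs(n=1, p=probabilities, size=n_missing), axis=1)
filled = values[picks]  # four draws from {'a', 'b', 'c'} at the given probabilities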
def sample_from_multinomial(self, sampletimes=1):
    if self.valuenumber is None:
        self.set_random_numbers()
        self.set_random_probabilities()
    sample = np.zeros(sampletimes)
    for x in range(sampletimes):
        one_sample = multinomial.rvs(1, self.probabilities)
        sample[x] = np.where(one_sample == 1)[0] + 1
    self.temporary_sample = sample
def sample(self, size=1):
    import numpy as np
    if isinstance(size, int):
        if self.input is None:
            size = [0] * size
        else:
            size = self.input.sample(size)
    elif self.input is None:
        raise ValueError('no input model provided to index into')
    params = [self.param(self.get_beta(idx)) for idx in size]
    if self.kind == 'con':
        from scipy.stats import norm
        return np.array([norm.rvs(loc=p, scale=self.sigma, size=1) for p in params])
    else:
        from scipy.stats import multinomial
        return np.array([multinomial.rvs(n=1, p=p, size=1).argmax() for p in params])
def __getitem__(self, index):
    indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
    X = self.X_train[indexes, :, :, :]
    if self.is_data_augment and not self.is_validate:
        if self.data_augment_noise_type == 'normal':
            noises = np.random.normal(loc=self.normal_loc, scale=1., size=X.shape)
        elif self.data_augment_noise_type == 'uniform':
            noises = np.random.uniform(low=-1., high=1., size=X.shape)
        X += 0.001 * noises
    if self.is_validate:
        y = self.y_train[indexes, :]
    else:
        num_label_case = np.sum([
            self.is_soft_label, self.is_sample_label_dist,
            self.is_mix_label_original
        ])
        assert num_label_case == len(self.list_label_case)
        idx_label_case = np.random.randint(num_label_case, size=len(indexes))
        y = np.zeros((len(indexes), self.y_train.shape[1]))
        for i, ind in enumerate(indexes):
            name_label_case = self.list_label_case[idx_label_case[i]]
            if name_label_case == 'soft_label':
                y[i] = self.y_train[ind]
            elif name_label_case == 'sample_label_dist':
                y_prob = self.y_train[ind].astype(np.float64)
                y_prob /= np.sum(y_prob)
                if np.sum(y_prob) > 1:  # due to numerical precision
                    y_prob += np.finfo(float).eps
                    y_prob /= np.sum(y_prob)
                y[i] = multinomial.rvs(1, y_prob, 1)
            elif name_label_case == 'mix_label_original':
                y[i] = self.y_original[ind]
            else:
                raise ValueError(
                    'not existing label case: {}'.format(name_label_case))
            # print(i, ind, name_label_case, y[i])
    assert np.sum(y) == len(indexes)
    assert np.all(
        np.abs(np.sum(y, axis=1) - 1.) < 1e-5
    ), 'label probability does not sum up to 1\n{}\n{}'.format(
        np.sum(y, axis=1), y)
    return X, y
def __call__(self, NUM=None, P=None, sampletimes=1):
    self.NUM = NUM
    if P is None:
        if NUM is None:
            self.gen_NUM()
        self.gen_P()
    else:
        self.P = P
    # Return a sample
    sample = np.zeros(sampletimes)
    for x in range(sampletimes):
        one_sample = multinomial.rvs(1, self.P)
        sample[x] = np.where(one_sample == 1)[0]
    return sample
def distribute_doses(self, model: SIR) -> Tuple[np.array]:
    if self.exhausted(model):
        return (np.zeros(self.age_ratios.shape),
                np.zeros(self.age_ratios.shape),
                np.zeros(self.age_ratios.shape))
    dV = (model.S[-1] / model.N[-1]) * self.daily_doses * self.effectiveness
    model.S[-1] -= dV
    model.parallel_forward_epi_step()
    distributed_doses = multinomial.rvs(self.daily_doses, self.age_ratios)
    effective_doses = self.effectiveness * distributed_doses
    immunizing_doses = (model.S[-1].mean() / model.N[-1].mean()) * effective_doses
    self.bin_populations -= immunizing_doses.astype(int)
    return (distributed_doses, effective_doses, immunizing_doses)
def gen_surrogate_counts(n_point, p_cat, p_low, p_high, alpha, xmin, xmax,
                         bins, discrete, random_state):
    """
    Generate surrogate hit counts

    :param n_point: total number of data points
    :param p_cat: probability of `low`, `pareto` and `high` categories
    :param p_low, p_high: hit probabilities within categories `low` and `high`
    :param alpha: exponent of the `pareto` regime
    :param xmin, xmax: boundaries of the `pareto` regime, so that all(low < xmin) and all(xmax <= high)
    :param bins: bin boundaries (used for calculating the cdf and/or binning samples)
    :param discrete: use zipf distribution instead of pareto, bool
    :param random_state:
    :return: surrogate hit counts
    """
    random_state = check_random_state(random_state)
    s_low, s_mid, s_high = multinomial.rvs(n_point, p_cat, random_state=random_state)
    # TODO: the same can be achieved by using the cdf and multinomial sampling;
    # see whether it is stable enough.
    sample = dispatch_rvs(alpha, xmin, xmax, discrete, size=s_mid, random_state=random_state)
    counts, _ = np.histogram(sample, bins)
    if s_low:
        counts[0:len(p_low)] = multinomial.rvs(s_low, p_low, random_state=random_state)
    if s_high:
        counts[len(counts) - len(p_high):len(counts)] = multinomial.rvs(
            s_high, p_high, random_state=random_state)
    return counts
def _resample(self, n, prob, classes, grouped_data):
    samples_no = multinomial.rvs(n=n, p=prob, random_state=self.random_state)
    subset_x, subset_y = [], []
    for no, j in enumerate(classes):
        data = grouped_data[j]
        resample_class = resample(data, replace=True,
                                  n_samples=samples_no[no],
                                  random_state=self.random_state)
        for sample in resample_class:
            subset_x.append(sample[0])
            subset_y.append(sample[1])
    return np.array(subset_x), np.array(subset_y)
def compute_conditional_z(q, y, mu, sigma_square):
    # Could be optimised in future work
    n = np.shape(y)[0]
    d = np.shape(mu)[0]
    z = np.empty(shape=(n))
    i = np.empty(shape=(n, d))
    for l in range(n):
        temp = np.empty(shape=(d))
        for j in range(d):
            temp[j] = q[j] * multivariate_normal.pdf(
                y[l, 0], mean=mu[j], cov=sigma_square[j])
        temp[temp < 0] = 0
        temp = temp / np.sum(temp)
        i[l, :] = multinomial.rvs(n=1, p=temp, size=1)[0]
    z = np.where(i == 1)[1]
    return z, i
def generate_data(n, seed=None, x=None):
    if seed is not None:
        np.random.seed(seed)
    if x is None:
        x = np.random.uniform(size=(n, 1))
    eta_1, eta_2, eta_3 = etas(x)
    class_probs = np.hstack((eta_1, eta_2, eta_3))
    y_cats = np.array([
        multinomial.rvs(1, class_probs[i],
                        random_state=(seed if i == 0 else None))
        for i in range(x.shape[0])
    ])
    y = np.argmax(y_cats, axis=1)
    return x, y