def getTrueDistn(true_probs):
    if true_probs is None:
        # Fall back to empirical full-time-result frequencies from the EPL data
        football_data = pd.read_csv(
            os.path.join(os.environ['HOME'],
                         'Bayesian_Inference/csv/EPL20172018.csv'))
        naive_probs = football_data['FTR'].value_counts(normalize=True)
        true_distn = multinomial(1, naive_probs)
    else:
        true_distn = multinomial(1, true_probs)
    return true_distn
async def generate_event_2(users, producer):
    m = {0: "good", 1: "neutral", 2: "bad"}
    n11 = norm(10, 2)
    n12 = norm(20, 5)
    rv = multinomial(1, [0.3, 0.2, 0.5])

    def gen_event(user):
        return (
            "user_%s" % user,
            {
                "userId": "user_%s" % user,
                "userValue3": round(n11.rvs() if user % 4 == 0 else n12.rvs(), 2),
                "userValue4": m[int(np.argmax(rv.rvs()))],
                "timestamp": int((datetime.utcnow()
                                  - datetime(1970, 1, 1)).total_seconds() * 1000)
            }
        )

    while True:
        size = random.randint(10, 20)
        print("Event 2", size)
        for user in random.sample(users, size):
            user_id, user_event = gen_event(user)
            producer.produce(topic="dev-v1-avro-event2",
                             key={"user": user_id},
                             value=user_event)
        await asyncio.sleep(5)
def data_generating_process(N, sigma_0, p_domain, gamma, V, theta, coef,
                            beta=None, random_state=None):
    """
    Sample a synthetic corpus of N documents from a two-domain topic model,
    along with binary labels generated from the latent topic mixtures.
    """
    ## Set Random State
    if random_state is not None:
        np.random.seed(random_state)
    ## Update Beta
    if beta is None:
        beta = 1 / V
    ## Convert Data Types
    theta = np.array(theta)
    coef = np.array(coef)
    ## Normalization of Parameters
    theta = theta / theta.sum(axis=1, keepdims=True)
    ## Update Document Topic Concentration
    theta = theta * sigma_0
    ## Generate Topic-Word Distributions
    phi = stats.dirichlet([beta] * V).rvs(theta.shape[1])
    ## Data Storage
    X_latent = np.zeros((N, coef.shape[1]), dtype=float)
    X = np.zeros((N, phi.shape[1]), dtype=int)
    D = np.zeros(N, dtype=int)
    ## Sample Procedure
    for n in tqdm(range(N), "Sampling"):
        ## Sample Domain
        D[n] = int(np.random.rand() < p_domain)
        ## Sample Document Topic Mixture (Conditioned on Domain)
        X_latent[n] = stats.dirichlet(theta[D[n]]).rvs()
        ## Sample Number of Words
        n_d = stats.poisson(gamma).rvs()
        ## Create Document
        for _ in range(n_d):
            ## Sample Topic
            z = np.where(stats.multinomial(1, X_latent[n]).rvs()[0] > 0)[0][0]
            ## Sample Word
            w = np.random.choice(phi.shape[1], p=phi[z])
            ## Cache
            X[n, w] += 1
    ## Standardize
    X_latent_normed = standardize(X_latent, D)
    ## Compute P(y)
    py = np.zeros(N)
    py[D == 0] = (1 / (1 + np.exp(-coef[[0]].dot(X_latent_normed[D == 0].T))))[0]
    py[D == 1] = (1 / (1 + np.exp(-coef[[1]].dot(X_latent_normed[D == 1].T))))[0]
    ## Sample Y
    y = np.zeros(N)
    y[D == 0] = (np.random.rand((D == 0).sum()) < py[D == 0]).astype(int)
    y[D == 1] = (np.random.rand((D == 1).sum()) < py[D == 1]).astype(int)
    return X_latent, X, y, D, theta, phi
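# Hedged usage sketch (not part of the original source): the document-sampling
# core of data_generating_process, reduced to one standalone draw so the
# multinomial topic step is easy to see. All values below are illustrative
# assumptions.
import numpy as np
from scipy import stats

_V_DEMO = 8                                    # assumed vocabulary size
_theta_demo = np.array([2.0, 1.0, 0.5])        # assumed topic concentration
_phi_demo = stats.dirichlet([1.0 / _V_DEMO] * _V_DEMO).rvs(len(_theta_demo))

_doc_mix = stats.dirichlet(_theta_demo).rvs()[0]   # per-document topic mixture
_doc = np.zeros(_V_DEMO, dtype=int)
for _ in range(stats.poisson(20).rvs()):
    _z = np.argmax(stats.multinomial(1, _doc_mix).rvs()[0])  # sample a topic
    _w = np.random.choice(_V_DEMO, p=_phi_demo[_z])          # sample a word
    _doc[_w] += 1
print(_doc)  # bag-of-words counts for one synthetic document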
def compare_case_control(case_counts, control_counts):
    results = []
    for pos in case_counts:
        this_case_counts = case_counts[pos]
        case_A = this_case_counts.A
        case_T = this_case_counts.T
        case_G = this_case_counts.G
        case_C = this_case_counts.C
        case_total = case_A + case_T + case_G + case_C
        if pos in control_counts:
            this_control_counts = control_counts[pos]
            control_A = this_control_counts['A']
            control_T = this_control_counts['T']
            control_G = this_control_counts['G']
            control_C = this_control_counts['C']
            control_total = control_A + control_T + control_G + control_C
            control_A_proportion = control_A / control_total
            control_T_proportion = control_T / control_total
            control_G_proportion = control_G / control_total
            control_C_proportion = control_C / control_total
            # Model case counts as draws from the control base proportions
            rv = multinomial(case_total, [
                control_A_proportion, control_T_proportion,
                control_G_proportion, control_C_proportion
            ])
            probability = rv.pmf([case_A, case_T, case_G, case_C])
            if probability <= 0.001:
                results.append(
                    (probability, pos, case_A, case_T, case_G, case_C,
                     control_A, control_T, control_G, control_C))
    return results
def sample(self, batch_size, buckets=False):
    """Samples a batch of datapoints.

    Args:
        batch_size (int): Number of datapoints to sample.
        buckets (bool): Indicates if bucket indices should be returned.

    Returns:
        Datapoint object with sampled datapoints stacked along the 0 axis.

    Raises:
        ValueError: If the buffer is empty.
    """
    if self._hierarchy_depth > 1:
        samples = [self._sample_one() for _ in range(batch_size)]
    else:
        p = np.ones((len(self._buffer_hierarchy),), dtype=np.float32)
        p = np.atleast_1d(p) / p.sum()
        samples = []
        # Split the batch across buckets with a single multinomial draw
        distribution = multinomial(batch_size, p=p)
        rvs = distribution.rvs(1).squeeze(axis=0)
        for bucket, n_samples in zip(self._buffer_hierarchy, rvs):
            if self._hierarchy_depth > 0:
                buffer = self._buffer_hierarchy[bucket]
            else:
                buffer = self._buffer_hierarchy
            samples_b = buffer.sample(n_samples)
            if buckets:
                samples_b = samples_b + (np.full(n_samples, bucket),)
            samples.append(samples_b)
    return data.nested_concatenate(samples)
def proba(self, s):
    """
    Given a state observation :math:`s`, return a probability distribution
    over all possible actions.

    Parameters
    ----------
    s : state observation

        Depending on the observation space, `s` may be an integer or an
        array of floats.

    Returns
    -------
    dist : scipy.stats probability distribution

        Depending on the action space, this may be a discrete distribution
        (typically a categorical/multinomial distribution) or a continuous
        distribution (typically a normal distribution).

    """
    X_s = self.X_next(s)
    P = self.batch_eval(X_s)
    if isinstance(self.env.action_space, Discrete):
        return st.multinomial(n=1, p=P[0])
    else:
        raise NotImplementedError(
            "I haven't yet implemented continuous action spaces; "
            "please send me a message to let me know if this is holding "
            "you back. -kris")
def getMargin(comps):
    # this implementation: only Beta, Weibull and Multinomial (categorical)
    mtype = comps['mtype']
    params = comps['params']
    c_nr = comps['comps_nr']
    if mtype == 'Beta':
        dists = []
        for c_id in range(c_nr):
            # labels = ['a', 'b']
            param = params[c_id][:2]
            dist = stt.beta(a=param[0], b=param[1])
            dists.append(dist)
    elif mtype == 'Weibull':
        dists = []
        for c_id in range(comps['comps_nr']):
            # labels = ['c', 'scale']
            param = [params[(p_id * c_nr) + c_id] for p_id in range(2)]
            dist = stt.weibull_min(param[0], 0., param[1])
            dists.append(dist)
    elif mtype == 'Multinomial':
        # TODO: implement properly
        dists = [stt.multinomial(1, params)]
    try:
        dists[0].interval(1.)
    except Exception:
        # e.g. multinomial has no interval(); fall back to an empty domain
        domain = []
    else:
        domains = np.array(
            [dists[d_id].interval(1.) for d_id in range(len(dists))])
        domain_raw = [domains[:, 0].max(), domains[:, 1].min()]
        delta = 0.001 * (domain_raw[1] - domain_raw[0])
        domain = [domain_raw[0] + delta, domain_raw[1] - delta]
    return dists, domain
def calc_full_log_likelihood(count_matrix, node_membership, duration,
                             bp_lambda, num_classes,
                             add_com_assig_log_prob=True):
    """
    Calculates the full log likelihood of the Poisson baseline model.

    :param count_matrix: n_classes x n_classes matrix where entry ij denotes the number of events in block-pair ij
    :param node_membership: (list) membership of every node to one of K classes
    :param duration: (int) duration of the network
    :param bp_lambda: n_classes x n_classes matrix where entry ij is the lambda of the block pair ij
    :param num_classes: (int) number of blocks / classes
    :param add_com_assig_log_prob: if True, adds the likelihood of the community assignment to the total
                                   log-likelihood.

    :return: log-likelihood of the Poisson baseline model
    """
    log_likelihood = 0
    bp_size = utils.calc_block_pair_size(node_membership, num_classes)
    bp_ll = count_matrix * np.log(bp_lambda) - (bp_lambda * duration * bp_size)
    log_likelihood += np.sum(bp_ll)

    if add_com_assig_log_prob:
        # Adding the log probability of the community assignments to the full log likelihood
        n_nodes = len(node_membership)
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob_mle = block_count / sum(block_count)
        rv_multi = multinomial(n_nodes, class_prob_mle)
        log_prob_community_assignment = rv_multi.logpmf(block_count)
        log_likelihood += log_prob_community_assignment

    return log_likelihood
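# Hedged sketch (assumption, not from the source): the community-assignment
# term of the log-likelihood in isolation, to show what multinomial.logpmf
# contributes above. The toy membership vector is illustrative.
import numpy as np
from scipy.stats import multinomial

_membership = np.array([0, 0, 1, 1, 1, 2])
_, _block_count = np.unique(_membership, return_counts=True)
_class_prob_mle = _block_count / _block_count.sum()
_rv = multinomial(len(_membership), _class_prob_mle)
print(_rv.logpmf(_block_count))  # log P(block sizes | MLE class probabilities)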
def initial_logp(states, p_transition):
    initial_state = states[0]
    states_oh = np.eye(len(p_transition))
    eq_p = equilibrium_distribution(p_transition)
    return (
        multinomial(n=1, p=eq_p)
        .logpmf(states_oh[initial_state].squeeze())
    )
def BRIE_base_lik(psi, counts, lengths):
    """Base likelihood function of the BRIE model"""
    size_vect = np.array([psi, (1 - psi), 1]) * lengths
    prob_vect = size_vect / np.sum(size_vect)
    rv = multinomial(np.sum(counts), prob_vect)
    return rv.pmf(counts)
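# Hedged usage sketch: a crude grid search for the maximum-likelihood psi with
# BRIE_base_lik, assuming the scipy imports the function relies on are in
# scope. The read counts and effective lengths are illustrative assumptions,
# not values from the source.
import numpy as np

_psi_grid = np.linspace(0.05, 0.95, 19)
_counts = np.array([30, 10, 5])               # assumed [incl., excl., ambiguous] reads
_lengths = np.array([1000.0, 800.0, 200.0])   # assumed effective lengths
_liks = [BRIE_base_lik(_psi, _counts, _lengths) for _psi in _psi_grid]
print(_psi_grid[int(np.argmax(_liks))])       # psi with the highest likelihood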
def main():
    N = 1000
    s1 = norm(0, 1).rvs(size=N)
    s2 = binom(100, 0.1).rvs(size=N)
    s3 = gamma(1, 1).rvs(size=N)
    s4 = beta(1, 2).rvs(size=N)
    s5 = multinomial(200, [1/3, 1/3, 1/3]).rvs(size=N)
    return s1[-1] + s2[-1] + s3[-1] + s4[-1] + s5[-1]
def MixedNormalDistribution(weights, means, covariances, size):
    # Pick a component for each draw, then sample from that Gaussian
    cases = multinomial(1, weights).rvs(size)
    rvs = []
    for case in cases:
        k = case.tolist().index(1)
        rvs.append(multivariate_normal.rvs(means[k], covariances[k]))
    rvs = np.asarray(rvs)
    return [rvs[:, k] for k in range(len(rvs[0]))]
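# Hedged usage sketch: drawing from a two-component 2-D Gaussian mixture and
# unpacking the per-dimension samples, assuming the scipy imports the function
# relies on are in scope. Weights, means, and covariances are illustrative
# assumptions.
import numpy as np

_x, _y = MixedNormalDistribution(
    weights=[0.7, 0.3],
    means=[np.zeros(2), np.array([5.0, 5.0])],
    covariances=[np.eye(2), 0.5 * np.eye(2)],
    size=1000)
print(np.mean(_x), np.mean(_y))  # both pulled toward the 0.3-weighted mode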
def state_logp(states, p_transition):
    logp = 0
    # states are 0, 1, 2, but we model them as [1, 0, 0], [0, 1, 0], [0, 0, 1]
    states_oh = np.eye(len(p_transition))
    for curr_state, next_state in zip(states[:-1], states[1:]):
        p_tr = p_transition[curr_state]
        logp += multinomial(n=1, p=p_tr).logpmf(states_oh[next_state])
    return logp
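# Hedged sketch (assumption): a minimal equilibrium_distribution (the original
# source presumably defines its own), plus a full sequence score chaining
# initial_logp and state_logp. The transition matrix and state sequence are
# illustrative.
import numpy as np

def equilibrium_distribution(p_transition):
    # stationary distribution: the eigenvector of P.T with eigenvalue 1
    evals, evecs = np.linalg.eig(np.asarray(p_transition).T)
    pi = np.real(evecs[:, np.argmin(np.abs(evals - 1.0))])
    return pi / pi.sum()

_p_transition = np.array([[0.9, 0.1, 0.0],
                          [0.1, 0.8, 0.1],
                          [0.0, 0.2, 0.8]])
_states = [0, 0, 1, 2, 2]
print(initial_logp(_states, _p_transition)
      + state_logp(_states, _p_transition))  # joint log-prob of the path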
def GeneratePoint(self, probs):
    mb = ss.multinomial(
        1, probs)  # set n = 1 in the multinomial to simulate a multi-Bernoulli
    state = np.flatnonzero(
        mb.rvs(1)[0])[0] + 1  # should always be 1 in this sample
    self.states.append(state)
    gaussian = self.rvs[state - 1]
    point = gaussian.rvs(1)
    self.points.append(point)
def resample(observation_states, observation_actions, support_states,
             support_policy, prior=None, alpha=1., punishment=0.,
             support_feature_ids=None, support_feature_state_dict=None,
             observation_goal_actions=None, T=1., return_best=False,
             **kwargs):
    r"""
    \sum p(O_i | g_z_i) x p(g_j)
    """
    # prob_vector over support_states or feature ids
    prob_vector = likelihood_vector(
        observation_states, observation_actions, support_policy,
        support_states, alpha=alpha, punishment=punishment,
        support_feature_ids=support_feature_ids,
        support_feature_state_dict=support_feature_state_dict,
        observation_goal_actions=observation_goal_actions)
    if prior is not None:
        prob_vector *= prior

    if return_best:
        chosen = np.argmax(prob_vector)
    else:
        prob_vector /= (np.sum(prob_vector) + eps)
        prob_vector = prob_vector**T
        prob_vector /= (np.sum(prob_vector) + eps)

        prob_sum = np.sum(prob_vector)
        while prob_sum < 0.99:
            prob_vector /= (np.sum(prob_vector) + eps)
            prob_sum = np.sum(prob_vector)
        try:
            assert prob_sum >= 0.95, "prob sum {} is lower than 1.".format(
                prob_sum)
        except AssertionError:
            from IPython import embed
            embed()
            sys.exit()

        rv = multinomial(n=1, p=prob_vector)
        chosen = np.argmax(rv.rvs(1))

    if support_feature_ids is None:
        goal_chosen = support_states[chosen]
        policy_chosen = support_policy[goal_chosen]
        return [goal_chosen, policy_chosen]
    else:
        # chosen is a feature id
        goal_chosen = support_feature_state_dict[chosen]
        policy_chosen = support_policy[goal_chosen[0]]
        return [goal_chosen[0], policy_chosen, chosen]
def test_entropy():
    """
    Test entropy.
    """
    cat_benchmark = stats.multinomial(n=1, p=[0.7, 0.3])
    expect_entropy = cat_benchmark.entropy().astype(np.float32)
    entropy = EntropyH()
    output = entropy()
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect_entropy) < tol).all()
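# Hedged sketch: the scipy reference value the test above compares against.
# A multinomial with n=1 is a categorical distribution, so this is just the
# Shannon entropy of [0.7, 0.3].
from scipy import stats
print(stats.multinomial(n=1, p=[0.7, 0.3]).entropy())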
def test_entropy_scalar(self):
    # The TFP Multinomial does not implement `entropy`, so we use scipy for
    # the tests.
    probs = np.asarray([0.1, 0.5, 0.4])
    total_count = 5
    scipy_entropy = stats.multinomial(n=total_count, p=probs).entropy()
    distrax_entropy_fn = self.variant(
        lambda x, y: multinomial.Multinomial._entropy_scalar(total_count, x, y))
    self.assertion_fn(distrax_entropy_fn(probs, np.log(probs)), scipy_entropy)
def mnll(true_counts, logits=None, probs=None):
    """
        Compute the multinomial negative log-likelihood between true counts
        and predicted values of a BPNet-like profile model.

        One of `logits` or `probs` must be given. If both are given,
        `logits` takes precedence.

        Args:
            true_counts (numpy.array): observed counts values

            logits (numpy.array): predicted logits values

            probs (numpy.array): predicted values as probabilities

        Returns:
            float: multinomial negative log-likelihood, averaged over
            positions
    """
    if logits is not None:
        # check for length mismatch
        if len(logits) != len(true_counts):
            raise quietexception.QuietException(
                "Length of logits does not match length of true_counts")

        # convert logits to softmax probabilities
        probs = logits - logsumexp(logits)
        probs = np.exp(probs)

    elif probs is not None:
        # check for length mismatch
        if len(probs) != len(true_counts):
            raise quietexception.QuietException(
                "Length of probs does not match length of true_counts")

        # check if probs sums to 1
        if abs(1.0 - np.sum(probs)) > 1e-3:
            raise quietexception.QuietException(
                "'probs' array does not sum to 1")
    else:
        # both 'probs' and 'logits' are None
        raise quietexception.QuietException(
            "At least one of probs or logits must be provided. "
            "Both are None.")

    # construct the multinomial distribution
    mnom = multinomial(np.sum(true_counts), probs)
    return -(mnom.logpmf(true_counts) / len(true_counts))
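# Hedged usage sketch: scoring a toy profile with mnll, assuming the module's
# own imports (logsumexp, quietexception) are in scope. The counts and logits
# are illustrative assumptions, not data from the source.
import numpy as np

_true_counts = np.array([2, 7, 1, 0, 5])
_logits = np.log(np.array([0.1, 0.5, 0.1, 0.05, 0.25]))
print(mnll(_true_counts, logits=_logits))  # average NLL per position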
def weighted_random_sampling(qnodes, coeffs, shots, argnums, *args, **kwargs):
    """Returns an array of length ``shots`` containing single-shot estimates
    of the Hamiltonian gradient. The shots are distributed randomly over
    the terms in the Hamiltonian, as per a multinomial distribution.

    Args:
        qnodes (Sequence[.QNode]): Sequence of QNodes, each one when evaluated
            returning the corresponding expectation value of a term in the
            Hamiltonian.
        coeffs (Sequence[float]): Sequence of coefficients corresponding to
            each term in the Hamiltonian. Must be the same length as
            ``qnodes``.
        shots (int): The number of shots used to estimate the Hamiltonian
            expectation value. These shots are distributed over the terms in
            the Hamiltonian, as per a multinomial distribution.
        argnums (Sequence[int]): the QNode argument indices which are trainable
        *args: Arguments to the QNodes
        **kwargs: Keyword arguments to the QNodes

    Returns:
        array[float]: the single-shot gradients of the Hamiltonian expectation
        value
    """
    # determine the shot probability per term
    prob_shots = np.abs(coeffs) / np.sum(np.abs(coeffs))

    # construct the multinomial distribution, and sample
    # from it to determine how many shots to apply per term
    si = multinomial(n=shots, p=prob_shots)
    shots_per_term = si.rvs()[0]

    grads = []

    for h, c, p, s in zip(qnodes, coeffs, prob_shots, shots_per_term):
        # if the number of shots is 0, do nothing
        if s == 0:
            continue

        # set the QNode device shots
        h.device.shots = [(1, s)]

        jacs = []
        for i in argnums:
            j = qml.jacobian(h, argnum=i)(*args, **kwargs)

            if s == 1:
                j = np.expand_dims(j, 0)

            # Divide each term by the probability per shot. This is
            # because we are sampling one at a time.
            jacs.append(c * j / p)

        grads.append(jacs)

    return [np.concatenate(i) for i in zip(*grads)]
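# Hedged sketch: the shot-allocation step in isolation, showing how the
# multinomial splits a fixed shot budget across Hamiltonian terms in
# proportion to |coefficient|. The coefficients are illustrative.
import numpy as np
from scipy.stats import multinomial

_coeffs = np.array([0.5, -1.2, 0.3])
_shots = 100
_prob_shots = np.abs(_coeffs) / np.sum(np.abs(_coeffs))
_shots_per_term = multinomial(n=_shots, p=_prob_shots).rvs()[0]
print(_shots_per_term, _shots_per_term.sum())  # allocation sums to the budget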
def test_entropy(self, dist_params):
    # The TFP Multinomial does not implement `entropy`, so we use scipy for
    # the tests.
    dist_params.update({
        'total_count': np.asarray([3, 10]),
    })
    dist = self.distrax_cls(**dist_params)
    entropy = list()
    for probs, counts in zip(dist.probs, dist.total_count):
        entropy.append(stats.multinomial(n=counts, p=probs).entropy())
    self.assertion_fn(self.variant(dist.entropy)(), np.asarray(entropy))
def calc_full_log_likelihood(block_pair_events, node_membership, bp_mu,
                             bp_alpha, bp_beta, duration, num_classes,
                             add_com_assig_log_prob=True):
    """
    Calculates the full log likelihood of the CHIP model.

    :param block_pair_events: (list) n_classes x n_classes where entry ij is a list of event lists between nodes in
                              block i to nodes in block j.
    :param node_membership: (list) membership of every node to one of K classes.
    :param bp_mu: n_classes x n_classes where entry ij is the mu of the block pair ij
    :param bp_alpha: n_classes x n_classes where entry ij is the alpha of the block pair ij
    :param bp_beta: n_classes x n_classes where entry ij is the beta of the block pair ij
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param add_com_assig_log_prob: if True, adds the likelihood of the community assignment to the total
                                   log-likelihood.

    :return: log-likelihood of the CHIP model
    """
    log_likelihood = 0
    for b_i in range(num_classes):
        for b_j in range(num_classes):
            bp_size = len(np.where(node_membership == b_i)[0]) * len(
                np.where(node_membership == b_j)[0])
            if b_i == b_j:
                bp_size -= len(np.where(node_membership == b_i)[0])

            log_likelihood += estimate_utils.block_pair_full_hawkes_log_likelihood(
                block_pair_events[b_i][b_j],
                bp_mu[b_i, b_j], bp_alpha[b_i, b_j], bp_beta[b_i, b_j],
                duration, block_pair_size=bp_size)

    if add_com_assig_log_prob:
        # Adding the log probability of the community assignments to the full log likelihood
        n_nodes = len(node_membership)
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob_mle = block_count / sum(block_count)
        rv_multi = multinomial(n_nodes, class_prob_mle)
        log_prob_community_assignment = rv_multi.logpmf(block_count)
        log_likelihood += log_prob_community_assignment

    return log_likelihood
def makeRGB(self, s):
    # random.seed(10)
    N = random.randint(1, self._max_block)
    mu = [1 / 3, 1 / 3, 1 / 3]
    rv = multinomial(N, mu, seed=s)
    X = rv.rvs(1)
    self._RED = X[0][0]
    self._GREEN = X[0][1]
    self._BLUE = X[0][2]
    self._N = self._RED + self._GREEN + self._BLUE
    self._orderdate = int(s / 10) + 1  # set the time arbitrarily? #datetime.now()
    # print(self._fill_date)
    self.print_info()
def posterior_mean(self, data):
    N = np.sum(data)
    p_hat = np.zeros(self.data_model.get_params_count())
    margin = 0
    for p, bscc_dist in zip(self.traces, self.traces_bscc_dist):
        # P = self.data_model.eval_bscc(p)
        prior = 1
        for p_i in p:
            prior *= beta(self.hyperparams['alpha'],
                          self.hyperparams['beta']).pdf(p_i)
        # llh = multinomial(N, P).pmf(data)
        llh = multinomial(N, bscc_dist).pmf(data)
        margin += llh * prior
        p_hat += np.array(p) * llh * prior
    p_hat = p_hat / margin
    log_llh = self.np_llh(self.data_model.eval_bscc(p_hat), data)
    return p_hat, log_llh
def __init__(self, n, p):
    if isinstance(n, numbers.Integral) and n >= 0:
        self.n = n
    #elif n == 0:
        #raise NotImplementedError #TODO
    else:
        raise Exception("n must be a non-negative integer")
    if sum(p) == 1 and min(p) >= 0:
        self.p = p
    else:
        raise Exception("Elements of p must be non-negative" +
                        " and sum to 1.")
    self.discrete = False
    # Evaluate the multinomial pmf at x. (The original lambda passed x as a
    # positional parameter to stats.multinomial, which is not a valid scipy
    # signature.)
    self.pdf = lambda x: stats.multinomial(n, p).pmf(x)
def posterior_mean(self, data):
    N = np.sum(data)
    p_hat = np.zeros(self.data_model.get_params_count())
    margin = 0
    for p in self.traces:
        P = self.data_model.sample_run_chain(p, max_trials=self.max_trials)
        prior = 1
        for p_i in p:
            prior *= beta(self.hyperparams['alpha'],
                          self.hyperparams['beta']).pdf(p_i)
        llh = multinomial(N, P).pmf(data)
        margin += llh * prior
        p_hat = p_hat + np.array(p) * llh * prior
    p_hat = p_hat / margin
    log_llh = self.np_llh(self.data_model.sample_run_chain(
        p_hat, max_trials=self.max_trials * 10), data)
    return p_hat, log_llh
def generate_data(n_gen):
    x_gen = stats.skewnorm.rvs(scale=0.1, size=n_gen * x_dim, a=2)
    x_gen = x_gen.reshape((n_gen, x_dim))
    mu_gen = np.apply_along_axis(func, 1, x_gen)
    y_gen = stats.skewnorm.rvs(loc=beta0, scale=sigma, size=n_gen, a=4)
    y_gen = mu_gen + y_gen
    # add a discrete shift drawn from one of four categories
    rv = stats.multinomial(1, [0.4, 0.3, 0.2, 0.1])
    y_gen += (rv.rvs(n_gen) * [1.4, -.2, 0, -1.8]).sum(1)
    y_gen = np.array(y_gen, dtype='f4')
    y_gen = torch.from_numpy(y_gen)
    y_gen = torch.sigmoid(y_gen).numpy()  # F.sigmoid is a deprecated alias
    return x_gen, y_gen[:, None]
def Sample_Mixture(f, num):
    """
    Inputs:
        f: the mixture to be sampled
        num: the number of points to sample from the mixture
    Outputs:
        m_points: the model points that have been selected (Model_Points)
    """
    # we need a multinomial distribution for component selection
    m_all = []
    q_pick = multinomial(num, f.w).rvs(1)[0]
    for j in range(f.n):
        fj = multivariate_normal(mean=f.m[j, :], cov=f.cov[j, :, :])
        m_j = fj.rvs(q_pick[j]).reshape([q_pick[j], f.d])
        m_all.append(m_j)
    m_points = Model_Points(np.concatenate(m_all))
    return m_points
def _fit_multinomial(self, X, col_idx, y):
    """
    Fits classwise multinomial distributions to `X[:, col_idx]` using the
    sample parameter MLEs.

    Parameters
    ----------
    X : np.ndarray
        Matrix of features.
    col_idx : int
        The column index for the column to fit the multinomial to.
    y : np.ndarray
        Vector of target classes.
    """
    fitted_distributions = {}
    all_X_values = list(range(int(X[:, col_idx].max()) + 1))

    # For each class...
    for val in sorted(set(y)):
        n = np.sum(y == val)  # Number of instances in the class
        relevant_subset = X[y == val, col_idx]  # Rows in X belonging to the class
        value_counts = Counter(relevant_subset)  # Counts of the values in X in the class
        all_x_value_counts_smoothed = OrderedDict({
            x_val: self.alpha  # Just alpha if no values
            if x_val not in value_counts
            else value_counts[x_val] + self.alpha  # Alpha + num value occurrences otherwise
            for x_val in all_X_values  # across the values in the column of X
        })
        # n + alpha * m
        normalizer = n + self.alpha * len(all_X_values)
        # Create the distribution for each class.
        fitted_distributions[val] = stats.multinomial(
            n=n,
            p=np.array(list(all_x_value_counts_smoothed.values())) / normalizer)

    if self.verbose:
        logger.info(f"Fitted multinomials for column {col_idx}")
        for k, v in fitted_distributions.items():
            logger.info(f"Class: {k} p: {np.round(v.p, 2)}")

    return fitted_distributions
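# Hedged sketch: the Laplace-smoothing arithmetic used in _fit_multinomial,
# outside the class. alpha and the toy value counts are illustrative
# assumptions.
import numpy as np
from scipy import stats

_alpha = 1.0
_counts = {0: 3, 1: 0, 2: 7}        # feature value -> occurrences in one class
_n = sum(_counts.values())
_m = len(_counts)
_p = np.array([_counts[v] + _alpha for v in sorted(_counts)]) / (_n + _alpha * _m)
_dist = stats.multinomial(n=_n, p=_p)   # classwise smoothed multinomial
print(_p, _p.sum())                     # smoothed probabilities sum to 1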
def P_tot(l, n_array, pr1, pr2):
    p0, p1, p2, p3 = Ps(pr1, pr2)
    va = multinomial(l, [p0, p1, p2, p3])
    lista_soma = []
    for h in range(0, n_array.size):
        n = n_array[h]
        soma = 0
        for k in range(0, int(n / 3) + 1):
            for j in range(0, int(n / 2) + 1):
                i = 0
                while i + 2 * j + 3 * k <= n:
                    if i + 2 * j + 3 * k == n:
                        soma += va.pmf([l - (i + j + k), i, j, k])
                    i += 1
        lista_soma.append(soma)
    n_soma = np.array(lista_soma)
    return n_soma
def __compute_B(self, data):
    self.multinomial = [multinomial(1, self.eta[i, :])
                        for i in range(self.K)]
    self.b = np.zeros((self.T, self.K))
    for t in range(self.T):
        self.b[t, :] = [self.eta[y, int(data[t, y])] for y in range(self.K)]
    '''
    T = len(data)
    self.b = np.zeros((T, self.K))
    for t in range(T):
        print(data[t,:])
        self.b[t, :] = [self.eta[y, data[t, :]] for y in range(self.K)]
    '''
    # additional computation for log-scale
    if self.hmm_type == 'log-scale':
        self.log_b = np.zeros((self.T, self.K))
        for t in range(self.T):
            self.log_b[t, :] = np.log(self.b[t, :])
    return