def get_reversible_differential_entropy(Q, stationary_distn, t):
    """
    Compute differential entropy of a time-reversible Markov jump process.

    This is the differential entropy of the distribution over trajectories.
    The rate matrix Q must be time-reversible
    with the given stationary distribution.

    Parameters
    ----------
    Q : 2d ndarray
        Matrix of transition rates.
    stationary_distn : 1d ndarray
        Stationary distribution of the process.
    t : float
        Amount of time over which the process is observed.

    Returns
    -------
    differential_entropy : float
        The differential entropy of the distribution over trajectories.
        This is not the Shannon entropy, and may be negative.

    """
    # Entropy of the initial state, drawn from the stationary distribution;
    # reduced to a scalar so that a float is returned as documented.
    stationary_entropy = -special.xlogy(stationary_distn, stationary_distn).sum()
    # Expected per-time-unit contribution of the transitions.
    tmp_trans = Q - special.xlogy(Q, Q)
    transition_entropy = tmp_trans.sum(axis=0).dot(stationary_distn)
    differential_entropy = stationary_entropy + t * transition_entropy
    return differential_entropy
def _jensen_shannon_divergence(a, b):
    """Compute Jensen-Shannon Divergence

    Lifted from github/scipy:
    https://github.com/luispedro/scipy/blob/ae9ad67bfc2a89aeda8b28ebc2051fff19f1ba4a/scipy/stats/stats.py

    Parameters
    ----------
    a : array-like
        possibly unnormalized distribution
    b : array-like
        possibly unnormalized distribution. Must be of same size as ``a``.

    Returns
    -------
    j : float
    """
    a = np.asanyarray(a, dtype=float)
    b = np.asanyarray(b, dtype=float)
    a = a / a.sum()
    b = b / b.sum()
    m = (a + b)
    m /= 2.
    # Where the mixture is zero, both a and b are zero; substituting 1 keeps
    # the ratios finite, and xlogy maps the 0 * log(0) terms to 0.
    m = np.where(m, m, 1.)
    return 0.5 * np.sum(special.xlogy(a, a / m) + special.xlogy(b, b / m))
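# Usage sketch for _jensen_shannon_divergence above, assuming that function is
# in scope together with `import numpy as np` and `from scipy import special`.
# The divergence of a distribution with itself is 0, and for two
# non-overlapping distributions it reaches its maximum, log(2) in nats.
import numpy as np

print(_jensen_shannon_divergence([1, 1, 1, 1], [1, 1, 1, 1]))             # 0.0
print(np.isclose(_jensen_shannon_divergence([3, 0], [0, 5]), np.log(2)))  # True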
def objective(AB):
    # From Platt (beginning of Section 2.2)
    P = expit(-(AB[0] * F + AB[1]))
    loss = -(xlogy(T, P) + xlogy(T1, 1. - P))
    if sample_weight is not None:
        return (sample_weight * loss).sum()
    else:
        return loss.sum()
def differential_entropy_helper( Q, prior_root_distn, post_root_distn, post_dwell_times, post_transitions, ): """ Use posterior expectations to help compute differential entropy. Parameters ---------- Q : weighted directed networkx graph Rate matrix. prior_root_distn : dict Prior distribution at the root. If Q is a time-reversible rate matrix, then the prior root distribution could be the stationary distribution associated with Q. post_root_distn : dict Posterior state distribution at the root. post_dwell_times : dict Posterior expected dwell time for each state. post_transitions : weighted directed networkx graph Posterior expected count of each transition type. Returns ------- diff_ent_init : float Initial state distribution contribution to differential entropy. diff_ent_dwell : float Dwell time contribution to differential entropy. diff_ent_trans : float Transition contribution to differential entropy. """ # Get the total rates. total_rates = get_total_rates(Q) # Initial state distribution contribution to differential entropy. diff_ent_init = 0.0 for state, prob in post_root_distn.items(): diff_ent_init -= special.xlogy(prob, prior_root_distn[state]) # Dwell time contribution to differential entropy. diff_ent_dwell = 0.0 for s in set(total_rates) & set(post_dwell_times): diff_ent_dwell += post_dwell_times[s] * total_rates[s] # Transition contribution to differential entropy. diff_ent_trans = 0.0 for sa in set(Q) & set(post_transitions): for sb in set(Q[sa]) & set(post_transitions[sa]): rate = Q[sa][sb]['weight'] ntrans_expected = post_transitions[sa][sb]['weight'] diff_ent_trans -= special.xlogy(ntrans_expected, rate) # Return the contributions to differential entropy. return diff_ent_init, diff_ent_dwell, diff_ent_trans
def test_chi2_contingency_g():
    c = np.array([[15, 60], [15, 90]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood',
                                    correction=False)
    assert_allclose(g, 2*xlogy(c, c/e).sum())

    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood',
                                    correction=True)
    c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])
    assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())

    c = np.array([[10, 12, 10], [12, 10, 10]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')
    assert_allclose(g, 2*xlogy(c, c/e).sum())
def differential_entropy_helper( Q, prior_root_distn, post_root_distn, post_dwell_times, post_transitions, ): """ Use posterior expectations to help compute differential entropy. Parameters ---------- Q : 2d ndarray Rate matrix. prior_root_distn : 1d ndarray Prior distribution at the root. If Q is a time-reversible rate matrix, then the prior root distribution could be the stationary distribution associated with Q. post_root_distn : 1d ndarray Posterior state distribution at the root. post_dwell_times : 1d ndarray Posterior expected dwell time for each state. post_transitions : 2d ndarray Posterior expected count of each transition type. Returns ------- diff_ent_init : float Initial state distribution contribution to differential entropy. diff_ent_dwell : float Dwell time contribution to differential entropy. diff_ent_trans : float Transition contribution to differential entropy. """ _density.check_square_dense(Q) _density.check_square_dense(post_transitions) nstates = Q.shape[0] # Get the total rates. total_rates = get_total_rates(Q) # Initial state distribution contribution to differential entropy. diff_ent_init = -special.xlogy(post_root_distn, prior_root_distn).sum() # Dwell time contribution to differential entropy. diff_ent_dwell = post_dwell_times.dot(total_rates) # Transition contribution to differential entropy. diff_ent_trans = -special.xlogy(post_transitions, Q).sum() # Return the contributions to differential entropy. return diff_ent_init, diff_ent_dwell, diff_ent_trans
def poisson(self, x, mu):
    """
    Poisson function taken from:
    https://github.com/scipy/scipy/blob/master/scipy/stats/_discrete_distns.py

    For license see documentation/BSDLicense_scipy.md

    Author: Travis Oliphant 2002-2011 with contributions from
            SciPy Developers 2004-2011
    """
    if len(np.atleast_1d(x)) == 1:
        check_val = x
    else:
        check_val = x[0]

    if check_val > 1e18:
        self.log.warning('The current value in the poissonian distribution '
                         'exceeds 1e18! Due to numerical imprecision a valid '
                         'functional output cannot be guaranteed any more!')

    # According to the central limit theorem, a Poissonian distribution
    # approaches a Gaussian distribution for large enough x. Since the
    # numerical precision available for computing the logarithmized
    # Poissonian and exponentiating it back is limited, a self-defined
    # cutoff is introduced at 1e12. Beyond that value a Gaussian
    # distribution is assumed, which is a valid approximation.
    if check_val < 1e12:
        return np.exp(xlogy(x, mu) - gammaln(x + 1) - mu)
    else:
        return np.exp(-((x - mu) ** 2) / (2 * mu)) / (np.sqrt(2 * np.pi * mu))
def set_cost_fn(self, reg_str=' ', hyperp=0.1):
    """Set the cost function. Also assign a regularization to the same
    cost function. Regularizations are between l1 and l2, where the
    weights are summed over using a hyperparameter."""
    self.hyperp = hyperp
    self.reg_str = reg_str
    if self.cost_fn_str == "MSE":
        # Mean squared error: half the scaled sum of squared residuals plus
        # the regularization term.
        self.cost_fn = lambda u: \
            1. / (2 * self.features) * np.sum((self.y - u)**2) + \
            self.regularization()
        # Derivative of the squared-error term with respect to the prediction.
        self.cost_der = lambda u: -(self.y - u)
    elif self.cost_fn_str == "xentropy":
        # Binary cross entropy; xlogy handles the 0 * log(0) cases.
        self.cost_fn = lambda u: -np.sum(sps.xlogy(self.y, u) +
                                         sps.xlogy(1 - self.y, 1 - u)) / self.y.shape[0]
        # + self.regularization()
        self.cost_der = lambda u: u - self.y.reshape(-1, 1)
    else:
        raise SyntaxError("Cost function must be 'MSE' or 'xentropy'")
def _boost_real(self, iboost, X, y, sample_weight, random_state): """Implement a single boost using the SAMME.R real algorithm.""" estimator = self._make_estimator(random_state=random_state) estimator.fit(X, y, sample_weight=sample_weight) y_predict_proba = estimator.predict_proba(X) if iboost == 0: self.classes_ = getattr(estimator, 'classes_', None) self.n_classes_ = len(self.classes_) y_predict = self.classes_.take(np.argmax(y_predict_proba, axis=1), axis=0) # Instances incorrectly classified incorrect = y_predict != y # Error fraction estimator_error = np.mean( np.average(incorrect, weights=sample_weight, axis=0)) # Stop if classification is perfect if estimator_error <= 0: return sample_weight, 1., 0. # Construct y coding as described in Zhu et al [2]: # # y_k = 1 if c == k else -1 / (K - 1) # # where K == n_classes_ and c, k in [0, K) are indices along the second # axis of the y coding with c being the index corresponding to the true # class label. n_classes = self.n_classes_ classes = self.classes_ y_codes = np.array([-1. / (n_classes - 1), 1.]) y_coding = y_codes.take(classes == y[:, np.newaxis]) # Displace zero probabilities so the log is defined. # Also fix negative elements which may occur with # negative sample weights. proba = y_predict_proba # alias for readability np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba) # Boost weight using multi-class AdaBoost SAMME.R alg estimator_weight = (-1. * self.learning_rate * ((n_classes - 1.) / n_classes) * xlogy(y_coding, y_predict_proba).sum(axis=1)) # Only boost the weights if it will fit again if not iboost == self.n_estimators - 1: # Only boost positive weights sample_weight *= np.exp(estimator_weight * ((sample_weight > 0) | (estimator_weight < 0))) return sample_weight, 1., estimator_error
def numeric(self, values):
    x = values[0]
    results = -xlogy(x, x)
    # Return -inf outside the domain
    if np.isscalar(results):
        return -np.inf
    else:
        results[np.isnan(results)] = -np.inf
        return results
def _revaluate(self, r, t=0.):
    """Potential as a function of r and time"""
    x = r / self.a
    return -self.a**2. * numpy.pi * (
        -numpy.pi / x
        + 2. * (1. / x + 1) * numpy.arctan(1 / x)
        + (1. / x + 1) * numpy.log((1. + 1. / x)**2. / (1. + 1 / x**2.))
        + special.xlogy(2. / x, 1. + x**2.))
def pointwise_loss(self, y_true, raw_predictions):
    # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to
    # return a view.
    raw_predictions = raw_predictions.reshape(-1)
    # TODO: For speed, we could remove the constant xlogy(y_true, y_true)
    # Advantage of this form: minimum of zero at raw_predictions = log(y_true).
    loss = (xlogy(y_true, y_true) - y_true * (raw_predictions + 1)
            + np.exp(raw_predictions))
    return loss
def binary_log_loss(y_true, y_prob):
    """Compute binary logistic loss for classification.

    This is identical to log_loss in binary classification case,
    but is kept for its use in multilabel case.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.

    y_prob : array-like of float, shape = (n_samples, n_classes)
        Predicted probabilities, as returned by a classifier's
        predict_proba method.

    Returns
    -------
    loss : float
        The degree to which the samples are correctly predicted.
    """
    return -(xlogy(y_true, y_prob) +
             xlogy(1 - y_true, 1 - y_prob)).sum() / y_prob.shape[0]
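# Quick check of binary_log_loss above, assuming it is in scope together with
# `import numpy as np` and `from scipy.special import xlogy`. For a single
# column of probabilities the value matches the usual binary cross-entropy
# -mean(y*log(p) + (1-y)*log(1-p)); the inputs below are arbitrary examples.
import numpy as np

y_true = np.array([[1.0], [0.0], [1.0]])
y_prob = np.array([[0.9], [0.2], [0.6]])
manual = -np.mean(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))
print(np.isclose(binary_log_loss(y_true, y_prob), manual))  # True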
def beta_logpdf(x, a, b, loc=0, scale=1):
    x = (x - loc) / scale
    z = special.xlog1py(b - 1.0, -x) + special.xlogy(a - 1.0, x)
    z -= special.betaln(a, b)
    z -= np.log(scale)
    z = np.where((x < 0) | (x > 1), -np.inf, z)
    if z.ndim == 0:
        return z[()]
    return z
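# Sanity check for beta_logpdf above (assumed in scope with `numpy as np` and
# `scipy.special as special`): it should agree with scipy.stats.beta.logpdf,
# including -inf outside the support. Shape parameters below are arbitrary.
import numpy as np
from scipy import stats

x = np.array([-0.5, 0.0, 0.25, 0.5, 0.75, 1.0, 1.5])
print(np.allclose(beta_logpdf(x, a=2.0, b=3.0),
                  stats.beta.logpdf(x, a=2.0, b=3.0)))  # True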
def multiclass_model_data(n_nonevent, n_event, max_pvalue, max_pvalue_policy, scale=None): n, n_classes = n_nonevent.shape DD = [] PV = [] VV = [] for c in range(n_classes): t_n_event = n_event[:, c].sum() t_n_nonevent = n_nonevent[:, c].sum() D = [] V = [] E = [] NE = [] for i in range(1, n + 1): s_event = n_event[:i, c][::-1].cumsum()[::-1] s_nonevent = n_nonevent[:i, c][::-1].cumsum()[::-1] rate = s_event / (s_nonevent + s_event) p = s_event / t_n_event q = s_nonevent / t_n_nonevent iv = special.xlogy(p - q, p / q) if scale is not None: rate *= scale iv *= scale rate = rate.astype(np.int64) iv = iv.astype(np.int64) D.append(rate) V.append(iv) if max_pvalue is not None: E.append(s_event) NE.append(s_nonevent) if max_pvalue is not None: pvalue_violation_indices = find_pvalue_violation_indices( n, E, NE, max_pvalue, max_pvalue_policy) else: pvalue_violation_indices = [] DD.append(D) VV.append(V) PV.append(pvalue_violation_indices) return DD, VV, PV
def model_sim(model1, model2, H_model1_bg, H_model2_bg, H_model1, H_model2, min_overlap=2): models_switched = False # my design model2 cannot be longer than model1 if len(model1) < len(model2): model1, model2 = model2, model1 H_model1_bg, H_model2_bg = H_model2_bg, H_model1_bg H_model1, H_model2 = H_model2, H_model1 models_switched = True scores = [] contributions = [] slices = [] for sl1, sl2 in create_slices(len(model1), len(model2), min_overlap): background_score = 0 # so we want the contributions of the background background_score += H_model1_bg[sl1].sum() background_score += H_model2_bg[sl2].sum() cross_score = 0 # and the contributions of model1 vs. model2 cross_score += H_model1[sl1].sum() # entropy of model1 cross_score += H_model2[sl2].sum() # entropy of model2 # cross entropy part p_bar = 0.5 * (model1[sl1, :] + model2[sl2, :]) p_bar_entropy = xlogy(p_bar, p_bar) / loge2 cross_score -= p_bar_entropy.sum() scores.append(background_score - cross_score) contributions.append((background_score, cross_score)) slices.append((sl1, sl2)) # very neat: https://stackoverflow.com/a/6193521/2272172 max_index, max_score = max(enumerate(scores), key=operator.itemgetter(1)) max_slice1, max_slice2 = slices[max_index] # gotta love python for that: https://stackoverflow.com/a/13335254/2272172 start1, end1, _ = max_slice1.indices(len(model1)) start2, end2, _ = max_slice2.indices(len(model2)) contrib = contributions[max_index] if models_switched: return max_score, (start2 + 1, end2), (start1 + 1, end1), contrib else: return max_score, (start1 + 1, end1), (start2 + 1, end2), contrib
def get_entropy(wf, cut_at=None): """ Calculates entanglement entropy of the TT wavefunction at the specified cut. If cut is not specified, the entropy is calculated at the middle of the chain. Parameters: ----------- wf: tt.Vector Tensor train vector cut_at: int, default None The position of the internal index at which the entropy is calculated. Should be in [0, d-2] If not provided, the cut is assumed at the central internal index for even dimension TT vectors, or at the index left to the central site for odd dimension TT vectors. """ # import pdb # pdb.set_trace() num_dimensions = wf.d if cut_at is None: cut_at = wf.d // 2 - 1 # this is the middle bond if d is even # or the bond left to the central cite is d is odd assert (cut_at <= num_dimensions - 2 and cut_at >= 0) # Get rid of redundant ranks (they cause technical difficulties). wf = wf.round(eps=0) coresX = tt.tensor.to_list(wf) # Left orthogonalize cores for current_dim in range(0, cut_at): coresX = tt.riemannian.riemannian.cores_orthogonalization_step( coresX, current_dim, left_to_right=True) # Right orthogonalize cores for current_dim in range(num_dimensions - 1, cut_at + 1, -1): coresX = tt.riemannian.riemannian.cores_orthogonalization_step( coresX, current_dim, left_to_right=False) # Now we have two adjacent non-orthogonal cores at cut_at and cut_at+1 # locations. Merge them and perform SVD left_core = coresX[cut_at] right_core = coresX[cut_at + 1] r11, n1, r12 = left_core.shape r21, n2, r22 = right_core.shape merged = np.matmul(left_core.reshape([r11 * n1, r12]), right_core.reshape([r21, n2 * r22])) u, s, vh = np.linalg.svd(merged) # Truncate singular values to the size of the rank s2 = (s[:r12])**2 return -np.sum(xlogy(s2, s2))
def poisson(self, x, mu):
    """
    Poisson function taken from:
    https://github.com/scipy/scipy/blob/master/scipy/stats/_discrete_distns.py

    For license see documentation/BSDLicense_scipy.md

    Author: Travis Oliphant 2002-2011 with contributions from
            SciPy Developers 2004-2011
    """
    return np.exp(special.xlogy(x, mu) - gamln(x + 1) - mu)
def get_trajectory_log_likelihood( T_aug, root, prior_root_distn, Q_default, nstates): """ Parameters ---------- T_aug : undirected weighted networkx graph Trajectory with weighted edges annotated with states. root : integer Root node. prior_root_distn : 1d ndarray Prior distribution over states at the root. Q_default : 2d ndarray Rate matrix which applies to all edges. nstates : integer Number of states. Returns ------- log_likelihood : float Logarithm of the trajectory likelihood according to the given Markov jump process. Notes ----- Regarding the order of the arguments of this function, T_aug is first to facilitate functools.partial wrapping for MCMC callback. """ # Compute the total rates. nstates = prior_root_distn.shape[0] total_rates = get_total_rates(Q_default) # Compute primary process statistics. # These will be used for two purposes. # One of the purposes is as the denominator of the # importance sampling ratio. # The second purpose is to compute contributions # to the neg log likelihood estimate. info = get_history_statistics(T_aug, nstates, root=root) dwell_times, root_state, transitions = info # contribution of root state to log likelihood init_ll = np.log(prior_root_distn[root_state]) # contribution of dwell times dwell_ll = -np.dot(dwell_times, total_rates) # contribution of transitions trans_ll = special.xlogy(transitions, Q_default).sum() # Return the sum of the log likelihood contributions. log_likelihood = init_ll + dwell_ll + trans_ll return log_likelihood
def _real_pdf(self, arm, theta):
    p = 0
    for pos in range(self.n_positions):
        pos_prob = self.position_probabilities[pos]
        a = self.S_kl[arm, pos]
        b = self.N_kl[arm, pos] - self.S_kl[arm, pos]
        p += sc.xlog1py(b, -theta * pos_prob) + sc.xlogy(a, theta)
        p -= sc.betaln(a, b)
        p += a * np.log(pos_prob)
    return np.exp(p)
def phot_loglike(data, data_err, data_mask, models, dim_prior=True): """ Computes the log-likelihood between noisy data and noiseless models. Parameters ---------- data : `~numpy.ndarray` of shape `(Nfilt)` Observed data values. data_err : `~numpy.ndarray` of shape `(Nfilt)` Associated (Normal) errors on the observed values. data_mask : `~numpy.ndarray` of shape `(Nfilt)` Binary mask (0/1) indicating whether the data was observed. models : `~numpy.ndarray` of shape `(Nmodel, Nfilt)` Models predictions. dim_prior : bool, optional Whether to apply a dimensional-based correction (prior) to the log-likelihood. Transforms the likelihood to a chi2 distribution with `Nfilt - 3` degrees of freedom. Default is `True`. Returns ------- lnlike : `~numpy.ndarray` of shape `(Nmodel)` Log-likelihood values. """ # Subselect only clean observations. Ndim = sum(data_mask) # number of dimensions flux, fluxerr = data[data_mask], data_err[data_mask] # mean, error mfluxes = models[:, data_mask] # model predictions tot_var = np.square(fluxerr) + np.zeros_like(mfluxes) # total variance # Compute residuals. resid = flux - mfluxes # Compute chi2. chi2 = np.sum(np.square(resid) / tot_var, axis=1) # Compute multivariate normal logpdf. lnl = -0.5 * chi2 lnl += -0.5 * (Ndim * np.log(2. * np.pi) + np.sum(np.log(tot_var), axis=1)) # Apply dimensionality prior. if dim_prior: # Compute logpdf of chi2 distribution. a = 0.5 * (Ndim - 3) # effective dof lnl = xlogy(a - 1., chi2) - (chi2 / 2.) - gammaln(a) - (np.log(2.) * a) return lnl
def test_entropy(self):
    # Simple tests of entropy.
    hg = stats.hypergeom(4, 1, 1)
    h = hg.entropy()
    expected_p = np.array([0.75, 0.25])
    expected_h = -np.sum(xlogy(expected_p, expected_p))
    assert_allclose(h, expected_h)

    hg = stats.hypergeom(1, 1, 1)
    h = hg.entropy()
    assert_equal(h, 0.0)
def compute_score(self, predictions):
    """
    Compute the score according to the heuristic.

    Args:
        predictions (ndarray): Array of predictions

    Returns:
        Array of scores.
    """
    assert predictions.ndim >= 3
    # [n_sample, n_class, ..., n_iterations]
    expected_entropy = -np.mean(np.sum(xlogy(predictions, predictions), axis=1),
                                axis=-1)  # [batch size, ...]
    expected_p = np.mean(predictions, axis=-1)  # [batch_size, n_classes, ...]
    entropy_expected_p = -np.sum(xlogy(expected_p, expected_p),
                                 axis=1)  # [batch size, ...]
    bald_acq = entropy_expected_p - expected_entropy
    return bald_acq
def test_entropy(self):
    # Basic tests of entropy.
    pvals = np.array([0.25, 0.45, 0.3])
    p = stats.rv_discrete(values=([0, 1, 2], pvals))
    expected_h = -sum(xlogy(pvals, pvals))
    h = p.entropy()
    assert_allclose(h, expected_h)

    p = stats.rv_discrete(values=([0, 1, 2], [1.0, 0, 0]))
    h = p.entropy()
    assert_equal(h, 0.0)
def _score_submodel(self, weights, intercepts, dm, binned):
    """
    Utility function for computing D^2 (pseudo R^2) on a given set of weights
    and intercepts. Used in both model subsetting and the mother score()
    function of the GLM.

    Parameters
    ----------
    weights : pd.Series
        Series in which entries are numpy arrays containing the weights for a
        given cell. Indices should be cluster ids.
    intercepts : pd.Series
        Series in which elements are the intercept fit to each cell. Indices
        should match weights.
    dm : numpy.ndarray
        Design matrix. Should not contain the bias column. dm.shape[1] should
        be the same as the length of an element in weights.
    binned : numpy.ndarray
        nT x nCells array, in which each column is the binned spike train for
        a single unit. Should be the same number of rows as dm.

    Returns
    -------
    pd.Series
        Pandas series containing the scores of the given model for each cell.
    """
    scores = pd.Series(index=weights.index, name='scores')
    for cell in weights.index:
        cell_idx = np.argwhere(self.clu_ids == cell)[0, 0]
        wt = weights.loc[cell].reshape(-1, 1)
        bias = intercepts.loc[cell]
        y = binned[:, cell_idx]
        pred = np.exp(dm @ wt + bias)
        null_pred = np.ones_like(pred) * np.mean(y)
        null_deviance = 2 * np.sum(
            xlogy(y, y / null_pred.flat) - y + null_pred.flat)
        with np.errstate(divide='ignore', invalid='ignore'):
            full_deviance = 2 * np.sum(
                xlogy(y, y / pred.flat) - y + pred.flat)
        d_sq = 1 - (full_deviance / null_deviance)
        scores.at[cell] = d_sq
    return scores
def estimate_topic_counts(in_tsv, vocab_index, author_index, thetas, phis, verbose=False): n_authors = len(author_index) n_docs, n_topics = thetas.shape n_topics, n_vocab = phis.shape topic_term_counts = np.zeros((n_topics, n_vocab)) topic_author_term_counts = [ lil_matrix((n_authors, n_vocab)) for t in range(n_topics) ] nz_phis = phis > 0 log_phis = xlogy(nz_phis, phis) for doc_id, line in enumerate(open(in_tsv, mode='r', encoding='utf-8')): if verbose and doc_id and doc_id % 1000 == 0: print('{}/{}'.format(doc_id, n_docs), file=sys.stderr) fields = line.strip().split('\t') author = fields[1] author_id = author_index[author] tokens = np.array(fields[2].split()) theta_d = thetas[doc_id] nz_theta_d = theta_d > 0 log_theta_d = xlogy(nz_theta_d, theta_d) for term, count in Counter(tokens).items(): term_id = vocab_index[term] topic_dist = np.where(nz_phis.T[term_id] * nz_theta_d != 0, np.exp(log_phis.T[term_id] + log_theta_d), 0.0).ravel() if topic_dist.sum() == 0: continue topic_dist = topic_dist / topic_dist.sum() topics = np.random.choice(n_topics, size=count, p=topic_dist) for topic in topics: topic_term_counts[topic, term_id] += 1 topic_author_term_counts[topic][author_id, term_id] += 1 topic_author_term_counts = [x.tocsr() for x in topic_author_term_counts] return topic_term_counts, topic_author_term_counts
def get_loss_function(self, coeff: list) -> Tuple[float, list, list]:
    coeff = np.array(coeff)
    X = self.get_design_matrix(self.params.columns.features)
    y = self.get_target_column(self.params.columns.target, self.params.outcome)
    X, y = np.array(X), np.array(y)
    z = X @ coeff
    s = expit(z)
    d = s * (1 - s)
    D = np.diag(d)
    hess = X.T @ D @ X
    y_ratio = (y - s) / d
    y_ratio[(y == 0) & (s == 0)] = -1
    y_ratio[(y == 1) & (s == 1)] = 1
    grad = X.T @ D @ (z + y_ratio)
    loglike = float(np.sum(xlogy(y, s) + xlogy(1 - y, 1 - s)))
    return loglike, grad, hess
def smoothed(self):
    idfmatrix = (len(self.texts) / self.binarymatrix.sum(axis=0)) + 1
    idfmatrix = idfmatrix.astype(np.float16)
    # Base-2 logarithm of the idf values; xlogy(sign(x), x) leaves zeros at zero.
    idfmatrix = xlogy(np.sign(idfmatrix), idfmatrix) / np.log(2)
    # print(idfmatrix)
    data = np.asarray(idfmatrix).reshape(-1)
    self.idfmatrix = diags(data, 0)
    self.tfidfmatrix = self.tfmatrix * self.idfmatrix
def _joint_entropy(self, predictions, selected):
    K = predictions.shape[-1]
    M = selected.shape[0]
    exp_y = np.array(
        [np.matmul(selected, predictions[i].T)
         for i in range(predictions.shape[0])]) / K
    mean_entropy = selected.mean(-1, keepdims=True)[None]

    step = 256
    for idx in range(0, exp_y.shape[0], step):
        b_preds = exp_y[idx: (idx + step)]
        yield np.sum(-xlogy(b_preds, b_preds) / mean_entropy, axis=(1, -1)) / M
def dbin_llf(r, p, n):
    r"""
    binomial distribution

    r ~ binomial(p, n); r = 0, ..., n

    .. math::
        P(r; p, n) = \frac{n!}{r!(n-r)!} p^r (1-p)^{n-r}

    """
    return binomln(n, r) + xlogy(r, p) + xlog1py(n - r, -p)
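# Standalone check of the binomial log-likelihood above. The helper
# binomln(n, r) is assumed to be the log binomial coefficient log(n choose r);
# with that definition the expression matches scipy.stats.binom.logpmf.
import numpy as np
from scipy.special import gammaln, xlogy, xlog1py
from scipy import stats

def _binomln(n, r):
    # log of the binomial coefficient via gammaln
    return gammaln(n + 1) - gammaln(r + 1) - gammaln(n - r + 1)

def _dbin_llf(r, p, n):
    return _binomln(n, r) + xlogy(r, p) + xlog1py(n - r, -p)

r = np.arange(0, 11)
print(np.allclose(_dbin_llf(r, 0.3, 10), stats.binom.logpmf(r, 10, 0.3)))  # True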
def pmf_for_val(self, x):
    lambda_xi_x = self.my_lambda[..., None] + self.xi[..., None] * x
    log_pmf = (
        np.log(self.my_lambda[..., None])
        + xlogy(x - 1, lambda_xi_x)
        - lambda_xi_x
        - gammaln(x + 1)  # Gamma(x+1) = x!
    )
    # Those values beyond x_max will have nans
    # that we replace with -np.inf
    log_pmf[np.isnan(log_pmf)] = -np.inf
    # Normalized
    return np.exp(log_pmf - logsumexp(log_pmf, axis=-1, keepdims=True))
def loglik(self, x):
    full_x = np.arange(*self.get_max_support())[(None,) * len(self.my_lambda.shape)]
    assert np.all(x >= full_x.min()) and np.all(x <= full_x.max())
    lambda_xi_x = self.my_lambda[..., None] + self.xi[..., None] * full_x
    log_pmf = (
        np.log(self.my_lambda[..., None])
        + xlogy(full_x - 1, lambda_xi_x)
        - lambda_xi_x
        - gammaln(full_x + 1)  # Gamma(x+1) = x!
    )
    # Normalized
    return log_pmf[x] - logsumexp(log_pmf, axis=-1, keepdims=True)
def normalized_non_maximum_entropy_detector(probabilities):
    indices = numpy.argmax(probabilities, axis=1)
    normalized_probabilities = numpy.copy(probabilities)
    normalized_probabilities[numpy.arange(probabilities.shape[0]), indices] = 0
    normalized_probabilities = normalized_probabilities / numpy.sum(
        normalized_probabilities, axis=1).reshape(-1, 1)
    from scipy.special import xlogy
    confidences = -numpy.sum(
        xlogy(normalized_probabilities, normalized_probabilities), axis=1)
    # confidences /= math.log(probabilities.shape[1])
    return confidences
def mix_ground_truth(self): # fx and fy are x and y marginal probability density functions(pdf) of mix-gaussian distribution # fxy is the joint probability density function of mix-gaussian distribution # the mutual information ground truth is the difference between sum of entropy of individual variables and joint entropy of all variables # the entropies are computed by integrating the expectation of pdf of variables involved mix, mix2, covMat1, covMat2, mu = self.mix, self.mix2, self.covMat1, self.covMat2, self.mu def fxy1(x, y): X = np.array([x, y]) temp1 = np.matmul(np.matmul(X - mu, np.linalg.inv(covMat1)), (X - mu).transpose()) temp2 = np.matmul(np.matmul(X + mu, np.linalg.inv(covMat2)), (X + mu).transpose()) return mix*np.exp(-.5*temp1) / (2*np.pi * np.sqrt(np.linalg.det(covMat1))) \ + (1-mix)*np.exp(-.5*temp2) / \ (2*np.pi * np.sqrt(np.linalg.det(covMat2))) def fxy2(x, y): X = np.array([x, y]) temp1 = np.matmul(np.matmul(X + mu, np.linalg.inv(covMat1)), (X + mu).transpose()) temp2 = np.matmul(np.matmul(X - mu, np.linalg.inv(covMat2)), (X - mu).transpose()) return mix*np.exp(-.5*temp1) / (2*np.pi * np.sqrt(np.linalg.det(covMat1))) \ + (1-mix)*np.exp(-.5*temp2) / \ (2*np.pi * np.sqrt(np.linalg.det(covMat2))) def fxy(x, y): return mix2 * fxy1(x, y) + (1 - mix2) * fxy2(x, y) lim = np.inf hxy = dblquad(lambda x, y: -xlogy(fxy(x, y), fxy(x, y)), -lim, lim, lambda x: -lim, lambda x: lim) hxy1 = dblquad(lambda x, y: -xlogy(fxy1(x, y), fxy1(x, y)), -lim, lim, lambda x: -lim, lambda x: lim) hxy2 = dblquad(lambda x, y: -xlogy(fxy2(x, y), fxy2(x, y)), -lim, lim, lambda x: -lim, lambda x: lim) con_entropy = mix2 * hxy1[0] + (1 - mix2) * hxy2[0] # print(hxy[0], hxy1[0], hxy2[0]) return hxy[0] - con_entropy
def logpmf(self):
    x = np.arange(*self.get_max_support())[(None,) * len(self.my_lambda.shape)]
    lambda_xi_x = self.my_lambda[..., None] + self.xi[..., None] * x
    log_pmf = (
        np.log(self.my_lambda[..., None])
        + xlogy(x - 1, lambda_xi_x)
        - lambda_xi_x
        - gammaln(x + 1)  # Gamma(x+1) = x!
    )
    # Those values beyond x_max will have nans
    # that we replace with -np.inf
    log_pmf[np.isnan(log_pmf)] = -np.inf
    # Normalized
    return log_pmf - logsumexp(log_pmf, axis=-1, keepdims=True)
def observed_objective(T, root, edges, data_count_pairs, log_params):
    pman = ParamManager(edge_labels=edges).set_packed(log_params)
    edge_rates, nt_probs, kappa, penalty = pman.get_implicit()
    nt_distn1d = np.array(nt_probs)
    Q = npctmctree.hkymodel.get_normalized_Q(kappa, nt_distn1d)
    edge_to_P = {}
    for edge, edge_rate in zip(edges, edge_rates):
        edge_to_P[edge] = expm(edge_rate * Q)
    xmaps, counts = zip(*data_count_pairs)
    lhoods = dynamic_xmap_lhood.get_iid_lhoods(
        T, edge_to_P, root, nt_distn1d, xmaps)
    log_likelihood = xlogy(counts, lhoods).sum()
    return -log_likelihood + penalty
def cross_entropy(y_true, y_pred, delta=1e-9):
    """
    Binary Cross Entropy.

    While the definition varies a little across ML and information theory
    domains, cross-entropy in general measures the difference of information
    between two probability distributions (i.e. a way of measuring how similar
    they are). In our context, cross entropy compares our observed values
    (which can be viewed as a probability distribution where every probability
    is either 0 or 1, since every value is known and thus has an extreme
    probability) with the predicted values (which are actual probabilities,
    since the true values are unknown).

    Since we are interested in predicting values that follow a Bernoulli
    distribution, the cross entropy takes the form of the negative
    log-likelihood of the Bernoulli distribution. With this cost function,
    neural networks are performing a more elaborate version of maximum
    likelihood estimation: the cost carries a negative sign because maximizing
    the likelihood is the same as minimizing the negative log-likelihood.

    Parameters
    ----------
    y_true : numpy array
        True, observed values. The outcome of an event (where 1 == success).
    y_pred : numpy array
        The predicted success probabilities.
    delta : float, optional
        A small, positive constant used to clip predicted probabilities away
        from 0 and 1, which avoids log(0). Ignored if delta = 0.
        The default is 1e-9.

    Returns
    -------
    cost : float
        The binary cross-entropy.
    """
    # Compute the cross-entropy cost.
    # Clip predictions to avoid log(0) errors (not necessary in most cases).
    ypred = y_pred.copy()
    if delta != 0:
        ypred[ypred <= delta] = delta
        ypred[ypred >= 1 - delta] = 1 - delta

    cost = -np.sum(xlogy(y_true, ypred))
    return cost
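# Small usage sketch for cross_entropy above (assumed in scope with
# `numpy as np` and `xlogy` imported; the arrays below are arbitrary examples).
# The cost is the negative sum of y_true * log(y_pred), and the delta clipping
# keeps it finite even when a "true" class is predicted with probability zero.
import numpy as np

y_true = np.array([1.0, 0.0, 1.0, 0.0])
y_pred = np.array([0.8, 0.1, 0.0, 0.3])          # third prediction is exactly 0
print(cross_entropy(y_true, y_pred))              # finite, thanks to delta
print(np.isclose(cross_entropy(y_true, y_pred, delta=0),
                 np.inf))                         # True: -xlogy(1, 0) = inf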
def entropy(y):
    """Return the empirical entropy of samples y of a categorical distribution

    Arguments:
        y: np.array (N, C), categorical labels

    Returns:
        H: float
    """
    if len(y) == 0:
        return 0
    py = y.mean(0)
    h = -np.sum(xlogy(py, py))
    return h
def model_data(n_nonevent, n_event, max_pvalue, max_pvalue_policy, scale=None, return_nonevent_event=False): n = len(n_nonevent) t_n_event = n_event.sum() t_n_nonevent = n_nonevent.sum() D = [] V = [] E = [] NE = [] for i in range(1, n + 1): s_event = n_event[:i][::-1].cumsum()[::-1] s_nonevent = n_nonevent[:i][::-1].cumsum()[::-1] rate = s_event / (s_nonevent + s_event) p = s_event / t_n_event q = s_nonevent / t_n_nonevent iv = special.xlogy(p - q, p / q) if scale is not None: rate *= scale iv *= scale D.append(rate.astype(np.int64)) V.append(iv.astype(np.int64)) else: D.append(rate) V.append(iv) if max_pvalue is not None or return_nonevent_event: E.append(s_event) NE.append(s_nonevent) if max_pvalue is not None: pvalue_violation_indices = find_pvalue_violation_indices( n, E, NE, max_pvalue, max_pvalue_policy) else: pvalue_violation_indices = [] if return_nonevent_event: return D, V, NE, E, pvalue_violation_indices return D, V, pvalue_violation_indices
def entropy(x):
    """Calculate the entropy of a discrete probability distribution.

    Parameters
    ----------
    x : array-like
        Discrete probability distribution.

    Returns
    -------
    entropy : float
    """
    x = np.asarray(x)
    return -special.xlogy(x, x).sum()
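# Usage sketch for entropy above (assumed in scope with `numpy as np` and
# `scipy.special as special`): a uniform distribution over n outcomes has
# entropy log(n) in nats, and a point mass has entropy 0 because xlogy maps
# 0 * log(0) to 0.
import numpy as np

print(np.isclose(entropy([0.25, 0.25, 0.25, 0.25]), np.log(4)))  # True
print(entropy([1.0, 0.0, 0.0]))                                  # 0.0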
def get_entropy(series, nbins=15):
    """
    :param series: a 1-D array for which we need to find the entropy
    :param nbins: number of bins for histogram
    :return: entropy
    """
    # https://www.mathworks.com/matlabcentral/answers/27235-finding-entropy-from-a-probability-distribution
    counts, bin_edges = np.histogram(series, bins=nbins)
    p = counts / np.sum(counts, dtype=float)
    bin_width = np.diff(bin_edges)
    entropy = -np.sum(xlogy(p, p / bin_width))
    return entropy
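# Rough usage sketch for get_entropy above (assumed in scope with
# `numpy as np` and `xlogy` imported). Because the bin probabilities are
# divided by the bin widths, this estimates *differential* entropy; for
# samples from Uniform(0, 4) the estimate should land near log(4) ~ 1.386.
import numpy as np

rng = np.random.default_rng(0)
samples = rng.uniform(0, 4, size=100_000)
print(get_entropy(samples, nbins=15))  # roughly 1.386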
def test_entropy(self):
    # Basic entropy tests.
    b = stats.binom(2, 0.5)
    expected_p = np.array([0.25, 0.5, 0.25])
    expected_h = -sum(xlogy(expected_p, expected_p))
    h = b.entropy()
    assert_allclose(h, expected_h)

    b = stats.binom(2, 0.0)
    h = b.entropy()
    assert_equal(h, 0.0)

    b = stats.binom(2, 1.0)
    h = b.entropy()
    assert_equal(h, 0.0)
def entropy_rate(self):
    """
    Returns the estimated entropy rate of the Markov chain
    """
    from rpy2.robjects import r, globalenv
    from itertools import product
    import pandas as pd
    import pandas.rpy.common as com
    from scipy.special import xlogy

    r("library('DTMCPack')")
    globalenv['transmat'] = com.convert_to_r_dataframe(
        pd.DataFrame(self.transmat))
    stationary_dist = r("statdistr(transmat)")
    # long_as = lambda x: range(len(x))
    rate = 0
    for s1, s2 in product(range(len(self.means)), range(len(self.means))):
        p = self.transmat[s1][s2]
        rate -= stationary_dist[s1] * xlogy(p, p)
    return rate
def log_loss(y_true, y_prob):
    """Compute Logistic loss for classification.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.

    y_prob : array-like of float, shape = (n_samples, n_classes)
        Predicted probabilities, as returned by a classifier's
        predict_proba method.

    Returns
    -------
    loss : float
        The degree to which the samples are correctly predicted.
    """
    if y_prob.shape[1] == 1:
        y_prob = np.append(1 - y_prob, y_prob, axis=1)

    if y_true.shape[1] == 1:
        y_true = np.append(1 - y_true, y_true, axis=1)

    return -xlogy(y_true, y_prob).sum() / y_prob.shape[0]
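# Consistency check for log_loss above (assumed in scope with `numpy as np`
# and `xlogy` imported; inputs are arbitrary examples). For a single-column
# binary problem, the internal one-hot expansion gives the same value as the
# explicit binary cross-entropy mean.
import numpy as np

y_true = np.array([[1.0], [0.0], [1.0], [1.0]])
y_prob = np.array([[0.9], [0.2], [0.7], [0.4]])
manual = -np.mean(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))
print(np.isclose(log_loss(y_true, y_prob), manual))  # True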
def _logpmf(self, x, n, p):
    coeff = gamln(n + x) - gamln(x + 1) - gamln(n)
    return coeff + special.xlogy(n, p) + special.xlog1py(x, -p)
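# Standalone check of the negative binomial log-pmf formula above:
# gammaln(n + x) - gammaln(x + 1) - gammaln(n) + xlogy(n, p) + xlog1py(x, -p)
# reproduces scipy.stats.nbinom.logpmf (parameter values are arbitrary).
import numpy as np
from scipy.special import gammaln, xlogy, xlog1py
from scipy import stats

x = np.arange(0, 10)
n, p = 3.0, 0.4
coeff = gammaln(n + x) - gammaln(x + 1) - gammaln(n)
print(np.allclose(coeff + xlogy(n, p) + xlog1py(x, -p),
                  stats.nbinom.logpmf(x, n, p)))  # True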
def numeric(self, values):
    x = values[0]
    results = -xlogy(x, x)
    # Return -inf outside the domain
    results[np.isnan(results)] = -np.inf
    return results
def _logpmf(self, x, n, p):
    k = floor(x)
    combiln = (gamln(n + 1) - (gamln(k + 1) + gamln(n - k + 1)))
    return combiln + special.xlogy(k, p) + special.xlog1py(n - k, -p)
def _logpmf(self, k, mu):
    Pk = special.xlogy(k, mu) - gamln(k + 1) - mu
    return Pk
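# Standalone check of the Poisson log-pmf formula above:
# xlogy(k, mu) - gammaln(k + 1) - mu reproduces scipy.stats.poisson.logpmf
# (mu below is an arbitrary example value).
import numpy as np
from scipy.special import gammaln, xlogy
from scipy import stats

k = np.arange(0, 8)
mu = 2.5
print(np.allclose(xlogy(k, mu) - gammaln(k + 1) - mu,
                  stats.poisson.logpmf(k, mu)))  # True
print(xlogy(0, 0.0))  # 0.0, so the pmf at k = 0 stays well defined as mu -> 0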
def get_endpoint_neg_ll(self, J_other):
    """
    Compute the negative log likelihood for only the endpoint distribution.
    """
    return -xlogy(J_other, self.joint_distn).sum()
def numeric(self, values):
    x = values[0]
    y = values[1]
    # TODO return inf outside the domain
    return xlogy(x, x / y) - x + y
def _entropy(self, p):
    h = -special.xlogy(p, p) - special.xlogy(1 - p, 1 - p)
    return h
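# The closed form above is the Bernoulli entropy in nats. A standalone sketch
# of the same formula, checked against scipy.stats.bernoulli: a fair coin has
# entropy log(2), and degenerate coins (p = 0 or 1) have entropy 0 because
# xlogy maps 0 * log(0) to 0.
import numpy as np
from scipy import special, stats

def _bernoulli_entropy(p):
    return -special.xlogy(p, p) - special.xlogy(1 - p, 1 - p)

p = np.array([0.1, 0.25, 0.5, 0.9])
print(np.allclose(_bernoulli_entropy(p), stats.bernoulli(p).entropy()))  # True
print(_bernoulli_entropy(np.array([0.0, 1.0])))     # [0. 0.]
print(np.isclose(_bernoulli_entropy(0.5), np.log(2)))  # True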
def _entropy(self, M, n, N):
    k = np.r_[N - (M - n):min(n, N) + 1]
    vals = self.pmf(k, M, n, N)
    h = -np.sum(special.xlogy(vals, vals), axis=0)
    return h
def _entropy(self, n, p):
    k = np.r_[0:n + 1]
    vals = self._pmf(k, n, p)
    h = -np.sum(special.xlogy(vals, vals), axis=0)
    return h
def run(model, compound_states, node_to_data_fset): """ Parameters ---------- model : Python module a module with hardcoded information about the model """ # Get the analog of the genetic code. primary_to_tol = model.get_primary_to_tol() # Get the number of compound states. ncompound = len(compound_states) # Get the prior blink state distribution. blink_distn = model.get_blink_distn() #TODO check that the primary rate matrix is time-reversible # Get the primary rate matrix and convert it to a dense ndarray. nprimary = 6 Q_primary_nx = model.get_Q_primary() Q_primary_dense = nx_to_np_rate_matrix(Q_primary_nx, range(nprimary)) primary_distn = model.get_primary_distn() primary_distn_dense = np.array([primary_distn[i] for i in range(nprimary)]) # Get the expected rate using only the nx rate matrix and the nx distn. expected_primary_rate = 0 for sa, sb in Q_primary_nx.edges(): p = primary_distn[sa] rate = Q_primary_nx[sa][sb]['weight'] expected_primary_rate += p * rate # Normalize the primary rate matrix by dividing all rates # by the expected rate. for sa, sb in Q_primary_nx.edges(): Q_primary_nx[sa][sb]['weight'] /= expected_primary_rate # Get the rooted directed tree shape. T, root = model.get_T_and_root() # Get the map from ordered tree edge to branch length. # The branch length has complicated units. # It is the expected number of primary process transitions # along the branch conditional on all tolerance classes being tolerated. edge_to_blen = model.get_edge_to_blen() # Define some indicators for the compound process indicators = define_compound_process( Q_primary_nx, compound_states, primary_to_tol) I_syn, I_non, I_on, I_off = indicators # Define the dense compound transition rate matrix on_rate = model.get_rate_on() off_rate = model.get_rate_off() Q_compound_nx = get_Q_compound( Q_primary_nx, on_rate, off_rate, primary_to_tol, compound_states) Q_compound = nx_to_np(Q_compound_nx, compound_states) row_sums = np.sum(Q_compound, axis=1) Q_compound = Q_compound - np.diag(row_sums) # Define a sparse stationary distribution over compound states. compound_distn = {} for state in compound_states: if compound_state_is_ok(primary_to_tol, state): p = 1.0 p *= primary_distn[state.P] for tol_name in 'T0', 'T1', 'T2': if primary_to_tol[state.P] != tol_name: tol_state = getattr(state, tol_name) p *= blink_distn[tol_state] compound_distn[state] = p total = sum(compound_distn.values()) assert_allclose(total, 1) # Convert the compound state distribution to a dense array. # Check that the distribution is at equilibrium. compound_distn_np = np.array([ compound_distn.get(k, 0) for k in compound_states]) equilibrium_rates = np.dot(compound_distn_np, Q_compound) assert_allclose(equilibrium_rates, 0, atol=1e-10) # Make the np and nx transition probability matrices. # Map each branch to the transition matrix. edge_to_P_np = {} edge_to_P_nx = {} for edge in T.edges(): t = edge_to_blen[edge] P_np = scipy.linalg.expm(Q_compound * t) P_nx = np_to_nx_transition_matrix(P_np, compound_states) edge_to_P_np[edge] = P_np edge_to_P_nx[edge] = P_nx # Compute the likelihood lhood = dynamic_fset_lhood.get_lhood( T, edge_to_P_nx, root, compound_distn, node_to_data_fset) print('likelihood:') print(lhood) print() # Compute the map from node to posterior state distribution. # Convert the dict distribution back into a dense distribution. # This is used in the calculation of expected log likelihood. 
node_to_distn = dynamic_fset_lhood.get_node_to_distn( T, edge_to_P_nx, root, compound_distn, node_to_data_fset) root_distn = node_to_distn[root] print('prior distribution at the root:') for i, p in sorted(compound_distn.items()): print(i, p) print() print('posterior distribution at the root:') for i, p in sorted(root_distn.items()): print(i, p) print() root_distn_np = np.zeros(ncompound) for i, s in enumerate(compound_states): if s in root_distn: root_distn_np[i] = root_distn[s] # Compute the map from edge to posterior joint state distribution. # Convert the nx transition probability matrices back into dense ndarrays. edge_to_nxdistn = dynamic_fset_lhood.get_edge_to_nxdistn( T, edge_to_P_nx, root, compound_distn, node_to_data_fset) edge_to_J = {} for edge, J_nx in edge_to_nxdistn.items(): J_np = nx_to_np(J_nx, compound_states) edge_to_J[edge] = J_np # Initialize contributions to the expected log likelihood. # # Compute the contribution of the initial state distribution. ell_init = xlogy(root_distn_np, compound_distn_np).sum() # Initialize the contribution of the expected transitions. I_all = I_on + I_off + I_syn + I_non I_log_all = xlogy(I_all, Q_compound) ell_trans = 0 # Initialize the contribution of the dwell times. ell_dwell = 0 # Compute labeled transition count expectations # using the rate matrix, the joint posterior state distribution matrices, # the indicator matrices, and the conditional transition probability # distribution matrix. primary_expectation = 0 blink_expectation = 0 for edge in T.edges(): va, vb = edge Q = Q_compound J = edge_to_J[edge] P = edge_to_P_np[edge] t = edge_to_blen[edge] # primary transition event count expectations syn_total = compute_edge_expectation(Q, P, J, I_syn, t) non_total = compute_edge_expectation(Q, P, J, I_non, t) primary_expectation += syn_total primary_expectation += non_total print('edge %s -> %s syn expectation %s' % (va, vb, syn_total)) print('edge %s -> %s non expectation %s' % (va, vb, non_total)) # blink transition event count expectations on_total = compute_edge_expectation(Q, P, J, I_on, t) off_total = compute_edge_expectation(Q, P, J, I_off, t) blink_expectation += on_total blink_expectation += off_total print('edge %s -> %s on expectation %s' % (va, vb, on_total)) print('edge %s -> %s off expectation %s' % (va, vb, off_total)) # Compute expectation of logs of rates of observed transitions. # This is part of the expected log likelihood calculation. contrib = compute_edge_expectation(Q, P, J, I_log_all, t) ell_trans += contrib print('edge %s -> %s ell trans contrib %s' % (va, vb, contrib)) # Compute sum of expectations of dwell times contrib = compute_dwell_times(Q, P, J, -row_sums, t) ell_dwell += contrib print('edge %s -> %s ell dwell contrib %s' % (va, vb, contrib)) print() print('expected count of primary process transitions:') print(primary_expectation) print() print('expected count of blink process transitions:') print(blink_expectation) print() print('expected log likelihood:') print('contribution of initial state distribution :', ell_init) print('contribution of expected transition counts :', ell_trans) print('contribution of expected dwell times :', ell_dwell) print('total :', ( ell_init + ell_trans + ell_dwell)) print()
def get_tolerance_ll_contribs( rate_on, rate_off, total_tree_length, expected_initial_on, expected_initial_off, expected_dwell_on, expected_dwell_off, expected_nabsorptions, expected_ngains, expected_nlosses, ): """ Tolerance process log likelihood contributions. Note that the contributions associated with dwell times subsume the primary process dwell time log likelihood contributions. The first group of args defines parameters of the process. The second group defines the posterior tolerance distribution at the root. The third group defines the posterior tolerance dwell times. The fourth group is just a virtual posterior nabsorptions count which is related to dwell times of the primary process. The fifth group defines posterior transition expectations. Parameters ---------- rate_on : float x rate_off : float x total_tree_length : float x expected_initial_on : float x expected_initial_off : float x expected_dwell_on : float x expected_dwell_off : float x expected_nabsorptions : float x expected_ngains : float x expected_nlosses : float x Returns ------- init_ll_contrib : float x dwell_ll_contrib_prim : float x dwell_ll_contrib_tol : float x trans_ll_contrib : float x """ tolerance_distn = get_tolerance_distn(rate_off, rate_on) init_ll_contrib = ( special.xlogy(expected_initial_on - 1, tolerance_distn[1]) + special.xlogy(expected_initial_off, tolerance_distn[0])) dwell_ll_contrib_prim = -expected_nabsorptions dwell_ll_contrib_tol = -( expected_dwell_off * rate_on + (expected_dwell_on - total_tree_length) * rate_off) trans_ll_contrib = ( special.xlogy(expected_ngains, rate_on) + special.xlogy(expected_nlosses, rate_off)) ll_contribs = ( init_ll_contrib, dwell_ll_contrib_prim, dwell_ll_contrib_tol, trans_ll_contrib) return ll_contribs
def test_fully_augmented_likelihood_sufficient_statistics(self): # If we fix all of the parameters of the model except for the two # parameters that correspond to the tolerance transition rates, # then this model has low-dimensional sufficient statistics. # I think that these two parameters are associated # with three sufficient statistics. # Define the tolerance process rates. rate_on = 0.5 rate_off = 1.5 # Define some other properties of the process, # in a way that is not object-oriented. info = get_example_tolerance_process_info(rate_on, rate_off) (primary_distn, Q, primary_to_part, compound_to_primary, compound_to_tolerances, compound_distn, Q_compound) = info # Summarize the other properties. nprimary = len(primary_distn) nparts = len(set(primary_to_part.values())) total_tolerance_rate = rate_on + rate_off tolerance_distn = get_tolerance_distn(rate_off, rate_on) # Define a tree with edge weights. T = nx.Graph() T.add_edge(0, 1, weight=0.1) T.add_edge(2, 1, weight=0.2) T.add_edge(1, 3, weight=5.3) T.add_edge(3, 4, weight=0.4) T.add_edge(3, 5, weight=0.5) # Summarize the total tree length. total_tree_length = sum(T[a][b]['weight'] for a, b in T.edges()) # Randomly assign compound leaf states. choices = list(compound_distn) node_to_compound_state = dict( (n, random.choice(choices)) for n in (0, 2, 4, 5)) # Test the likelihood calculations # for a few conditionally sampled histories on the tree. nhistories = 10 for compound_process_history in gen_histories( T, Q_compound, node_to_compound_state, nhistories=nhistories): # Summarize the compound process history. dwell_times = get_history_dwell_times( compound_process_history) root_state, transitions = get_history_root_state_and_transitions( compound_process_history) # Get the total rate away from each compound state. total_rates = get_total_rates(Q_compound) # Directly compute the log likelihood of the history. ll_initial = np.log(compound_distn[root_state]) ll_dwell = 0.0 for compound_state, dwell_time in dwell_times.items(): ll_dwell -= dwell_time * total_rates[compound_state] ll_transitions = 0.0 for a, b in transitions.edges(): ntrans = transitions[a][b]['weight'] rate = Q_compound[a][b]['weight'] ll_transitions += special.xlogy(ntrans, rate) direct_ll_initrans = ll_initial + ll_transitions direct_ll_dwell = ll_dwell # Compute the log likelihood through sufficient statistics. # Get the number of tolerance gains # plus the number of initial tolerances, # and the number of tolerance losses # plus the number of initial lack-of-tolerances. 
ngains_stat = 0 nlosses_stat = 0 for tol in compound_to_tolerances[root_state]: if tol == 1: ngains_stat += 1 elif tol == 0: nlosses_stat += 1 else: raise Exception('invalid root tolerance state') for a, b in transitions.edges(): if a == b: continue ntransitions = transitions[a][b]['weight'] prim_a = compound_to_primary[a] prim_b = compound_to_primary[b] tols_a = compound_to_tolerances[a] tols_b = compound_to_tolerances[b] tols_diff = [y-x for x, y in zip(tols_a, tols_b)] ndiffs = sum(1 for x in tols_diff if x) if prim_a == prim_b: if ndiffs == 0: raise Exception( 'expected each non-self transition ' 'to have either a primary state change ' 'or a tolerance state change') elif ndiffs > 1: raise Exception( 'expected at most one tolerance state ' 'difference but observed %d' % ndiffs) elif ndiffs != 1: raise Exception('internal error') signed_hdist = sum(tols_diff) if signed_hdist == 1: ngains_stat += ntransitions elif signed_hdist == -1: nlosses_stat += ntransitions else: raise Exception('invalid tolerance process transition') # Get the total amount of time spent in tolerated states, # summed over each tolerance class. tolerance_duration_stat = 0.0 for compound_state, dwell_time in dwell_times.items(): ntols = sum(compound_to_tolerances[compound_state]) tolerance_duration_stat += dwell_time * ntols # Initialize the log likelihood for this more clever approach. ll_initrans = 0.0 ll_dwell = 0.0 # Add the log likelihood contributions that involve # the sufficient statistics and the on/off tolerance rates. ll_initrans -= special.xlogy(nparts-1, total_tolerance_rate) ll_initrans += special.xlogy(ngains_stat-1, rate_on) ll_initrans += special.xlogy(nlosses_stat, rate_off) ll_dwell -= rate_off * ( tolerance_duration_stat - total_tree_length) ll_dwell -= rate_on * ( total_tree_length * nparts - tolerance_duration_stat) # Add the log likelihood contributions that involve # general functions of the data and not the on/off tolerance rates. # On the other hand, they do involve the tolerance state. root_primary_state = compound_to_primary[root_state] ll_initrans += np.log(primary_distn[root_primary_state]) for compound_state, dwell_time in dwell_times.items(): primary_state = compound_to_primary[compound_state] primary_rate_out = 0.0 for sink in Q_compound[compound_state]: if compound_to_primary[sink] != primary_state: rate = Q_compound[compound_state][sink]['weight'] primary_rate_out += rate ll_dwell -= dwell_time * primary_rate_out for a, b in transitions.edges(): edge = transitions[a][b] ntransitions = edge['weight'] prim_a = compound_to_primary[a] prim_b = compound_to_primary[b] if prim_a != prim_b: rate = Q_compound[a][b]['weight'] ll_initrans += special.xlogy(ntransitions, rate) clever_ll_initrans = ll_initrans clever_ll_dwell = ll_dwell # Compare the two log likelihood calculations. assert_allclose(direct_ll_initrans, clever_ll_initrans) assert_allclose(direct_ll_dwell, clever_ll_dwell)
def _h_thin_plate(self, r): return xlogy(r**2, r)
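# The thin-plate spline radial basis function used above is r**2 * log(r);
# writing it as xlogy(r**2, r) makes it evaluate to 0 at r = 0 instead of nan.
# A standalone sketch of the same kernel (sample radii are arbitrary):
import numpy as np
from scipy.special import xlogy

r = np.array([0.0, 0.5, 1.0, 2.0])
print(xlogy(r**2, r))  # approximately [0., -0.173, 0., 2.773]
print(np.allclose(xlogy(r**2, r)[1:], r[1:]**2 * np.log(r[1:])))  # True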
def get_trajectory_log_likelihood( T_aug, root, prior_root_distn, Q_default=None): """ Parameters ---------- T_aug : undirected weighted networkx graph Trajectory with weighted edges annotated with states. root : integer Root node. prior_root_distn : dict Prior distribution over states at the root. Q_default : directed weighted networkx graph Rate matrix which applies to all edges. Returns ------- log_likelihood : float Logarithm of the trajectory likelihood according to the given Markov jump process. Notes ----- Regarding the order of the arguments of this function, T_aug is first to facilitate functools.partial wrapping for MCMC callback. """ # Compute the total rates. total_rates = get_total_rates(Q_default) # Compute primary process statistics. # These will be used for two purposes. # One of the purposes is as the denominator of the # importance sampling ratio. # The second purpose is to compute contributions # to the neg log likelihood estimate. info = get_history_statistics(T_aug, root=root) dwell_times, root_state, transitions = info # contribution of root state to log likelihood if root_state in prior_root_distn: init_ll = np.log(prior_root_distn[root_state]) else: init_ll = -np.inf # contribution of dwell times ll = 0.0 for state, dwell in dwell_times.items(): if state in total_rates: ll -= dwell * total_rates[state] dwell_ll = ll # contribution of transitions ll = 0.0 for sa, sb in transitions.edges(): ntransitions = transitions[sa][sb]['weight'] if ntransitions: if Q_default.has_edge(sa, sb): rate = Q_default[sa][sb]['weight'] ll += special.xlogy(ntransitions, rate) else: ll = -np.inf trans_ll = ll # Return the sum of the log likelihood contributions. log_likelihood = init_ll + dwell_ll + trans_ll return log_likelihood