def test_calc_number_subs(self):
    """expected number of substitutions matches length only in case 1"""
    pi = array([0.1, 0.2, 0.3, 0.4])
    pi_diag = diag([0.1, 0.2, 0.3, 0.4])

    def calibrated(rates):
        # scale each column of the rate terms by the matching motif prob
        unscaled = dot(rates, pi_diag)
        row_totals = np.sum(unscaled, axis=1)
        # pi-weighted sum of the row totals, used as the calibration divisor
        scale = np.dot(pi, row_totals)
        # subtracting the row totals from the diagonal makes every row sum
        # to zero (the inputs used here have a zero diagonal)
        return (unscaled - diag(row_totals)) / scale

    # case 1, symmetric rate terms: ENS is expected to equal length
    symmetric = array(
        [[0, 2, 1, 1], [2, 0, 1, 1], [1, 1, 0, 2], [1, 1, 2, 0]],
        dtype=float,
    )
    length = 0.1
    got = expm.expected_number_subs(pi, calibrated(symmetric), length)
    assert_allclose(got, length)

    # case 2, length != ENS
    asymmetric = array(
        [[0, 1, 1, 1], [2, 0, 1, 1], [1, 1, 0, 40], [1, 1, 1, 0]],
        dtype=float,
    )
    length = 0.2
    got = expm.expected_number_subs(pi, calibrated(asymmetric), length)
    self.assertNotAlmostEqual(got, length)
def get_lengths_as_ens(self, motif_probs=None):
    """returns {edge.name: ens, ...}, ens being the expected number of
    substitutions on that edge

    When the model is a ``substitution_model.Stationary`` instance the
    "length" parameter value of each edge is returned unchanged. Otherwise
    each value comes from ``expected_number_subs`` applied to the edge's
    motif probs, rate matrix and length.

    Parameters
    ----------
    motif_probs : dict or DictArray
        an item for each edge of the tree. Taken from
        ``self.get_motif_probs_by_node()`` when None.

    Returns
    -------
    dict keyed by edge name; the entry named "root" is excluded.
    """
    if motif_probs is None:
        motif_probs = self.get_motif_probs_by_node()

    edge_names = self.tree.get_node_names()
    # NOTE(review): presumably a list that always contains "root";
    # list.remove raises ValueError otherwise -- confirm against the tree API
    edge_names.remove("root")

    branch_lengths = {
        name: self.get_param_value("length", edge=name) for name in edge_names
    }
    if isinstance(self.model, substitution_model.Stationary):
        return branch_lengths

    ens_by_edge = {}
    for name in edge_names:
        rate_matrix = self.get_rate_matrix_for_edge(name)
        ens_by_edge[name] = expected_number_subs(
            motif_probs[name], rate_matrix, branch_lengths[name]
        )
    return ens_by_edge