def get_response_content(fs):
    np.set_printoptions(linewidth=200)
    out = StringIO()
    n = fs.nstates
    t = 0.001
    # sample the initial mutation rate matrix
    S = sample_symmetric_rate_matrix(n)
    v = sample_distribution(n)
    M = mrate.to_gtr_halpern_bruno(S, v)
    if not np.allclose(v, mrate.R_to_distn(M)):
        raise ValueError('stationary distribution error')
    print >> out, 't:', t
    print >> out
    print >> out, 'initial GTR matrix:'
    print >> out, M
    print >> out
    # Try to iteratively increase the relaxation time
    # by repeatedly applying Halpern-Bruno selection.
    R = M
    v_old = v
    for i in range(20):
        # print some properties of the matrix
        print >> out, v_old
        print >> out, mrate.R_to_relaxation_time(R)
        print >> out
        f = MyOpt(R, t)
        x0 = [1.0] * (n - 1)
        result = scipy.optimize.fmin(
                f, x0, disp=0, full_output=1, ftol=0.000001)
        xopt, fopt, niters, funcalls, warnflag = result
        if fopt > 0:
            print >> out, 'failed to increase relaxation time'
            print >> out
            break
        # compute the next stationary distribution
        v_target = X_to_distn(xopt)
        v_new = (1 - t) * v_old + t * v_target
        print >> out, v_new - v_old
        print >> out
        # compute the next rate matrix and update its stationary distribution
        R = mrate.to_gtr_halpern_bruno(R, v_new)
        if not np.allclose(v_new, mrate.R_to_distn(R)):
            raise ValueError('stationary distribution error')
        v_old = v_new
    print >> out, 'final rate matrix:'
    print >> out, R
    print >> out
    return out.getvalue()
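Most of these snippets lean on mrate.to_gtr_halpern_bruno without showing it. As a point of reference, here is a minimal sketch of the usual Halpern-Bruno fixation scaling; it takes the mutation stationary distribution p explicitly, whereas the library presumably recovers it from M internally, and it is not the mrate source, so edge cases may be handled differently there.

import numpy as np

def halpern_bruno_sketch(M, p, v):
    # Minimal sketch (not the mrate implementation): rescale the
    # off-diagonal rates of a reversible mutation matrix M, whose
    # stationary distribution is p, so that the new chain is reversible
    # with respect to the target distribution v.
    n = len(v)
    R = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            # ratio of scaled fitnesses of state j relative to state i
            w = (v[j] / p[j]) / (v[i] / p[i])
            if np.isclose(w, 1.0):
                coeff = 1.0
            else:
                coeff = np.log(w) / (1.0 - 1.0 / w)
            R[i, j] = M[i, j] * coeff
    # set the diagonal so that each row sums to zero
    R -= np.diag(R.sum(axis=1))
    return R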
def test_small_variance(self):
    """
    a = .1
    b = .2
    c = .7
    R = np.array([
        [-(b+c), b, c],
        [a, -(a+c), c],
        [a, b, -(a+b)]])
    """
    n = 4
    v = sample_distribution(n)
    S = sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    t = 0.0000001
    total_rate = mrate.Q_to_expected_rate(R)
    var = get_ml_variance(R, t)
    print 'time:', t
    print 'variance:', var
    print 'total rate:', total_rate
    print 'variance per time:', var / t
    print 'reciprocal of total rate:', 1 / total_rate
    print 'total rate times time:', total_rate * t
    print '(reciprocal of total rate) times time:', t / total_rate
    print
def get_response_content(fs):
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    # sample a random reversible CTMC rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    distn = mrate.R_to_distn(R)
    spectrum = scipy.linalg.eigvalsh(mrate.symmetrized(R))
    print >> out, "random reversible CTMC rate matrix:"
    print >> out, R
    print >> out
    print >> out, "stationary distribution:"
    print >> out, distn
    print >> out
    print >> out, "spectrum:"
    print >> out, spectrum
    print >> out
    Q = aggregate(R)
    distn = mrate.R_to_distn(Q)
    spectrum = scipy.linalg.eigvalsh(mrate.symmetrized(Q))
    print >> out, "aggregated rate matrix:"
    print >> out, Q
    print >> out
    print >> out, "stationary distribution:"
    print >> out, distn
    print >> out
    print >> out, "spectrum:"
    print >> out, spectrum
    print >> out
    return out.getvalue()
def __call__(self, X):
    """
    @param X: a vector to be converted into a finite distribution
    """
    v_target = X_to_distn(X)
    v_new = (1 - self.t) * self.v + self.t * v_target
    R = mrate.to_gtr_halpern_bruno(self.M, v_new)
    if not np.allclose(v_new, mrate.R_to_distn(R)):
        raise ValueError('stationary distribution error')
    r_sel = mrate.R_to_relaxation_time(R)
    # we want to minimize this
    return self.r_mut - r_sel
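The helper X_to_distn used here and in the optimization loop above is not part of this listing. One common way to let an unconstrained vector of length n-1 parameterize an n-state distribution is a softmax with one coordinate pinned to zero; the stand-in below is only a hypothetical illustration of that idea, not the actual helper.

import numpy as np

def X_to_distn_softmax_sketch(X):
    # Hypothetical stand-in: pin one coordinate to zero, exponentiate,
    # and normalize, mapping R^(n-1) onto the interior of the simplex.
    y = np.exp(np.concatenate(([0.0], np.asarray(X, dtype=float))))
    return y / y.sum()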
def __call__(self):
    """
    @return: True if a counterexample is found
    """
    n = self.nstates
    # sample a fairly generic GTR mutation rate matrix
    S = sample_symmetric_rate_matrix(n)
    v = sample_distribution(n)
    M = mrate.to_gtr_halpern_bruno(S, v)
    # look at the fiedler-like eigenvector of the mutation rate matrix
    r_recip, fiedler = mrate._R_to_eigenpair(M)
    r_mut = 1 / r_recip
    value_min, state_min = min((fiedler[i], i) for i in range(n))
    value_max, state_max = max((fiedler[i], i) for i in range(n))
    # move the stationary distribution towards a 50/50 distribution
    v_target = np.zeros(n)
    v_target[state_min] = 0.5
    v_target[state_max] = 0.5
    v_new = (1 - self.t) * v + self.t * v_target
    R = mrate.to_gtr_halpern_bruno(M, v_new)
    r_sel = mrate.R_to_relaxation_time(R)
    # the mutation-selection balance should have longer relaxation time
    #if r_sel < r_mut:
    #if True:
    if maxind(np.abs(fiedler / v)) != maxind(np.abs(fiedler / np.sqrt(v))):
        self.M = M
        self.fiedler = fiedler
        self.r_mut = r_mut
        self.r_sel = r_sel
        self.v = v
        self.v_new = v_new
        self.v_target = v_target
        self.opt_target = self._get_opt_target()
        return True
    else:
        return False
def test_large_variance(self):
    n = 4
    v = sample_distribution(n)
    S = sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    """
    a = .1
    b = .2
    c = .7
    R = np.array([
        [-(b+c), b, c],
        [a, -(a+c), c],
        [a, b, -(a+b)]])
    """
    t = 2.0
    dt = 0.0000001
    rtime = mrate.R_to_relaxation_time(R)
    var_a = get_ml_variance(R, t)
    var_b = get_ml_variance(R, t + dt)
    var_slope = (var_b - var_a) / dt
    deriv_ratio = get_p_id_deriv_ratio(R, t)
    clever_ratio = get_ml_variance_ratio(R, t)
    print 'time:', t
    print 'variance:', var_a
    print 'variance slope:', var_slope
    print 'var_slope / var_a:', var_slope / var_a
    print 'var_slope / var_a [clever]:', clever_ratio
    print 'log variance:', math.log(var_a)
    print 'relaxation time:', rtime
    print '2 / relaxation_time:', 2 / rtime
    print "p_id(t)'' / p_id(t)':", deriv_ratio
    print
    print '--- new attempt ---'
    print 'mutual information:', ctmcmi.get_mutual_information(R, t)
    print 'reciprocal of MI:', 1.0 / ctmcmi.get_mutual_information(R, t)
    print 'asymptotic variance:', get_asymptotic_variance(R, t)
    print 'asymptotic variance (ver. 2):', get_asymptotic_variance_b(R, t)
    print 'asymptotic variance (ver. 3):', get_asymptotic_variance_c(R, t)
    print 'AV approx (ver. 4):', get_asymptotic_variance_d(R, t)
    print 'AV approx (ver. 5):', get_asymptotic_variance_e(R, t)
    print
    print '--- another thing ---'
    fi_slow = get_fisher_info_known_distn(R, v, t)
    fi_fast = get_fisher_info_known_distn_fast(R, v, t)
    print 'slow asymptotic variance:', 1 / fi_slow
    print 'fast asymptotic variance:', 1 / fi_fast
    print
def get_response_content(fs):
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # define the barbell mutation rate matrix
    M, p = get_barbell_rate_matrix(fs.p_mid)
    nstates = len(p)
    print >> out, 'barbell mutation matrix:'
    print >> out, M
    print >> out
    print >> out, 'all of these should be zero for detailed balance:'
    for i in range(nstates):
        for j in range(nstates):
            print >> out, p[i] * M[i, j] - p[j] * M[j, i]
    print >> out
    print >> out, 'expected rate of the barbell mutation matrix:'
    print >> out, mrate.Q_to_expected_rate(M)
    print >> out
    p_target = np.array([1/3., 1/3., 1/3.])
    print >> out, 'target stationary distribution:'
    print >> out, p_target
    print >> out
    Q = mrate.to_gtr_halpern_bruno(M, p_target)
    print >> out, 'mutation-selection balance rate matrix:'
    print >> out, Q
    print >> out
    v = mrate.R_to_distn(Q)
    print >> out, 'computed stationary distribution:'
    print >> out, v
    print >> out
    print >> out, 'expected rate of the mutation-selection balance rate matrix:'
    print >> out, mrate.Q_to_expected_rate(Q)
    print >> out
    print >> out, 'all of these should be zero for detailed balance:'
    for i in range(nstates):
        for j in range(nstates):
            print >> out, v[i] * Q[i, j] - v[j] * Q[j, i]
    print >> out
    return out.getvalue()
def sample_row():
    n = 4
    # sample the exchangeability
    S = np.zeros((n, n))
    S[1, 0] = random.expovariate(1)
    S[2, 0] = random.expovariate(1)
    S[2, 1] = random.expovariate(1)
    S[3, 0] = random.expovariate(1)
    S[3, 1] = random.expovariate(1)
    S[3, 2] = random.expovariate(1)
    # sample the mutation stationary distribution
    mdistn = np.array([random.expovariate(1) for i in range(n)])
    mdistn /= np.sum(mdistn)
    # sample the mutation selection balance stationary distribution
    bdistn = np.array([random.expovariate(1) for i in range(n)])
    bdistn /= np.sum(bdistn)
    # sample the time
    t = random.expovariate(1)
    # sample the info type
    infotype = random.choice(('infotype.mi', 'infotype.fi'))
    # Compute some intermediate variables
    # from which the summary statistics and the label are computed.
    S = S + S.T
    M = S * mdistn
    M -= np.diag(np.sum(M, axis=1))
    R = mrate.to_gtr_halpern_bruno(M, bdistn)
    shannon_ent_mut = -sum(p * log(p) for p in mdistn)
    shannon_ent_bal = -sum(p * log(p) for p in bdistn)
    logical_ent_mut = 1.0 - sum(p * p for p in mdistn)
    logical_ent_bal = 1.0 - sum(p * p for p in bdistn)
    expected_rate_mut = mrate.Q_to_expected_rate(M)
    expected_rate_bal = mrate.Q_to_expected_rate(R)
    spectral_rate_mut = 1 / mrate.R_to_relaxation_time(M)
    spectral_rate_bal = 1 / mrate.R_to_relaxation_time(R)
    mi_mut = ctmcmi.get_mutual_information(M, t)
    mi_bal = ctmcmi.get_mutual_information(R, t)
    fi_mut = divtime.get_fisher_information(M, t)
    fi_bal = divtime.get_fisher_information(R, t)
    # compute the summary statistics
    summary_entries = [
            shannon_ent_bal - shannon_ent_mut,
            logical_ent_bal - logical_ent_mut,
            log(shannon_ent_bal) - log(shannon_ent_mut),
            log(logical_ent_bal) - log(logical_ent_mut),
            expected_rate_bal - expected_rate_mut,
            spectral_rate_bal - spectral_rate_mut,
            log(expected_rate_bal) - log(expected_rate_mut),
            log(spectral_rate_bal) - log(spectral_rate_mut),
            mi_bal - mi_mut,
            fi_bal - fi_mut,
            math.log(mi_bal) - math.log(mi_mut),
            math.log(fi_bal) - math.log(fi_mut),
            ]
    # get the definition entries
    definition_entries = [
            S[1, 0], S[2, 0], S[2, 1], S[3, 0], S[3, 1], S[3, 2],
            mdistn[0], mdistn[1], mdistn[2], mdistn[3],
            bdistn[0], bdistn[1], bdistn[2], bdistn[3],
            infotype,
            t,
            ]
    # define the label
    if infotype == 'infotype.mi' and mi_mut > mi_bal:
        label = 'mut.is.better'
    elif infotype == 'infotype.mi' and mi_mut < mi_bal:
        label = 'bal.is.better'
    elif infotype == 'infotype.fi' and fi_mut > fi_bal:
        label = 'mut.is.better'
    elif infotype == 'infotype.fi' and fi_mut < fi_bal:
        label = 'bal.is.better'
    else:
        label = 'indistinguishable'
    # return the row
    return definition_entries + summary_entries + [label]
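A hypothetical driver for sample_row, in case it helps to see how rows might be collected into a table; the output path, the row count, and the absence of a header line are assumptions, since the listing does not show how the table is actually written.

import csv

def write_rows_sketch(path, nrows=1000):
    # Hypothetical: call sample_row (defined above) repeatedly and dump
    # the rows as comma-separated values with no header line.
    with open(path, 'w') as fout:
        writer = csv.writer(fout)
        for _ in range(nrows):
            writer.writerow(sample_row())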
def get_input_matrices(fs):
    """
    @return: M, R
    """
    # get the positive strict lower triangular part of the S matrix
    L = []
    for i, line in enumerate(fs.lowtri):
        values = line.split()
        if len(values) != i + 1:
            raise ValueError(
                    'expected %d values on line "%s"' % (i + 1, line))
        vs = [float(v) for v in values]
        if any(x < 0 for x in vs):
            raise ValueError('exchangeabilities must be nonnegative')
        L.append(vs)
    # get the mut and mutsel weights
    mut_weights = [float(v) for v in fs.mutweights]
    mutsel_weights = [float(v) for v in fs.mutselweights]
    if any(x <= 0 for x in mut_weights + mutsel_weights):
        raise ValueError('stationary weights must be positive')
    # normalize weights to distributions
    mut_distn = [v / sum(mut_weights) for v in mut_weights]
    mutsel_distn = [v / sum(mutsel_weights) for v in mutsel_weights]
    # get the exchangeability matrix
    nstates = len(L) + 1
    S = np.zeros((nstates, nstates))
    for i, row in enumerate(L):
        for j, v in enumerate(row):
            S[i + 1, j] = v
            S[j, i + 1] = v
    # check the state space sizes implied by the inputs
    if len(set(len(x) for x in (S, mut_weights, mutsel_weights))) != 1:
        raise ValueError('the inputs do not agree on the state space size')
    # check for sufficient number of states
    if nstates < 2:
        raise ValueError('at least two states are required')
    # check reducibility of the exchangeability
    if not MatrixUtil.is_symmetric_irreducible(S):
        raise ValueError('exchangeability is not irreducible')
    # get the mutation rate matrix
    M = S * mut_distn * fs.mutscale
    M -= np.diag(np.sum(M, axis=1))
    # check sign symmetry and irreducibility
    if not MatrixUtil.is_symmetric_irreducible(np.sign(M)):
        raise ValueError(
                'mutation rate matrix is not sign symmetric irreducible')
    # get the mutation selection balance rate matrix
    R = mrate.to_gtr_halpern_bruno(M, mutsel_distn)
    # check sign symmetry and irreducibility
    if not MatrixUtil.is_symmetric_irreducible(np.sign(R)):
        raise ValueError(
                'mut-sel balance rate matrix '
                'is not sign symmetric irreducible')
    # check the stationary distributions
    mut_distn_observed = mrate.R_to_distn(M)
    if not np.allclose(mut_distn_observed, mut_distn):
        raise ValueError(
                'internal mut stationary distribution computation error')
    mutsel_distn_observed = mrate.R_to_distn(R)
    if not np.allclose(mutsel_distn_observed, mutsel_distn):
        raise ValueError(
                'internal mut-sel stationary distribution computation error')
    # return the values
    return M, R
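A brief usage sketch for get_input_matrices, comparing the two returned matrices with helpers already used elsewhere in this listing; the fs object and its fields are whatever the calling form supplies, so this is illustrative only.

def compare_input_matrices_sketch(fs):
    # Illustrative only: summarize the mutation matrix M and the
    # mutation-selection balance matrix R returned above.
    M, R = get_input_matrices(fs)
    print 'expected rate (mut):', mrate.Q_to_expected_rate(M)
    print 'expected rate (mut-sel):', mrate.Q_to_expected_rate(R)
    print 'relaxation time (mut):', mrate.R_to_relaxation_time(M)
    print 'relaxation time (mut-sel):', mrate.R_to_relaxation_time(R)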
def get_response_content(fs):
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    t = fs.divtime
    #h = fs.delta
    # sample a random rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # get some properties of the rate matrix
    distn = mrate.R_to_distn(R)
    spectrum = np.linalg.eigvalsh(mrate.symmetrized(R))
    #spectrum, U = np.linalg.eigh(mrate.symmetrized(R))
    #spectrum = np.linalg.eigvals(R)
    # report some information about the mutual information curve
    mi = ctmcmi.get_mutual_information(R, t)
    mi_diff = ctmcmi.get_mutual_information_diff(R, t)
    mi_diff_b = ctmcmi.get_mutual_information_diff_b(R, t)
    mi_diff_c = ctmcmi.get_mutual_information_diff_c(R, t)
    print >> out, 'arbitrary large-ish divergence time:'
    print >> out, t
    print >> out
    print >> out, 'randomly sampled reversible rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'stationary distribution:'
    print >> out, distn
    print >> out
    print >> out, 'spectrum of the rate matrix:'
    print >> out, spectrum
    print >> out
    print >> out, 'mutual information at t = %f:' % t
    print >> out, mi
    print >> out
    print >> out, 'mutual information at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_approx(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_approx_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 3):' % t
    print >> out, ctmcmi.cute_MI_alternate(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 4):' % t
    print >> out, ctmcmi.get_mutual_information_approx_c(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_small_approx(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_b(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 3):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_c(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 4):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_d(R, t)
    print >> out
    print >> out, 'mutual information diff at t = %f:' % t
    print >> out, mi_diff
    print >> out
    print >> out, 'mutual information diff at t = %f (ver. 2):' % t
    print >> out, mi_diff_b
    print >> out
    print >> out, 'mutual information diff at t = %f (ver. 3):' % t
    print >> out, mi_diff_c
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx(R, t)
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f: (ver. 2)' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f: (ver. 4)' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_c(R, t)
    print >> out
    print >> out, 'log of mutual information at t = %f:' % t
    print >> out, math.log(mi)
    print >> out
    #print >> out, 'estimated derivative',
    #print >> out, 'of log of mutual information at t = %f:' % t
    #print >> out, (math.log(mi_c) - math.log(mi_a)) / (2*h)
    #print >> out
    print >> out, 'estimated derivative of log of MI',
    print >> out, 'at t = %f:' % t
    print >> out, mi_diff / mi
    print >> out
    print >> out, 'large t approximation of derivative of log of MI',
    print >> out, 'at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx(
            R, t) / ctmcmi.get_mutual_information_approx(R, t)
    print >> out
    print >> out, 'large t approximation of derivative of log of MI',
    print >> out, 'at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_b(
            R, t) / ctmcmi.get_mutual_information_approx_b(R, t)
    print >> out
    print >> out, 'twice the relevant eigenvalue:'
    print >> out, 2 * spectrum[-2]
    print >> out
    print >> out
    #print >> out, 'estimated derivative',
    #print >> out, 'of mutual information at t = %f:' % t
    #print >> out, (mi_c - mi_a) / (2*h)
    #print >> out
    #print >> out, '(estimated derivative of mutual information) /',
    #print >> out, '(mutual information) at t = %f:' % t
    #print >> out, (mi_c - mi_a) / (2*h*mi_b)
    #print >> out
    return out.getvalue()
def get_response_content(fs):
    np.set_printoptions(linewidth=200)
    out = StringIO()
    R_jc = get_jc_rate_matrix()
    t = 0.1
    x = 1.6
    w = 0.5 * log(x)
    v = x_to_distn(x)
    R_hb_easy = mrate.to_gtr_halpern_bruno(R_jc, v)
    y, z = mrate.x_to_halpern_bruno_yz(x)
    yz_ratio = y / z
    R_hb_tedious = get_mut_sel_rate_matrix(y, z)
    P_hb_easy = get_trans_mat_expm(R_hb_easy, t)
    P_hb_tedious = get_trans_mat_tediously(y, z, t)
    P_hb_tedious_c = get_trans_mat_tediously_c(y, z, t)
    P_hb_from_x = get_trans_mat_from_x(x, t)
    e_ll_jc = ctmcmi.get_expected_ll_ratio(R_jc, t)
    e_ll_jc_tedious = get_jc_e_ll(t)
    e_ll_hb = ctmcmi.get_expected_ll_ratio(R_hb_easy, t)
    e_ll_hb_from_x = get_e_ll_from_x(x, t)
    e_ll_hb_from_x_b = get_e_ll_from_x_b(x, t)
    e_ll_hb_from_x_htrig = get_e_ll_from_x_htrig(x, t)
    # print some values
    print >> out, 'Jukes-Cantor mutation matrix:'
    print >> out, R_jc
    print >> out
    print >> out, 'ratio of common to uncommon probabilities:'
    print >> out, x
    print >> out
    print >> out, '1/2 log ratio:'
    print >> out, w
    print >> out
    print >> out, 'fast rate:'
    print >> out, y
    print >> out
    print >> out, 'slow rate:'
    print >> out, z
    print >> out
    print >> out, 'reciprocal of fast rate:'
    print >> out, 1.0 / y
    print >> out
    print >> out, 'ratio of fast to slow rates (should be x):'
    print >> out, yz_ratio
    print >> out
    print >> out, 'mutation-selection rate matrix (easy):'
    print >> out, R_hb_easy
    print >> out
    print >> out, 'mutation-selection rate matrix (tedious):'
    print >> out, R_hb_tedious
    print >> out
    print >> out, 'time:'
    print >> out, t
    print >> out
    print >> out, 'mutation-selection transition matrix (easy):'
    print >> out, P_hb_easy
    print >> out
    print >> out, 'mutation-selection transition matrix (tedious):'
    print >> out, P_hb_tedious
    print >> out
    print >> out, 'mutation-selection transition matrix (tedious c):'
    print >> out, P_hb_tedious_c
    print >> out
    print >> out, 'mutation-selection transition matrix (from x):'
    print >> out, P_hb_from_x
    print >> out
    print >> out, 'expected Jukes-Cantor log likelihood ratio:'
    print >> out, e_ll_jc
    print >> out
    print >> out, 'expected Jukes-Cantor log likelihood ratio (tedious):'
    print >> out, e_ll_jc_tedious
    print >> out
    print >> out, 'expected mutation-selection log likelihood ratio:'
    print >> out, e_ll_hb
    print >> out
    print >> out, 'expected mutation-selection ll ratio from x:'
    print >> out, e_ll_hb_from_x
    print >> out
    print >> out, 'expected mutation-selection ll ratio from x (impl b):'
    print >> out, e_ll_hb_from_x_b
    print >> out
    print >> out, 'expected mutation-selection ll ratio from x (htrig):'
    print >> out, e_ll_hb_from_x_htrig
    print >> out
    # check some invariants
    if np.allclose(R_hb_easy, R_hb_tedious):
        print >> out, 'halpern-bruno rate matrices are equal as expected'
    else:
        print >> out, '*** halpern-bruno rate matrices are not equal!'
    if np.allclose(P_hb_easy, P_hb_tedious):
        print >> out, 'halpern-bruno transition matrices are equal as expected'
    else:
        print >> out, '*** halpern-bruno transition matrices are not equal!'
    if np.allclose(P_hb_easy, P_hb_tedious_c):
        print >> out, 'halpern-bruno transition matrices are equal as expected'
    else:
        print >> out, '*** halpern-bruno transition matrices are not equal!'
    if np.allclose(P_hb_easy, P_hb_from_x):
        print >> out, 'halpern-bruno transition matrices are equal as expected'
    else:
        print >> out, '*** halpern-bruno trans. mat. from x is not equal!'
    # return the results
    return out.getvalue()
def get_response_content(fs):
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    # sample a random rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # get some properties of the rate matrix and its re-symmetrization
    S = mrate.symmetrized(R)
    distn = mrate.R_to_distn(R)
    w, U = np.linalg.eigh(S)
    D = np.diag(U.T[-1])**2
    D_inv = np.diag(np.reciprocal(U.T[-1]))**2
    for t in (1.0, 2.0):
        P = scipy.linalg.expm(R * t)
        M = ndot(D**.5, scipy.linalg.expm(S * t), D**.5)
        M_star = ndot(D_inv**.5, scipy.linalg.expm(S * t), D_inv**.5)
        M_star_log = np.log(M_star)
        M_star_log_w, M_star_log_U = np.linalg.eigh(M_star_log)
        E = M * np.log(M_star)
        E_w, E_U = np.linalg.eigh(E)
        print >> out, 't:'
        print >> out, t
        print >> out
        print >> out, 'randomly sampled rate matrix R'
        print >> out, R
        print >> out
        print >> out, 'symmetrized matrix S'
        print >> out, S
        print >> out
        print >> out, 'stationary distribution diagonal D'
        print >> out, D
        print >> out
        print >> out, 'R = D^-1/2 S D^1/2'
        print >> out, ndot(D_inv**.5, S, D**.5)
        print >> out
        print >> out, 'probability matrix e^(R*t) = P'
        print >> out, P
        print >> out
        print >> out, 'P = D^-1/2 e^(S*t) D^1/2'
        print >> out, ndot(D_inv**.5, scipy.linalg.expm(S * t), D**.5)
        print >> out
        print >> out, 'pairwise distribution matrix M'
        print >> out, 'M = D^1/2 e^(S*t) D^1/2'
        print >> out, M
        print >> out
        print >> out, 'sum of entries of M'
        print >> out, np.sum(M)
        print >> out
        print >> out, 'M_star = D^-1/2 e^(S*t) D^-1/2'
        print >> out, M_star
        print >> out
        print >> out, 'entrywise logarithm logij(M_star)'
        print >> out, np.log(M_star)
        print >> out
        print >> out, 'Hadamard product M o logij(M_star) = E'
        print >> out, E
        print >> out
        print >> out, 'spectrum of M:'
        print >> out, np.linalg.eigvalsh(M)
        print >> out
        print >> out, 'spectrum of logij(M_star):'
        print >> out, M_star_log_w
        print >> out
        print >> out, 'corresponding eigenvectors of logij(M_star) as columns:'
        print >> out, M_star_log_U
        print >> out
        print >> out, 'spectrum of E:'
        print >> out, E_w
        print >> out
        print >> out, 'corresponding eigenvectors of E as columns:'
        print >> out, E_U
        print >> out
        print >> out, 'entrywise square roots of stationary distribution:'
        print >> out, np.sqrt(v)
        print >> out
        print >> out, 'sum of entries of E:'
        print >> out, np.sum(E)
        print >> out
        print >> out, 'mutual information:'
        print >> out, ctmcmi.get_mutual_information(R, t)
        print >> out
        print >> out
    return out.getvalue()
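The identities printed above rest on the similarity transform between R and its symmetrization. The sketch below assumes mrate.symmetrized follows the standard construction S = D^(1/2) R D^(-1/2) with D = diag(v); the actual mrate code is not shown in this listing, so treat it as an illustration rather than the library's definition.

import numpy as np

def symmetrized_sketch(R, v):
    # Sketch of the standard symmetrization of a reversible rate matrix:
    # S[i, j] = sqrt(v[i]) * R[i, j] / sqrt(v[j]), which is symmetric
    # exactly when R satisfies detailed balance with respect to v.
    d = np.sqrt(np.asarray(v, dtype=float))
    return R * d[:, np.newaxis] / d[np.newaxis, :]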