Exemplo n.º 1
0
 def test_timeForSimilarity(self):
     """Rates timeForSimilarity should return correct time"""
     probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.15, 0.8],
                   self.abc_pairs)
     rates = probs.toRates()
     target = 0.5

     # Without explicit frequencies: exponentiating the rates to the
     # returned time must give a mean diagonal equal to the target.
     elapsed = rates.timeForSimilarity(target)
     evolved = expm(rates._data)(elapsed)
     self.assertFloatEqual(average(diagonal(evolved), axis=0), target)

     # Same check when three equal frequencies are passed explicitly.
     equal_freqs = array([1/3.0]*3)
     elapsed = rates.timeForSimilarity(target, equal_freqs)
     evolved = expm(rates._data)(elapsed)
     self.assertFloatEqual(average(diagonal(evolved), axis=0), target)

     # A similarity of 1 is expected at time 0.
     self.assertEqual(rates.timeForSimilarity(1), 0)
Exemplo n.º 2
0
 def test_timeForSimilarity(self):
     """Rates timeForSimilarity should return correct time"""
     probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.15, 0.8],
                   self.abc_pairs)
     rates = probs.toRates()
     target = 0.5

     # Without explicit frequencies: exponentiating the rates to the
     # returned time must give a mean diagonal equal to the target.
     elapsed = rates.timeForSimilarity(target)
     evolved = expm(rates._data)(elapsed)
     self.assertFloatEqual(average(diagonal(evolved), axis=0), target)

     # Same check when three equal frequencies are passed explicitly.
     equal_freqs = array([1 / 3.0] * 3)
     elapsed = rates.timeForSimilarity(target, equal_freqs)
     evolved = expm(rates._data)(elapsed)
     self.assertFloatEqual(average(diagonal(evolved), axis=0), target)

     # A similarity of 1 is expected at time 0.
     self.assertEqual(rates.timeForSimilarity(1), 0)
Exemplo n.º 3
0
    def fixNegsConstrainedOpt(self, to_minimize=norm_diff, badness=1e6):
        """Uses constrained minimization to find approx q matrix.

        to_minimize: metric for comparing orig result and new result.

        badness: scale factor for penalizing negative off-diagonal values.

        Returns self unchanged when sum_neg_off_diags reports nothing to
        fix; otherwise returns a new instance of the same class built from
        the optimized off-diagonal values.

        The shape of the off-diagonal block is taken from the output of
        without_diag instead of being hard-coded to (4, 3), so the method
        is not tied to 4x4 matrices.
        """
        if not sum_neg_off_diags(self._data):
            return self
        off_diag = array(without_diag(self._data))
        off_shape = off_diag.shape
        # Target probabilities: the original matrix exponentiated at t=1.
        p = expm(self._data)(t=1)

        def err_f(candidate):
            """Distance to the target probs plus a negativity penalty."""
            new_q = reshape(array(candidate), off_shape)
            # The diagonal passed to with_diag is minus each row's sum.
            new_q = with_diag(new_q, -sum(new_q, 1))
            p_new = expm(new_q)(t=1)
            result = to_minimize(ravel(p), ravel(p_new))
            lowest = candidate.min()
            if lowest < 0:
                # Penalize by the most negative entry, scaled by badness.
                result += -lowest * badness
            return result

        xmin = fmin(func=err_f, x0=ravel(off_diag), disp=0)
        r = reshape(xmin, off_shape)
        new_q = with_diag(r, -sum(r, 1))
        return self.__class__(new_q, self.Alphabet)
Exemplo n.º 4
0
    def toProbs(self, time=1.0):
        """Returns probs at exp(self*time).

        The way this works is by diagonalizing the rate matrix so that u is
        the matrix with eigenvectors as columns, v is a vector of eigenvalues,
        and w is the inverse of u. u * diag(v) * w reconstructs the original
        rate matrix. u * diag(exp(v*t)) * w exponentiates the rate matrix to
        time t.

        This is more expensive than a single exponentiation if the rate matrix
        is going to be exponentiated only once, but faster if it is to be
        exponentiated to many different time points.

        Note that the diagonalization is not the same as the svd.

        If the diagonalization fails, we use the naive version of just
        multiplying the rate matrix by the time and exponentiating.
        """
        try:
            u, v, w = self._diagonalized
            # scale the eigenvalues to the requested time: exp(v*t)
            v = diag(exp(v * time))
            return Probs(dot(dot(u, v), w), self.Alphabet)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit still propagate; any ordinary
            # failure falls back to direct exponentiation.
            return Probs(expm(self._data)(time), self.Alphabet)
Exemplo n.º 5
0
    def toProbs(self, time=1.0):
        """Returns probs at exp(self*time).

        The way this works is by diagonalizing the rate matrix so that u is
        the matrix with eigenvectors as columns, v is a vector of eigenvalues,
        and w is the inverse of u. u * diag(v) * w reconstructs the original
        rate matrix. u * diag(exp(v*t)) * w exponentiates the rate matrix to
        time t.

        This is more expensive than a single exponentiation if the rate matrix
        is going to be exponentiated only once, but faster if it is to be
        exponentiated to many different time points.

        Note that the diagonalization is not the same as the svd.

        If the diagonalization fails, we use the naive version of just
        multiplying the rate matrix by the time and exponentiating.
        """
        try:
            u, v, w = self._diagonalized
            # scale the eigenvalues to the requested time: exp(v*t)
            v = diag(exp(v * time))
            return Probs(dot(dot(u, v), w), self.Alphabet)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit still propagate; any ordinary
            # failure falls back to direct exponentiation.
            return Probs(expm(self._data)(time), self.Alphabet)
Exemplo n.º 6
0
 def err_f(q):
     """Distance between the target probs and those implied by q.

     q is reshaped to (4, 3), given a diagonal of minus its row sums via
     with_diag, and exponentiated at t=1. If q has any negative entry, the
     most negative one (scaled by badness) is added as a penalty.
     """
     off_diag = reshape(array(q), (4, 3))
     candidate = with_diag(off_diag, -sum(off_diag, 1))
     p_new = expm(candidate)(t=1)
     error = to_minimize(ravel(p), ravel(p_new))
     lowest = q.min()
     if lowest < 0:
         error += -lowest * badness
     return error
Exemplo n.º 7
0
 def err_f(q):
     """Distance between the target probs and those implied by q.

     q is reshaped to (4, 3), given a diagonal of minus its row sums via
     with_diag, and exponentiated at t=1. If q has any negative entry, the
     most negative one (scaled by badness) is added as a penalty.
     """
     off_diag = reshape(array(q), (4, 3))
     candidate = with_diag(off_diag, -sum(off_diag, 1))
     p_new = expm(candidate)(t=1)
     error = to_minimize(ravel(p), ravel(p_new))
     lowest = q.min()
     if lowest < 0:
         error += -lowest * badness
     return error
Exemplo n.º 8
0
 def _make_error_f(self, to_minimize):
     """Make error function whose minimization estimates q = ln(p).

     The returned function takes a flat q, reshapes it to (4, 4) and
     returns to_minimize(target probs, exp(q) at t=1) plus two penalty
     terms scaled by a large constant.
     """
     BIG = 1e10
     # Target probabilities: this object's matrix exponentiated at t=1.
     p = expm(self._data)(t=1)

     def result(q):
         candidate = reshape(q, (4, 4))
         negative_total = sum_neg_off_diags(candidate)
         p_new = expm(candidate)(t=1)
         mismatch = to_minimize(ravel(p), ravel(p_new))
         # presumably negative_total is <= 0, so subtracting it raises the
         # error -- confirm against sum_neg_off_diags
         negativity_term = BIG * negative_total
         # penalize rows whose entries do not sum to zero
         row_sum_term = BIG * sum(abs(sum(candidate, 1)))
         return mismatch - negativity_term + row_sum_term

     return result
Exemplo n.º 9
0
 def _make_error_f(self, to_minimize):
     """Make error function whose minimization estimates q = ln(p).

     The returned function takes a flat q, reshapes it to (4, 4) and
     returns to_minimize(target probs, exp(q) at t=1) plus two penalty
     terms scaled by a large constant.
     """
     BIG = 1e10
     # Target probabilities: this object's matrix exponentiated at t=1.
     p = expm(self._data)(t=1)

     def result(q):
         candidate = reshape(q, (4, 4))
         negative_total = sum_neg_off_diags(candidate)
         p_new = expm(candidate)(t=1)
         mismatch = to_minimize(ravel(p), ravel(p_new))
         # presumably negative_total is <= 0, so subtracting it raises the
         # error -- confirm against sum_neg_off_diags
         negativity_term = BIG * negative_total
         # penalize rows whose entries do not sum to zero
         row_sum_term = BIG * sum(abs(sum(candidate, 1)))
         return mismatch - negativity_term + row_sum_term

     return result
Exemplo n.º 10
0
    def test_get_psub_rate_matrix(self):
        """lf should return consistent rate matrix and psub"""
        edge = 'NineBande'
        continuous_lf = self.submodel.makeLikelihoodFunction(self.tree)
        continuous_lf.setAlignment(self.data)
        rate_matrix = continuous_lf.getRateMatrixForEdge(edge)
        psub = continuous_lf.getPsubForEdge(edge)
        # exponentiating the edge's rate matrix at t=1.0 must give its psub
        self.assertFloatEqual(expm(rate_matrix.array)(1.0), psub.array)

        # asking a discrete Markov model for a rate matrix should fail
        discrete_model = substitution_model.DiscreteSubstitutionModel(
            DNA.Alphabet)
        discrete_lf = discrete_model.makeLikelihoodFunction(self.tree)
        discrete_lf.setAlignment(self.data)
        self.assertRaises(Exception, discrete_lf.getRateMatrixForEdge, edge)
Exemplo n.º 11
0
def make_p(length, coord, val):
    """Return the probability matrix for a modified 4x4 rate matrix.

    Every off-diagonal rate starts at 0.25 (assumes equi-frequent mprobs at
    root) and the entry selected by coord is multiplied by val. Each
    diagonal is then reduced by its row's sum, the whole matrix is
    multiplied by 1 / sum(0.25 * row sums), and the result is exponentiated
    to length.
    """
    size = 4
    rates = ones((size, size), float) * 0.25
    for k in range(size):
        rates[k, k] = 0.0
    rates[coord] *= val
    row_totals = rates.sum(axis=1)
    scale = 1 / (.25 * row_totals).sum()
    for k, total in enumerate(row_totals):
        rates[k, k] -= total
    rates *= scale
    return expm(rates)(length)
Exemplo n.º 12
0
def make_p(length, coord, val):
    """Return the probability matrix for a modified 4x4 rate matrix.

    Every off-diagonal rate starts at 0.25 (assumes equi-frequent mprobs at
    root) and the entry selected by coord is multiplied by val. Each
    diagonal is then reduced by its row's sum, the whole matrix is
    multiplied by 1 / sum(0.25 * row sums), and the result is exponentiated
    to length.
    """
    size = 4
    rates = ones((size, size), float) * 0.25
    for k in range(size):
        rates[k, k] = 0.0
    rates[coord] *= val
    row_totals = rates.sum(axis=1)
    scale = 1 / (.25 * row_totals).sum()
    for k, total in enumerate(row_totals):
        rates[k, k] -= total
    rates *= scale
    return expm(rates)(length)
Exemplo n.º 13
0
 def test_get_psub_rate_matrix(self):
     """lf should return consistent rate matrix and psub"""
     edge = 'NineBande'
     continuous_lf = self.submodel.makeLikelihoodFunction(self.tree)
     continuous_lf.setAlignment(self.data)
     rate_matrix = continuous_lf.getRateMatrixForEdge(edge)
     psub = continuous_lf.getPsubForEdge(edge)
     # exponentiating the edge's rate matrix at t=1.0 must give its psub
     self.assertFloatEqual(expm(rate_matrix.array)(1.0), psub.array)

     # asking a discrete Markov model for a rate matrix should fail
     discrete_model = substitution_model.DiscreteSubstitutionModel(
         DNA.Alphabet)
     discrete_lf = discrete_model.makeLikelihoodFunction(self.tree)
     discrete_lf.setAlignment(self.data)
     self.assertRaises(Exception, discrete_lf.getRateMatrixForEdge, edge)
Exemplo n.º 14
0
 def test_toProbs(self):
     """Rates toProbs should return correct probability matrix"""
     probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.1, 0.85],
                   self.abc_pairs)
     rates = probs.toRates()
     # the rates must equal the matrix log of the probabilities
     self.assertEqual(rates._data, logm(probs._data))
     # and converting back must recover the original probabilities
     round_trip = rates.toProbs()
     self.assertFloatEqual(round_trip._data, probs._data)

     #test a case that didn't work for DNA
     dna_values = array(
         [[-0.64098451,  0.0217681 ,  0.35576469,  0.26345171],
          [ 0.31144238, -0.90915091,  0.25825858,  0.33944995],
          [ 0.01578521,  0.43162879, -0.99257581,  0.54516182],
          [ 0.13229986,  0.04027147,  0.05817791, -0.23074925]])
     dna_rates = Rates(dna_values, DnaPairs)
     observed = dna_rates.toProbs(0.5)._data
     expected = expm(dna_rates._data)(t=0.5)
     self.assertFloatEqual(observed, expected)
Exemplo n.º 15
0
    def test_toProbs(self):
        """Rates toProbs should return correct probability matrix"""
        probs = Probs([0.75, 0.1, 0.15, 0.2, 0.7, 0.1, 0.05, 0.1, 0.85],
                      self.abc_pairs)
        rates = probs.toRates()
        # the rates must equal the matrix log of the probabilities
        self.assertEqual(rates._data, logm(probs._data))
        # and converting back must recover the original probabilities
        round_trip = rates.toProbs()
        self.assertFloatEqual(round_trip._data, probs._data)

        #test a case that didn't work for DNA
        dna_values = array(
            [[-0.64098451, 0.0217681, 0.35576469, 0.26345171],
             [0.31144238, -0.90915091, 0.25825858, 0.33944995],
             [0.01578521, 0.43162879, -0.99257581, 0.54516182],
             [0.13229986, 0.04027147, 0.05817791, -0.23074925]])
        dna_rates = Rates(dna_values, DnaPairs)
        observed = dna_rates.toProbs(0.5)._data
        expected = expm(dna_rates._data)(t=0.5)
        self.assertFloatEqual(observed, expected)
Exemplo n.º 16
0
def test_heuristics(p_range=None, num_to_do=71, heuristics=None):
    """Print the average error each negative-fixing heuristic leaves behind.

    p_range: values passed as the second argument to Probs.random;
    defaults to [0.6].

    num_to_do: number of rate matrices with negative off-diagonals to
    evaluate for each p.

    heuristics: names of the methods to call on each rate matrix; defaults
    to the four fixNegs* methods listed below.

    Writes a tab-separated header line, then one line per p, to standard
    output. Returns nothing.
    """
    if p_range is None:
        p_range = [0.6]
    if heuristics is None:
        heuristics = [
            'fixNegsDiag', 'fixNegsEven', 'fixNegsReflect',
            'fixNegsConstrainedOpt'
        ]
    num_heuristics = len(heuristics)
    print '\t'.join(['p'] + heuristics)
    for p in p_range:
        # one row per evaluated matrix, one column per heuristic
        result = zeros((num_to_do, num_heuristics), Float64)
        # NOTE(review): has_nonzero is incremented below but never read
        has_nonzero = 0
        i = 0
        while i < num_to_do:
            curr_row = result[i]
            random_p = Probs.random(DnaPairs, p)
            q = random_p.toRates()
            # only matrices with negative off-diagonals count; i is not
            # advanced here, so the loop retries with a fresh random matrix
            if not q.hasNegOffDiags():
                continue
            has_nonzero += 1
            #print "P:"
            #print random_p._data
            #print "Q:"
            #print q._data
            i += 1
            for j, h in enumerate(heuristics):
                #print "HEURISTIC: ", h
                # look the heuristic up by name and apply it
                q_corr = getattr(q, h)()
                #print "CORRECTED Q: "
                #print q_corr._data
                p_corr = expm(q_corr._data)(t=1)
                #print "CORRECTED P:"
                #print p_corr
                # distance between the corrected probs and the original
                dist = norm_diff(p_corr, random_p._data)
                #print "DISTANCE: ", dist
                curr_row[j] = dist
        # NOTE(review): relies on average's default axis giving one value
        # per heuristic column -- confirm for the array library in use
        averages = average(result)
        print p, '\t', '\t'.join(map(str, averages))
Exemplo n.º 17
0
    def fixNegsConstrainedOpt(self, to_minimize=norm_diff, badness=1e6):
        """Uses constrained minimization to find approx q matrix.

        to_minimize: metric for comparing orig result and new result.

        badness: scale factor for penalizing negative off-diagonal values.

        Returns self unchanged when sum_neg_off_diags reports nothing to
        fix; otherwise returns a new instance of the same class built from
        the optimized off-diagonal values.

        The shape of the off-diagonal block is taken from the output of
        without_diag instead of being hard-coded to (4, 3), so the method
        is not tied to 4x4 matrices.
        """
        if not sum_neg_off_diags(self._data):
            return self
        off_diag = array(without_diag(self._data))
        off_shape = off_diag.shape
        # Target probabilities: the original matrix exponentiated at t=1.
        p = expm(self._data)(t=1)

        def err_f(candidate):
            """Distance to the target probs plus a negativity penalty."""
            new_q = reshape(array(candidate), off_shape)
            # The diagonal passed to with_diag is minus each row's sum.
            new_q = with_diag(new_q, -sum(new_q, 1))
            p_new = expm(new_q)(t=1)
            result = to_minimize(ravel(p), ravel(p_new))
            lowest = candidate.min()
            if lowest < 0:
                # Penalize by the most negative entry, scaled by badness.
                result += -lowest * badness
            return result

        xmin = fmin(func=err_f, x0=ravel(off_diag), disp=0)
        r = reshape(xmin, off_shape)
        new_q = with_diag(r, -sum(r, 1))
        return self.__class__(new_q, self.Alphabet)
Exemplo n.º 18
0
def test_heuristics(p_range=None, num_to_do=71, heuristics=None):
    """Print the average error each negative-fixing heuristic leaves behind.

    p_range: values passed as the second argument to Probs.random;
    defaults to [0.6].

    num_to_do: number of rate matrices with negative off-diagonals to
    evaluate for each p.

    heuristics: names of the methods to call on each rate matrix; defaults
    to the four fixNegs* methods listed below.

    Writes a tab-separated header line, then one line per p, to standard
    output. Returns nothing.
    """
    if p_range is None:
        p_range = [0.6]
    if heuristics is None:
        heuristics = ['fixNegsDiag', 'fixNegsEven', 'fixNegsReflect', 'fixNegsConstrainedOpt']
    num_heuristics = len(heuristics)
    print '\t'.join(['p'] + heuristics)
    for p in p_range:
        # one row per evaluated matrix, one column per heuristic
        result = zeros((num_to_do, num_heuristics), Float64)
        # NOTE(review): has_nonzero is incremented below but never read
        has_nonzero = 0
        i = 0
        while i < num_to_do:
            curr_row = result[i]
            random_p = Probs.random(DnaPairs, p)
            q = random_p.toRates()
            # only matrices with negative off-diagonals count; i is not
            # advanced here, so the loop retries with a fresh random matrix
            if not q.hasNegOffDiags():
                continue
            has_nonzero += 1
            #print "P:"
            #print random_p._data
            #print "Q:"
            #print q._data
            i += 1
            for j, h in enumerate(heuristics):
                #print "HEURISTIC: ", h
                # look the heuristic up by name and apply it
                q_corr = getattr(q, h)()
                #print "CORRECTED Q: "
                #print q_corr._data
                p_corr = expm(q_corr._data)(t=1)
                #print "CORRECTED P:"
                #print p_corr
                # distance between the corrected probs and the original
                dist = norm_diff(p_corr, random_p._data)
                #print "DISTANCE: ", dist
                curr_row[j] = dist
        # NOTE(review): relies on average's default axis giving one value
        # per heuristic column -- confirm for the array library in use
        averages = average(result)
        print p, '\t', '\t'.join(map(str, averages))
Exemplo n.º 19
0
 def result(q):
     """Error of a flat candidate q against the target probabilities.

     q is reshaped to (4, 4) and exponentiated at t=1; the to_minimize
     distance to p is combined with two penalty terms scaled by BIG.
     """
     candidate = reshape(q, (4, 4))
     negative_total = sum_neg_off_diags(candidate)
     p_new = expm(candidate)(t=1)
     mismatch = to_minimize(ravel(p), ravel(p_new))
     # presumably negative_total is <= 0, so subtracting it raises the
     # error -- confirm against sum_neg_off_diags
     negativity_term = BIG * negative_total
     # penalize rows whose entries do not sum to zero
     row_sum_term = BIG * sum(abs(sum(candidate, 1)))
     return mismatch - negativity_term + row_sum_term
Exemplo n.º 20
0
 def similarity_f(t):
     """Absolute gap between the freqs-weighted diagonal sum at t and similarity."""
     weighted = expm(q)(t)*freqs
     observed = sum(diagonal(weighted))
     return abs(observed-similarity)
Exemplo n.º 21
0
 def similarity_f(t):
     """Absolute gap between the mean diagonal of exp(q) at t and similarity."""
     probs_at_t = expm(q)(t)
     observed = average(diagonal(probs_at_t))
     return abs(observed-similarity)
Exemplo n.º 22
0
 def similarity_f(t):
     """Absolute gap between the mean diagonal of exp(q) at t and similarity."""
     probs_at_t = expm(q)(t)
     observed = average(diagonal(probs_at_t))
     return abs(observed - similarity)
Exemplo n.º 23
0
 def similarity_f(t):
     """Absolute gap between the freqs-weighted diagonal sum at t and similarity."""
     weighted = expm(q)(t) * freqs
     observed = sum(diagonal(weighted))
     return abs(observed - similarity)
Exemplo n.º 24
0
 def result(q):
     """Error of a flat candidate q against the target probabilities.

     q is reshaped to (4, 4) and exponentiated at t=1; the to_minimize
     distance to p is combined with two penalty terms scaled by BIG.
     """
     candidate = reshape(q, (4, 4))
     negative_total = sum_neg_off_diags(candidate)
     p_new = expm(candidate)(t=1)
     mismatch = to_minimize(ravel(p), ravel(p_new))
     # presumably negative_total is <= 0, so subtracting it raises the
     # error -- confirm against sum_neg_off_diags
     negativity_term = BIG * negative_total
     # penalize rows whose entries do not sum to zero
     row_sum_term = BIG * sum(abs(sum(candidate, 1)))
     return mismatch - negativity_term + row_sum_term