def test_mutate(self):
    """Probs mutate should return correct vector from input vector"""
    pair_alphabet = Alphabet('abc')**2
    probs = Probs([0.5, 0.25, 0.25,
                   0.1, 0.8, 0.1,
                   0.3, 0.6, 0.1], pair_alphabet)
    # Floating-point accumulation means the exact boundaries cannot be
    # predicted, so the draws sitting on a boundary are nudged by eps.
    eps = 1e-6
    # input symbols:    a  b  b  a  c  c  a  b  c
    seq = array([0, 1, 1, 0, 2, 2, 0, 1, 2])
    random_vec = array([0, .01, .8 - eps, 1, 1, .3, .05, .9 + eps, .95])
    observed = probs.mutate(seq, random_vec)
    # expected symbols:      a  a  b  c  c  a  a  c  c
    expected = array([0, 0, 1, 2, 2, 0, 0, 2, 2])
    self.assertEqual(observed, expected)

    # Check that the frequency distribution is about right: call mutate
    # 1000 times without a random vector and count, per position, how often
    # the result is symbol 0.
    samples = array([probs.mutate(seq) for _ in range(1000)])
    # WARNING: bool operators return byte arrays, whose sums wrap at 256,
    # hence the conversion to int32 before summing.
    is_zero = asarray(samples == 0, 'int32')
    observed_counts = sum(is_zero, axis=0)
    # Expected counts are n*p; variances are n*p*q, i.e. 250, 90 and 210.
    expected_counts = array([500, 100, 100, 500, 300, 300, 500, 100, 300])
    variances = array([250, 90, 90, 250, 210, 210, 250, 90, 210])
    three_sd = 3 * sqrt(variances)
    # NOTE: the accepted window is 2 * three_sd either side of the
    # expectation, i.e. six standard deviations in total.
    for obs, exp, spread in zip(observed_counts, expected_counts, three_sd):
        lower = exp - 2 * spread
        upper = exp + 2 * spread
        assert lower < obs < upper
def test_fixNegsReflect(self):
    """Rates fixNegsReflect should reflect negatives across diagonal"""
    ab = Alphabet('ab')**2
    # (flat input rates, expected 2x2 matrix) pairs, checked in this order.
    two_by_two_cases = [
        # no negative off-diagonal elements: matrix is left alone
        ([0, 0, 1, -1], [[0, 0], [1, -1]]),
        ([-2, 2, 1, -1], [[-2, 2], [1, -1]]),
        # precisely one off-diagonal element in a pair is negative
        ([2, -2, 1, -1], [[0, 0], [3, -3]]),
        ([-1, 1, -2, 2], [[-3, 3], [0, -0]]),
        # both off-diagonal elements in a pair are negative
        ([1, -1, -2, 2], [[-2, 2], [1, -1]]),
        ([2, -2, -1, 1], [[-1, 1], [2, -2]]),
    ]
    for flat_rates, expected in two_by_two_cases:
        rates = Rates(flat_rates, ab)
        self.assertEqual(rates.fixNegsReflect()._data, array(expected))

    # larger 4x4 example using the RnaPairs alphabet
    big = Rates(
        [[0, 3, -2, -1],
         [2, -1, 2, -3],
         [-1, -1, 2, 0],
         [-3, 2, 0, 1]], RnaPairs)
    reflected = big.fixNegsReflect()
    expected_big = array(
        [[-7, 3, 1, 3],
         [2, -5, 3, 0],
         [2, 0, -2, 0],
         [1, 5, 0, -6]])
    self.assertEqual(reflected._data, expected_big)
def test_toCounts(self):
    """Probs toCounts should return counts object w/ right numbers"""
    pair_alphabet = Alphabet('abc')**2
    probs = Probs([0.5, 0.25, 0.25,
                   0.1, 0.8, 0.1,
                   0.3, 0.6, 0.1], pair_alphabet)
    observed = probs.toCounts(30)
    assert isinstance(observed, Counts)
    # the expected counts are each probability above scaled by 10
    expected = Counts([[5., 2.5, 2.5, 1, 8, 1, 3, 6, 1]], pair_alphabet)
    self.assertEqual(observed, expected)
def test_makeModel(self):
    """Probs makeModel should return correct substitution pattern"""
    pair_alphabet = Alphabet('abc')**2
    probs = Probs([0.5, 0.25, 0.25,
                   0.1, 0.8, 0.1,
                   0.3, 0.6, 0.1], pair_alphabet)
    observed = probs.makeModel(array([0, 1, 1, 0, 2, 2]))
    # one expected row per input index: row 0, 1 or 2 of the matrix above
    row_0 = [0.5, 0.25, 0.25]
    row_1 = [0.1, 0.8, 0.1]
    row_2 = [0.3, 0.6, 0.1]
    expected = array([row_0, row_1, row_1, row_0, row_2, row_2])
    self.assertEqual(observed, expected)
def setUp(self):
    """Create the 'ab' alphabet and a Usage subclass that uses it."""
    ab_alphabet = Alphabet('ab')
    self.ab = ab_alphabet

    class abUsage(Usage):
        # class-level Alphabet attribute fixed to the 'ab' alphabet
        Alphabet = ab_alphabet

    self.abUsage = abUsage
def test_getitem(self):
    """PairMatrix __getitem__ should translate indices and get from array"""
    named = self.named
    # a single symbol selects a whole row
    self.assertEqual(named['a'], array([1, 2]))
    self.assertEqual(named['b'], array([3, 4]))
    # a pair of symbols selects a single element
    for key, value in [(('a', 'a'), 1), (('a', 'b'), 2),
                       (('b', 'a'), 3), (('b', 'b'), 4)]:
        self.assertEqual(named[key], value)

    # WARNING: m[a][b] doesn't work b/c indices not translated!
    # must access as m[a,b] instead. The chained lookup is only attempted
    # here; a ValueError or IndexError from it is tolerated.
    filterwarnings("ignore", "using a non-integer")
    try:
        named['a']['b']
    except (ValueError, IndexError):
        pass

    # should work even if SubAlphabets not the same
    letters = Alphabet('ab')
    xyz = Alphabet('xyz')
    joint = letters * xyz
    m = PairMatrix([1, 2, 3, 4, 5, 6], joint)
    for key, value in [(('a', 'x'), 1), (('a', 'y'), 2), (('a', 'z'), 3),
                       (('b', 'x'), 4), (('b', 'y'), 5), (('b', 'z'), 6)]:
        self.assertEqual(m[key], value)

    # should work even if SubAlphabets are different types
    numbers = Alphabet([1, 2, 3])
    words = Alphabet(['abc', 'xyz'])
    joint = numbers * words
    m = PairMatrix([1, 2, 3, 4, 5, 6], joint)
    for key, value in [((1, 'abc'), 1), ((1, 'xyz'), 2),
                       ((2, 'abc'), 3), ((2, 'xyz'), 4),
                       ((3, 'abc'), 5), ((3, 'xyz'), 6)]:
        self.assertEqual(m[key], value)
    self.assertEqual(list(m[2]), [3, 4])
    # gives KeyError if single item not present in first level
    self.assertRaises(KeyError, m.__getitem__, 'x')
def test_toRates(self):
    """Probs toRates should return log of probs, optionally normalized"""
    pair_alphabet = Alphabet('abc')**2
    probs = Probs([0.9, 0.05, 0.05,
                   0.1, 0.85, 0.05,
                   0.02, 0.02, 0.96], pair_alphabet)
    assert probs.isValid()

    # default conversion: a valid, non-complex Rates equal to logm(probs)
    rates = probs.toRates()
    assert isinstance(rates, Rates)
    assert rates.isValid()
    assert not rates.isComplex()
    self.assertEqual(rates._data, logm(probs._data))

    # normalized conversion: the trace of the result should be -1
    normalized = probs.toRates(normalize=True)
    self.assertFloatEqual(trace(normalized._data), -1.0)
def test_ne(self):
    """PairMatrix test for inequality should check all elements"""
    pairs = self.ab_pairs
    empty_one = PairMatrix.empty(pairs)
    empty_two = PairMatrix.empty(pairs)
    filled_one = PairMatrix([1, 2, 3, 4], pairs)
    filled_two = PairMatrix([1, 2, 3, 4], pairs)
    assert empty_one != filled_one
    assert empty_one == empty_two
    assert filled_one == filled_two

    # Note: still compare equal if alphabets are different
    xy = Alphabet('xy')
    xy_pairs = xy * xy
    other_alphabet = PairMatrix([1, 2, 3, 4], xy_pairs)
    assert other_alphabet == filled_one

    # should check all elements, not just first
    all_ones = PairMatrix([1, 1, 1, 1], pairs)
    last_differs = PairMatrix([1, 1, 1, 4], pairs)
    assert all_ones != last_differs
def test_eq(self):
    """PairMatrix test for equality should check all elements"""
    pairs = self.ab_pairs

    # two distinct empty matrices compare equal
    empty_one = PairMatrix.empty(pairs)
    empty_two = PairMatrix.empty(pairs)
    assert empty_one is not empty_two
    self.assertEqual(empty_one, empty_two)

    # two distinct matrices with the same data compare equal
    filled_one = PairMatrix([1, 2, 3, 4], pairs)
    filled_two = PairMatrix([1, 2, 3, 4], pairs)
    assert filled_one is not filled_two
    self.assertEqual(filled_one, filled_two)
    self.assertNotEqual(empty_one, filled_one)

    # Note: still compare equal if alphabets are different
    xy = Alphabet('xy')
    xy_pairs = xy * xy
    other_alphabet = PairMatrix([1, 2, 3, 4], xy_pairs)
    self.assertEqual(other_alphabet, filled_one)

    # should check all elements, not just first
    all_ones = PairMatrix([1, 1, 1, 1], pairs)
    last_differs = PairMatrix([1, 1, 1, 4], pairs)
    assert all_ones is not last_differs
    self.assertNotEqual(all_ones, last_differs)
def _with(self, motifs):
    """Return the result of Alphabet._with, retyped to this object's class.

    The object built by Alphabet._with(self, motifs) has its __class__ set
    to type(self) and is given this object's _gc attribute before being
    returned.
    """
    derived = Alphabet._with(self, motifs)
    derived.__class__ = type(self)
    derived._gc = self._gc
    return derived
def __init__(self, motifset, Gap=IUPAC_gap, Missing=IUPAC_missing,\
        Gaps=None,
        Sequence=None, Ambiguities=None,
        label=None, Complements=None, Pairs=None, MWCalculator=None, \
        add_lower=False, preserve_existing_moltypes=False, \
        make_alphabet_group=False, ModelSeq=None):
    """Returns a new MolType object. Note that the parameters are in flux.

    Currently:
        motifset: Alphabet or sequence of items in the default
            alphabet. Does not include degenerates.

        Gap: default gap symbol

        Missing: symbol for missing data

        Gaps: any other symbols that should be treated as gaps (doesn't have
            to include Gap or Missing; they will be silently added)

        Sequence: Class for constructing sequences. Defaults to ''.join.
            Unless preserve_existing_moltypes is True, a supplied class has
            its MolType attribute set to this object.

        Ambiguities: dict of char:tuple, doesn't include gaps (these are
            hard-coded as - and ?, and added later).

        label: text label, don't know what this is used for. Unnecessary?

        Complements: dict of symbol:symbol showing how the non-degenerate
            single characters complement each other. Used for constructing
            on the fly the complement table, incl. support for mustPair and
            canPair.

        Pairs: dict in which keys are pairs of symbols that can pair
            with each other, values are True (must pair) or False (might
            pair). Currently, the meaning of GU pairs as 'weak' is conflated
            with the meaning of degenerate symbol pairs (which might pair
            with each other but don't necessarily, depending on how the
            symbol is resolved). This should be refactored.

        MWCalculator: f(seq) -> molecular weight.

        add_lower: if True (default: False) adds the lowercase versions of
            everything into the alphabet. Slated for deletion.

        preserve_existing_moltypes: if True (default: False), does not
            set the MolType of the things added in **kwargs to self.

        make_alphabet_group: if True, makes an AlphabetGroup relating
            the various alphabets to one another.

        ModelSeq: sequence type for modeling

    Note on "Degenerates" versus "Ambiguities": self.Degenerates contains
    _only_ mappings for degenerate symbols, whereas self.Ambiguities
    contains mappings for both degenerate and non-degenerate symbols.
    Sometimes you want one, sometimes the other, so both are provided.
    """
    self.Gap = Gap
    self.Missing = Missing
    # Gap and Missing always count as gaps; extra Gaps symbols are merged in
    self.Gaps = frozenset([Gap, Missing])
    if Gaps:
        self.Gaps = self.Gaps.union(frozenset(Gaps))
    self.label = label
    #set the sequence constructor
    if Sequence is None:
        Sequence = ''.join     #safe default string constructor
    elif not preserve_existing_moltypes:
        # side effect: the supplied Sequence class is modified in place
        Sequence.MolType = self
    self.Sequence = Sequence

    #set the ambiguities
    # Missing maps to every motif plus the gap; Gap maps only to itself
    ambigs = {self.Missing:tuple(motifset)+(self.Gap,),self.Gap:(self.Gap,)}
    if Ambiguities:
        ambigs.update(Ambiguities)
    # each plain motif maps to itself; written last, so it replaces any
    # entry with the same key that came from Ambiguities
    for c in motifset:
        ambigs[c] = (c,)
    self.Ambiguities = ambigs

    #set Complements -- must set before we make the alphabet group
    self.Complements = Complements or {}

    if make_alphabet_group: #note: must use _original_ ambiguities here
        self.Alphabets = AlphabetGroup(motifset, Ambiguities, \
            MolType=self)
        self.Alphabet = self.Alphabets.Base
    else:
        # an Enumeration is used as-is; motifs that are all of length 1 get
        # a CharAlphabet; anything else gets a general Alphabet
        if isinstance(motifset, Enumeration):
            self.Alphabet = motifset
        # NOTE(review): max() raises ValueError on an empty motifset that
        # is not an Enumeration -- confirm callers never pass one
        elif max(len(motif) for motif in motifset) == 1:
            self.Alphabet = CharAlphabet(motifset, MolType=self)
        else:
            self.Alphabet = Alphabet(motifset, MolType=self)
    #set the other properties
    # work on a copy so the caller's Ambiguities dict is not changed by the
    # Missing entry added on the next line
    self.Degenerates = Ambiguities and Ambiguities.copy() or {}
    self.Degenerates[self.Missing] = ''.join(motifset)+self.Gap
    self.Matches = make_matches(motifset, self.Gaps, self.Degenerates)
    # start from a copy of the supplied Pairs, then add what make_pairs returns
    self.Pairs = Pairs and Pairs.copy() or {}
    self.Pairs.update(make_pairs(Pairs, motifset, self.Gaps, \
        self.Degenerates))
    self.MWCalculator = MWCalculator
    #add lowercase characters, if we're doing that
    if add_lower:
        self._add_lowercase()
    #cache various other data that make the calculations faster
    self._make_all()
    self._make_comp_table()
    # a gap can be a true gap char or a degenerate character, typically '?'
    # we therefore want to ensure consistent treatment across the definition
    # of characters as either gap or degenerate
    self.GapString = ''.join(self.Gaps)
    # gap characters that are not also keys of self.Degenerates
    strict_gap = "".join(set(self.GapString) - set(self.Degenerates))
    self.stripDegenerate = FunctionWrapper(
        keep_chars(strict_gap+''.join(self.Alphabet)))
    # self.All is not assigned in this method; presumably set by
    # self._make_all() above -- TODO confirm
    self.stripBad = FunctionWrapper(keep_chars(''.join(self.All)))
    # NOTE(review): '-' binds tighter than '^', so this parses as
    # set(Alphabet) ^ (set(Degenerates) - set(Gaps)) -- confirm intended
    to_keep = set(self.Alphabet) ^ set(self.Degenerates) - set(self.Gaps)
    self.stripBadAndGaps = FunctionWrapper(keep_chars(''.join(to_keep)))

    #make inverse degenerates from degenerates
    #ensure that lowercase versions also exist if appropriate
    # keys are frozensets of the symbols a character stands for; values are
    # the character itself
    inv_degens = {}
    for key, val in self.Degenerates.items():
        inv_degens[frozenset(val)] = key.upper()
        if add_lower:
            inv_degens[frozenset(''.join(val).lower())] = key.lower()
    # later loops overwrite earlier entries that share the same frozenset key
    for m in self.Alphabet:
        inv_degens[frozenset(m)] = m
        if add_lower:
            inv_degens[frozenset(''.join(m).lower())] = m.lower()
    for m in self.Gaps:
        inv_degens[frozenset(m)] = m
    self.InverseDegenerates = inv_degens

    #set array type for modeling alphabets
    # alphabets without an ArrayType attribute leave self.ArrayType as None
    try:
        self.ArrayType = self.Alphabet.ArrayType
    except AttributeError:
        self.ArrayType = None

    #set modeling sequence
    self.ModelSeq = ModelSeq
def setUp(self):
    """Create the 'abc' alphabet and its squared (pair) alphabet."""
    abc = Alphabet('abc')
    self.abc = abc
    self.abc_pairs = abc**2
def setUp(self):
    """Create the 'ab' alphabet and its squared (pair) alphabet."""
    ab = Alphabet('ab')
    self.ab = ab
    self.ab_pairs = ab**2
def setUp(self):
    """Create the 'ab' pair alphabet plus two PairMatrix fixtures.

    self.empty holds all zeros; self.named holds 1..4 and is given the
    name 'name'.
    """
    ab = Alphabet('ab')
    ab_pairs = ab * ab
    self.ab = ab
    self.ab_pairs = ab_pairs
    self.empty = PairMatrix([0, 0, 0, 0], ab_pairs)
    self.named = PairMatrix([[1, 2], [3, 4]], ab_pairs, 'name')
from cogent.maths.matrix_logarithm import logm
from cogent.maths.matrix_exponentiation import FastExponentiator as expm

#need to find test directory to get access to the tests of the Freqs interface
try:
    from os import getcwd
    from sys import path
    from os.path import sep, join
    # Split the current working directory into components and keep
    # everything up to and including the 'tests' component. list.index
    # raises ValueError when 'tests' is absent, which is handled below.
    test_path = getcwd().split(sep)
    index = test_path.index('tests')
    fields = test_path[:index + 1] + ["test_maths"]
    # rebuild <...>/tests/test_maths with a leading separator and make it
    # importable
    test_path = sep + join(*fields)
    path.append(test_path)
    # NOTE(review): only ValueError is caught below, so an ImportError from
    # this import would propagate -- confirm that is intended
    from test_stats.test_util import StaticFreqsTestsI

    my_alpha = Alphabet('abcde')
    class myUsage(Usage):
        Alphabet = my_alpha

    class UsageAsFreqsTests(StaticFreqsTestsI, TestCase):
        """Note that the remaining Usage methods are tested here."""
        ClassToTest = myUsage

except ValueError: #couldn't find directory
    pass

__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2016, The Cogent Project"
__credits__ = ["Rob Knight", "Daniel McDonald"]
__license__ = "GPL"