def test_taxaOk(self):
    """BirthDeathModel TaxaOk should return True if taxa not exceeded

    Steps one model with scripted FakeRandom sources and checks taxaOk()
    after each step and after each change to MaxTaxa.
    """
    model = BirthDeathModel(0.1, 0.2, 0.3, MaxTaxa=5)
    # scripted value sources handed to step(); born_alternate is built
    # but not used by any step below
    born_alternate = FakeRandom([1, 1, 1, 0], True)
    born_only = FakeRandom([1, 0], True)
    kill_only = FakeRandom([0, 1, 0, 1], True)

    # a fresh model starts off with a single taxon
    assert model.taxaOk()

    # a few taxa are still OK
    model.step(born_only)   # now 2 taxa
    assert model.taxaOk()
    model.step(born_only)   # now 4 taxa
    assert model.taxaOk()
    model.step(born_only)   # now 8 taxa
    assert not model.taxaOk()

    # changing the limit changes the verdict without stepping
    model.MaxTaxa = 8
    assert not model.taxaOk()
    model.MaxTaxa = 9
    assert model.taxaOk()
    model.MaxTaxa = 17
    assert model.taxaOk()

    model.step(born_only)
    assert model.taxaOk()
    model.step(born_only)
    assert not model.taxaOk()

    # OK again once there is no maximum
    model.MaxTaxa = None
    assert model.taxaOk()

    # not OK if there are no taxa left
    model.step(kill_only)
    assert not model.taxaOk()

    # still not OK without a maximum
    model.MaxTaxa = None
    assert not model.taxaOk()
def test_call(self):
    """BirthDeathModel call should produce hand-calculated trees

    One scripted FakeRandom stream is shared by all three model calls
    below, so the order of the calls matters.
    """
    # this first model is replaced before it is ever called
    model = BirthDeathModel(0.01, 0.005, 0.1, MaxTaxa=10)
    scripted = FakeRandom(
        [1,0,
         1,1, 1,1,
         1,0, 0,0,
         0,0, 0,0, 1,0,
         0,0, 0,0, 0,1, 0,0,
         1,0, 0,0, 0,0,
         1,0, 0,0, 0,0, 1,0,
         1,0, 1,0, 0,1, 1,1, 1,0, 1,0,
         1,1, 1,1, 1,1, 1,1, 1,0, 1,1, 1,1, 1,1, 1,1],
        True)

    # unfiltered tree
    model = BirthDeathModel(0.1, 0.5, 1, MaxTaxa=10)
    tree = model(filter=False, random_f=scripted)
    lengths = [node.Length for node in tree.traverse()]
    self.assertEqual(lengths, [2,2,2,2,2,1,1,1,2,2,2,2])

    # try it with pruning
    model = BirthDeathModel(0.1, 0.5, 1, MaxTaxa=10)
    tree = model(filter=True, random_f=scripted)
    lengths = [node.Length for node in tree.traverse()]
    self.assertEqual(lengths, [2,2,2,2,1,1,2,2,2,2])

    # try it with fewer taxa
    model = BirthDeathModel(0.1, 0.5, 1, MaxTaxa=4)
    tree = model(filter=True, random_f=scripted)
    lengths = [node.Length for node in tree.traverse()]
    self.assertEqual(lengths, [2,2,1,1])
def toSeq(self, Bases=True, truncate=True):
    """Translates flowgram to sequence and returns sequence object

    Bases: if True and self already has a Bases attribute, the sequence
        object is built from self.Bases instead of translating the
        flowgram.
    truncate: if True, strip lowercase chars (low quality bases) from the
        ends: trailing and leading runs of a/c/g/t/n are removed.

    Returns Sequence(seq, Name=self.Name).

    Raises ValueError if the flowgram has to be translated but
    self.floworder is None.

    Side effect: a freshly translated sequence string is stored on
    self.Bases so later calls can reuse it.
    """
    if Bases and hasattr(self, "Bases"):
        seq = self.Bases
    else:
        if self.floworder is None:
            # call-form raise: valid on both Python 2 and Python 3
            raise ValueError("must have self.floworder set")
        bases = []
        key = FakeRandom(self.floworder, True)
        flows_since_last = 0
        for n in self.flowgram:
            signal = int(round(n))
            # key() is evaluated exactly once per flow, even when the
            # rounded signal is 0 and nothing is appended
            bases.extend([key()] * signal)
            if signal > 0:
                flows_since_last = 0
            else:
                flows_since_last += 1
                # NOTE(review): 4 presumably matches a 4-char flow order
                # (one full cycle with no signal) -- confirm
                if flows_since_last == 4:
                    bases.extend('N')
                    flows_since_last = 0
        seq = ''.join(bases)
        #cache the result for next time
        self.Bases = seq

    if truncate:
        seq = str(seq)
        seq = seq.rstrip("acgtn")
        seq = seq.lstrip("actgn")
    return Sequence(seq, Name=self.Name)
def test_step(self):
    """BirthDeathModel step should match hand-calculated results

    Drives one model through a fixed series of step() calls, each fed by
    a scripted FakeRandom source, and checks tree shape and branch
    lengths after every stage.
    """
    model = BirthDeathModel(BirthProb=0.1, DeathProb=0.2, TimePerStep=1)
    born_and_died = FakeRandom([0], True)
    born_only = FakeRandom([1, 0], True)
    died_only = FakeRandom([0, 1], True)
    neither = FakeRandom([1], True)
    kill_alternate = FakeRandom([0, 1, 1, 1], True)
    born_alternate = FakeRandom([1, 1, 1, 0], True)

    # with neither birth nor death, we just continue
    model.step(neither)
    self.assertEqual(len(model.Tree.Children), 0)

    # with born_only we get a duplication
    model.step(born_only)
    self.assertEqual(len(model.Tree.Children), 2)
    assert model.Tree not in model.CurrTaxa
    for taxon in model.CurrTaxa:
        assert taxon.Parent is model.Tree
        self.assertEqual(taxon.Length, 1)

    # a second round of born_only duplicates again
    model.step(born_only)
    self.assertEqual(len(model.Tree.Children), 2)
    self.assertEqual(len(list(model.Tree.traverse())), 4)
    for node in model.Tree.traverse():
        self.assertEqual(node.Length, 1)
    for child in model.Tree.Children:
        self.assertEqual(child.Length, 1)

    # branch lengths add correctly over uneventful steps
    for _ in range(4):
        model.step(neither)
    self.assertEqual(len(model.CurrTaxa), 4)
    self.assertEqual(len(model.Tree.Children), 2)
    self.assertEqual(len(list(model.Tree.traverse())), 4)
    for node in model.Tree.traverse():
        self.assertEqual(node.Length, 5)
    for child in model.Tree.Children:
        self.assertEqual(child.Length, 1)

    # we can kill offspring correctly
    model.step(kill_alternate)
    self.assertEqual(len(model.CurrTaxa), 2)

    # make sure we killed the right children
    model.Tree.assignIds()
    for child in model.Tree.Children:
        # note that killing a child doesn't remove it, just stops it changing
        self.assertEqual(len(child.Children), 2)
        self.assertEqual(child.Children[0].Length, 5)
        self.assertEqual(child.Children[1].Length, 6)
    lengths = [node.Length for node in model.Tree.traverse()]
    self.assertEqual(lengths, [5,6,5,6])

    # born_and_died does the same thing as neither
    model.step(born_and_died)
    lengths = [node.Length for node in model.Tree.traverse()]
    self.assertEqual(lengths, [5,7,5,7])
    model.step(neither)
    lengths = [node.Length for node in model.Tree.traverse()]
    self.assertEqual(lengths, [5,8,5,8])

    # only CurrTaxa are brought forward
    self.assertEqual([taxon.Length for taxon in model.CurrTaxa], [8, 8])

    # we can duplicate a particular taxon
    model.step(born_alternate)
    self.assertEqual([taxon.Length for taxon in model.CurrTaxa], [9, 1, 1])
    self.assertEqual(model.CurrTaxa[1].Parent.Length, 8)

    # we can kill 'em all
    model.step(died_only)
    self.assertEqual(len(model.CurrTaxa), 0)
def seqs_to_flows(seqs, keyseq = default_keyseq, floworder = default_floworder,
                  numflows = None, probs = None, bin_size = 0.01,
                  header_info = None):
    """ Transform a sequence into an ideal flow

    seqs: a list of (name, sequence) tuples
    keyseq: the flowgram key Sequence
    floworder: The chars needed to convert seq to flow
    numflows: number of total flows in each flowgram; must be a multiple of
        len(floworder). When given, a flowgram that does not end on a full
        flow cycle is padded towards that number.
    probs: dictionary defining the probability distribution for each
        homopolymer length; probs[0] is used for flows with no signal.
        WARNING: each distribution's probabilities must add to 1.0
    bin_size: passed through to pick_from_prob_density
    header_info: optional dict that is updated in place with the
        'Key Sequence', 'Flow Chars' and 'Key Length' entries and handed to
        the returned collection. A fresh dict is created when omitted, so
        calls no longer share one default dict.

    Returns a FlowgramCollection built from (name, Flowgram) pairs.

    Raises ValueError if a distribution in probs does not sum to 1.0
    (rounded to one decimal), or if numflows is not a multiple of
    len(floworder).
    """
    # a mutable default would be shared (and mutated) across calls
    if header_info is None:
        header_info = {}

    if probs:
        for p in probs:
            if round(sum(probs[p]), 1) != 1.0:
                raise ValueError('probs[%s] does not add to 1.0' % p)

    flows = []
    homopolymer_counter = 1.0
    for name, seq in seqs:
        flow_seq = FakeRandom(floworder, True)
        flow = []
        seq_len = len(seq)
        for i, nuc in enumerate(seq):
            if i < seq_len - 1 and seq[i + 1] == nuc:
                # still inside a homopolymer run: just count it
                homopolymer_counter += 1.0
            else:
                # emit a no-signal flow for every flow char before nuc
                # NOTE(review): if nuc never comes out of flow_seq() this
                # loop presumably does not terminate -- confirm inputs are
                # restricted to the chars in floworder
                while flow_seq() != nuc:
                    if probs is None:
                        val = 0.0
                    else:
                        val = pick_from_prob_density(probs[0], bin_size)
                    flow.append(val)

                # runs longer than 9 are emitted as the raw count
                if (probs is None) or (homopolymer_counter > 9):
                    val = homopolymer_counter
                else:
                    val = pick_from_prob_density(
                        probs[int(homopolymer_counter)], bin_size)
                flow.append(val)
                homopolymer_counter = 1.0

        len_flow = len(flow)
        len_order = len(floworder)
        if numflows is not None and numflows % len_order != 0:
            raise ValueError(
                "numflows must be divisable by the length of floworder")
        # NOTE(review): padding only happens when the flow does not already
        # end on a cycle boundary; a flow that does is left unpadded even if
        # numflows is larger -- confirm whether that is intended
        if len_flow % len_order != 0:
            right_missing = len_order - (len_flow % len_order)
            if numflows != (len_flow + right_missing) and numflows is not None:
                right_missing += (numflows - (len_flow + right_missing))
            if probs is None:
                flow.extend([0.0] * right_missing)
            else:
                for _ in range(0, right_missing):
                    flow.append(pick_from_prob_density(probs[0], bin_size))

        # pass this record's name; the previous code passed the builtin
        # function `id` in this position
        flows.append((name, Flowgram(flow, name, keyseq, floworder)))

    if keyseq is not None:
        keylen = len(keyseq)
    else:
        keylen = None

    header_info.update({'Key Sequence':keyseq,'Flow Chars':floworder,
                        'Key Length':keylen})
    return FlowgramCollection(flows, header_info = header_info)