Пример #1
0
 def make_DF(self):
     """Build a ``DF`` accumulating nucleotide counts over every record.

     For each key yielded by ``self.fasta_reader.iterkeys()``, the record's
     sequence is converted to a string with every "U" replaced by "T".
     Then, for each nucleotide in ``DF.nucleotides()``, a count of 1 is
     added at every index returned by ``find_all_indices(seq, nt)``
     (presumably the positions where ``nt`` occurs in ``seq`` -- confirm
     against the helper's definition).

     Returns:
         The populated ``DF`` constructed from ``self.name`` and
         ``self.aln_length``.
     """
     df = DF(self.name, self.aln_length)
     for seq_id in self.fasta_reader.iterkeys():
         record = self.fasta_reader[seq_id]
         # The normalized sequence does not depend on the nucleotide being
         # searched for, so build it once per record instead of once per
         # (record, nucleotide) pair.
         seq = record.seq.tostring().replace("U", "T")
         for nt in DF.nucleotides():
             # TODO: make find_all_indices iterative to be mem-efficient
             positions = find_all_indices(seq, nt)
             df.add_to_vec(nt=nt, positions=positions, counts=[1] * len(positions))
     return df
Пример #2
0
 def subsample(self, se):
     """Build a ``DF`` from a random subsample of the records in ``self.pyro``.

     Draws ``min(se, number of keys)`` distinct keys from ``self.pyro`` without
     replacement. For each selected record, the sequence is converted to a
     string with every "U" replaced by "T"; then, for each nucleotide in
     ``DF.nucleotides()``, a count of 1 is added at every index returned by
     ``find_all_indices(seq, nt)``.

     Args:
         se: Requested number of records to sample. Values larger than the
             number of available keys are clamped to that number.

     Returns:
         The populated ``DF`` constructed from ``self.pyro.name`` and
         ``self.pyro.aln_length``.

     Raises:
         ValueError: Propagated from ``random.sample`` if ``se`` is negative.
     """
     df = DF(self.pyro.name, self.pyro.aln_length)
     # random.sample needs a real sequence; keys() may hand back a view or a
     # set, so materialize it (a no-op copy when it is already a list).
     keys = list(self.pyro.keys())
     # Clamp to the population size to prevent the
     # "sample larger than population" error.
     sample_size = min(se, len(keys))
     for seq_id in random.sample(keys, sample_size):
         record = self.pyro[seq_id]
         # The normalized sequence is the same for every nucleotide, so build
         # it once per record rather than inside the inner loop.
         seq = record.seq.tostring().replace("U", "T")
         for nt in DF.nucleotides():
             positions = find_all_indices(seq, nt)
             df.add_to_vec(nt=nt, positions=positions, counts=[1] * len(positions))
     return df