Пример #1
0
	def add_read_to_vec(self, read, copy=None):
		"""
		Add every base of ``read`` to this vector, one position per call.

		read --- object providing seq, ref_seq_id, offset and copy
		copy --- count recorded for each base; when None, read.copy is used
		"""
		for idx, base in enumerate(read.seq):
			# idx-th ungapped position on ref_seq_id counted from read.offset,
			# translated by the refmap before it is recorded
			column = self.refmap.ungapped_to_gapped(read.ref_seq_id, read.offset + idx)
			if copy is None:
				weight = read.copy
			else:
				weight = copy
			DF.add_to_vec(self, nt=base, positions=[column], counts=[weight])
Пример #2
0
	def add_read_to_vec_using_ref(self, read):
		"""
		Add the reference bases covered by ``read`` instead of the read's own bases.

		read --- object providing ref_seq_id, offset, seq and copy; only the
		         length of read.seq is used, its content is ignored

		For each position in the window [read.offset, read.offset + len(read.seq))
		the base is taken from the reference sequence in self.refmap.fasta,
		'U' is recorded as 'T', anything outside A/T/C/G is recorded as 'N',
		and read.copy is added at the position given by self.refmap.gap_map.
		"""
		start = read.offset
		stop = start + len(read.seq)
		# Look the reference sequence up once; it does not change per base.
		# NOTE(review): the reference is indexed with the same index used to
		# slice gap_map -- presumably refmap.fasta holds the ungapped
		# sequence; confirm against the refmap implementation.
		ref_seq = self.refmap.fasta[read.ref_seq_id].seq
		window = self.refmap.gap_map[read.ref_seq_id][start:stop]
		for i, p in enumerate(window, start):
			s = ref_seq[i]
			if s == 'U':
				s = 'T'
			elif s not in ('A', 'T', 'C', 'G'):
				# comparison is case-sensitive, so lowercase bases also become 'N'
				s = 'N'
			DF.add_to_vec(self, nt=s, positions=[p], counts=[read.copy])
Пример #3
0
 def make_DF(self):
     """
     Build a DF holding per-position nucleotide counts for every record.

     A new DF is created from self.name and self.aln_length. For each
     record in self.fasta_reader and each nucleotide in DF.nucleotides(),
     every index at which the nucleotide occurs in the record's sequence
     (with 'U' rewritten as 'T') is added with a count of 1.

     Returns the populated DF.
     """
     df = DF(self.name, self.aln_length)
     for seq_id in self.fasta_reader.iterkeys():
         record = self.fasta_reader[seq_id]
         # The normalised string is the same for every nucleotide, so build
         # it once per record rather than once per (record, nucleotide) pair.
         seq = record.seq.tostring().replace("U", "T")
         for nt in DF.nucleotides():
             # TODO: make find_all_indices iterative to be mem-efficient
             positions = find_all_indices(seq, nt)
             df.add_to_vec(nt=nt, positions=positions, counts=[1] * len(positions))
     return df
Пример #4
0
 def subsample(self, se):
     """
     Build a DF from a random sample of at most ``se`` records of self.pyro.

     se --- requested sample size; capped at the number of available keys

     A new DF is created from self.pyro.name and self.pyro.aln_length. For
     each sampled record and each nucleotide in DF.nucleotides(), every
     index at which the nucleotide occurs in the record's sequence (with
     'U' rewritten as 'T') is added with a count of 1.

     Returns the populated DF.
     """
     df = DF(self.pyro.name, self.pyro.aln_length)
     keys = self.pyro.keys()
     # min() prevents random.sample's "sample larger than population" error
     sample_size = min(se, len(keys))
     for seq_id in random.sample(keys, sample_size):
         record = self.pyro[seq_id]
         # The normalised string is the same for every nucleotide, so build
         # it once per record rather than once per (record, nucleotide) pair.
         seq = record.seq.tostring().replace("U", "T")
         for nt in DF.nucleotides():
             positions = find_all_indices(seq, nt)
             df.add_to_vec(nt=nt, positions=positions, counts=[1] * len(positions))
     return df
Пример #5
0
	def __init__(self, name, refmap, *args):
		"""
		Run DF.__init__ on this instance using refmap's alignment length,
		then keep a reference to the refmap.

		name --- passed through to DF.__init__
		refmap --- must expose aln_length; stored as self.refmap
		args --- extra positional arguments forwarded to DF.__init__
		"""
		aln_length = refmap.aln_length
		DF.__init__(self, name, aln_length, *args)
		self.refmap = refmap