Пример #1
0
	def _do_search(self, seq, seqdb, args):
		"""Run jackhmmer with ``seq`` as the query against ``seqdb``.

		The query and the database are written to temporary FASTA files and
		jackhmmer is run on them. Whatever matchfile.load parses from the
		``--domtblout`` file is appended to ``self.matches`` and every HMM
		checkpointed through ``--chkhmm`` is appended to ``self.hmms``.

		seq: the query record(s), in any form SeqIO.write accepts
		seqdb: the record(s) to search, in any form SeqIO.write accepts
		args: list of extra command line arguments, placed before the two
			input file names

		Nothing is returned; the results are accumulated on ``self``.
		"""
		# Create the temporary files one at a time inside the try block so
		# that, whichever step fails, every file opened so far is still closed
		# (closing a NamedTemporaryFile also deletes it).
		handles = []
		try:
			for _ in range(4):
				handles.append(tempfile.NamedTemporaryFile())
			seq_file, seqdb_file, out_file, hmm_file = handles

			# NOTE(review): the files are opened in the default binary mode;
			# confirm SeqIO.write accepts that on the interpreter in use.
			SeqIO.write(seq, seq_file, 'fasta')
			SeqIO.write(seqdb, seqdb_file, 'fasta')
			# jackhmmer opens the files by name, so the data must be on disk
			seq_file.flush()
			seqdb_file.flush()

			cmd = (['jackhmmer', '--qformat', 'fasta', '--tformat', 'fasta',
				'--chkhmm', hmm_file.name, '--domtblout', out_file.name]
				+ args + [seq_file.name, seqdb_file.name])
			p = Popen(cmd, stdout=PIPE, stdin=PIPE, stderr=PIPE)
			# wait for jackhmmer to finish; its console output is not used
			p.communicate()

			# NOTE(review): this passes self.seqdb rather than the seqdb
			# argument - confirm that is intended.
			self.matches += matchfile.load(out_file, seq, self.seqdb)

			# load the hmms: the checkpoints are looked up as
			# "<hmm_file.name>-<i>.hmm" for i = 1, 2, ... and the first one
			# that cannot be read (IOError) ends the loop
			try:
				i = 1
				while True:
					f = "{}-{}.hmm".format(hmm_file.name, i)
					self.hmms.append(hmmfile.read(f)[0])
					os.remove(f)
					i += 1
			except IOError:
				pass
		finally:
			for handle in handles:
				handle.close()
Пример #2
0
	def search(self, hmm, targets, **kwargs):
		"""Perform the search
				hmm: a file name or an HMM object which has been loaded from a
					file, or an iterable of these
				targets: the sequences to search - one or more Bio.SeqRecord
				kwargs: passed on to self.getArgs to build the HMMER arguments

				If the hmm performs searches on Amino Acids and any of the inputs
				are DNA sequences, 6-frame translations will be produced
				automatically
				Reverse translations (from Amino Acid to DNA) are not supported

				The matches are accumulated in self.matches, which is cleared
				at the start of every search.

				Raises ValueError if no HMM is given, if a target is not a
				SeqRecord, if the HMMs do not share one alphabet or if a target
				cannot be searched with the HMMs' alphabet.
		"""

		# Load the HMM(s)
		# A lone file name or HMM object is wrapped in a list. Strings are
		# tested for explicitly because str defines __iter__ on Python 3 and
		# would otherwise be iterated one character at a time.
		if isinstance(hmm, (str, hmmfile.HMM)) or not hasattr(hmm, "__iter__"):
			hmm = [hmm,]
		#load the file if h is not an HMM object
		self.hmm = list()
		for h in hmm:
			if not isinstance(h, hmmfile.HMM):
				# hmmfile.read returns a list, so extend rather than append
				self.hmm = self.hmm + hmmfile.read(h)
			else:
				self.hmm.append(h)
		# fail with a clear message instead of an IndexError on self.hmm[0]
		if not self.hmm:
			raise ValueError("No HMMs were given to search with")

		#make sure targets is iterable
		# (a single SeqRecord is itself iterable, hence the explicit test)
		if not hasattr(targets, '__iter__') or isinstance(targets, SeqRecord):
			targets = [targets,]
		self.targets = list(targets)
		for t in self.targets:
			if not isinstance(t, SeqRecord):
				raise ValueError("Search Targets must be SeqRecords")

		#apply unique ids
		self.targets = wrap_seqrecords(self.targets) 
		self.hmm = wrap_hmms(self.hmm)

		# every HMM must use the alphabet of the first one
		hmm_alpha = self.hmm[0].alph.upper()
		for h in self.hmm:
			if h.alph.upper() != hmm_alpha:
				raise ValueError("The HMMs don't all have the same alphabet")

		#get the arguments for HMMER
		args = self.getArgs(**kwargs)
		#clear the matches
		self.matches = []

		#Translate targets if necessary
		for t in self.targets:
			t_alpha = t.alphabet()
			if hmm_alpha == 'DNA':
				if t_alpha == 'DNA':
					self.matches += self._do_search(self.hmm, t, args)
				else:
					raise ValueError("Cannot search DNA model against non-DNA target")
			elif hmm_alpha == 'AMINO':
				if t_alpha == 'AMINO':
					self.matches += self._do_search(self.hmm, t, args)
				elif t_alpha == 'DNA':
					#looks like we have to convert
					# each translation returned is searched separately
					for tt in tools.getSixFrameTranslation(t):
						self.matches += self._do_search(self.hmm, tt, args)
				else:
					raise ValueError("Cannot search Protein model against {} target"
							.format(t.seq.alphabet))