def _do_search(self, seq, seqdb, args):
    """Run jackhmmer with ``seq`` as the query against ``seqdb``.

    seq:   the query sequence(s); written to a temporary fasta file
    seqdb: the sequence(s) to search; written to a temporary fasta file
    args:  list of extra command-line arguments, placed after the fixed
           options and before the two positional file names

    The matches parsed from the --domtblout table are appended to
    self.matches, and every HMM file found under the --chkhmm prefix is
    loaded and appended to self.hmms.  Nothing is returned.
    """
    # The context managers close (and thereby delete) every temporary file,
    # including the --chkhmm prefix file, even if writing the input or
    # parsing the output raises.
    with tempfile.NamedTemporaryFile() as seq_file, \
            tempfile.NamedTemporaryFile() as seqdb_file, \
            tempfile.NamedTemporaryFile() as out_file, \
            tempfile.NamedTemporaryFile() as hmm_file:
        SeqIO.write(seq, seq_file, 'fasta')
        SeqIO.write(seqdb, seqdb_file, 'fasta')
        # jackhmmer opens the inputs by name, so the buffered data has to
        # reach the disk before the process starts.
        seq_file.flush()
        seqdb_file.flush()

        command = ['jackhmmer',
                   '--qformat', 'fasta',
                   '--tformat', 'fasta',
                   '--chkhmm', hmm_file.name,
                   '--domtblout', out_file.name]
        command = command + args + [seq_file.name, seqdb_file.name]
        p = Popen(command, stdout=PIPE, stdin=PIPE, stderr=PIPE)
        # communicate() drains the pipes and waits for the process to exit;
        # the captured output itself is not used.
        p.communicate()

        # NOTE(review): the matches are loaded against self.seqdb, not the
        # ``seqdb`` argument -- confirm this is intended.
        self.matches += matchfile.load(out_file, seq, self.seqdb)

        # load the hmms: files are named <prefix>-1.hmm, <prefix>-2.hmm, ...
        i = 1
        try:
            while True:
                f = "{}-{}.hmm".format(hmm_file.name, i)
                self.hmms.append(hmmfile.read(f)[0])
                os.remove(f)
                i += 1
        except IOError:
            # presumably raised by hmmfile.read for the first index that has
            # no file, i.e. all checkpoint HMMs have been consumed
            pass
def search(self, hmm, targets, **kwargs):
    """Perform the search

    hmm: a file name or an HMM object which has been loaded from a file,
        or an iterable of file names / HMM objects
    targets: the sequences to search - one or more Bio.SeqRecord
    kwargs: passed on to self.getArgs() to build the HMMER arguments

    If the hmm performs searches on Amino Acids and any of the inputs are
    DNA sequences, 6-frame translations will be produced automatically

    Reverse translations (from Amino Acid to DNA) are not supported

    The results are collected in self.matches (reset on every call).

    Raises ValueError if no HMM is supplied, if a target is not a
    SeqRecord, if the HMMs do not share one alphabet, or if a target's
    alphabet cannot be searched with the HMM alphabet.
    """
    # Load the HMM(s).  A single file name or HMM object is wrapped in a
    # list; strings are tested explicitly because on Python 3 they define
    # __iter__ and would otherwise be iterated character by character.
    if (isinstance(hmm, (str, bytes, hmmfile.HMM))
            or not hasattr(hmm, "__iter__")):
        hmm = [hmm]

    # load the file if h is not an HMM object
    self.hmm = []
    for h in hmm:
        if isinstance(h, hmmfile.HMM):
            self.hmm.append(h)
        else:
            self.hmm.extend(hmmfile.read(h))

    # make sure targets is iterable
    if not hasattr(targets, '__iter__') or isinstance(targets, SeqRecord):
        targets = [targets]
    self.targets = list(targets)
    for t in self.targets:
        if not isinstance(t, SeqRecord):
            raise ValueError("Search Targets must be SeqRecords")

    # apply unique ids
    self.targets = wrap_seqrecords(self.targets)
    self.hmm = wrap_hmms(self.hmm)

    # Without this guard an empty input surfaces as a bare IndexError below.
    if not self.hmm:
        raise ValueError("At least one HMM is required for a search")

    hmm_alpha = self.hmm[0].alph.upper()
    for h in self.hmm:
        if h.alph.upper() != hmm_alpha:
            raise ValueError("The HMMs don't all have the same alphabet")

    # get the arguments for HMMER
    args = self.getArgs(**kwargs)

    # clear the matches
    self.matches = []

    # Translate targets if necessary.  _do_search must return a list of
    # matches here, since its result is added to self.matches.
    # Targets are silently skipped when hmm_alpha is neither 'DNA' nor
    # 'AMINO'.
    for t in self.targets:
        t_alpha = t.alphabet()
        if hmm_alpha == 'DNA':
            if t_alpha != 'DNA':
                raise ValueError(
                    "Cannot search DNA model against non-DNA target")
            self.matches += self._do_search(self.hmm, t, args)
        elif hmm_alpha == 'AMINO':
            if t_alpha == 'AMINO':
                self.matches += self._do_search(self.hmm, t, args)
            elif t_alpha == 'DNA':
                # looks like we have to convert
                for tt in tools.getSixFrameTranslation(t):
                    self.matches += self._do_search(self.hmm, tt, args)
            else:
                # Report the alphabet that was actually compared; Seq
                # objects no longer carry an ``alphabet`` attribute in
                # current Biopython, which would mask this error.
                raise ValueError(
                    "Cannot search Protein model against {} target"
                    .format(t_alpha))