def read_blast_alignment(ofile, srcDB, destDB, al=None, pipeline=None,
                         translateSrc=False, translateDest=False):
    """Parse blast output from 'ofile' and turn it into aligned intervals.

    ofile: the blast output to be parsed;
    srcDB: database used to look up the query sequences named in the
    blast output;
    destDB: database used to look up the subject sequences named in the
    blast output;
    al: if not None, a writeable alignment object that receives the
    alignment intervals (only used when no pipeline is given);
    pipeline: if not None, a list of filter functions, each taking a
    single argument and returning an iterator or iterable result object;
    translateSrc: if True, srcDB is replaced by a TranslationDB
    representing its possible 6 frames (for blastx, tblastx);
    translateDest: if True, destDB is replaced by a TranslationDB
    representing its possible 6 frames (for tblastn, tblastx).

    Returns whatever save_interval_alignment() returns when no pipeline
    is given; otherwise the output of the last filter in the pipeline.
    """
    parser = BlastHitParser()
    # name mapping handed to CoordsToIntervals as its third argument
    field_names = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    if translateSrc:
        srcDB = translationDB.get_translation_db(srcDB)
    if translateDest:
        destDB = translationDB.get_translation_db(destDB)
    to_intervals = CoordsToIntervals(srcDB, destDB, field_names)
    intervals = to_intervals(parser.parse_file(ofile))
    if pipeline is None:
        return save_interval_alignment(intervals, al)
    # chain the filters: the output of each one is the input of the next
    for stage in pipeline:
        intervals = stage(intervals)
    return intervals
def __getitem__(self, query):
    """Return the alignment slices found across all translations of query.

    The result is a list holding one slice per translated frame of the
    query that is present in the alignment and is not an EmptySlice.
    """
    # build the alignment for just this one sequence by calling ourselves
    alignment = self(query)
    # translation database for the database the query belongs to
    frames_db = translationDB.get_translation_db(query.db)
    found = []
    for frame in frames_db[query.id].iter_frames():
        try:
            hit = alignment[frame]
        except KeyError:
            # this frame is not in the alignment; move on to the next one
            continue
        if isinstance(hit, EmptySlice):
            continue
        found.append(hit)
    return found