def _look_for_blast_matches(self, seqrecords, blastdb):
    """Blast the sequences against the database and index the match parts.

    The blast is run through ``_do_blast_2`` with ``self.program`` and
    ``self.params``.  When ``self.filters`` is not None the alignments are
    passed through ``filter_alignments``.  When ``self.elongate_for_global``
    is true the match parts of every match are elongated in place with
    ``align_completely=SUBJECT``.

    Returns a dict that maps every query name to a list with all the match
    parts found for that query, accumulated over all its matches.
    """
    blasts, blast_fhand = _do_blast_2(
        blastdb, seqrecords, self.program, params=self.params
    )
    # The blast result handle is released even if the filtering or the
    # elongation fails half way.
    try:
        if self.filters is not None:
            blasts = filter_alignments(blasts, config=self.filters)

        indexed_match_parts = {}
        for blast in blasts:
            query = blast["query"]
            for match in blast["matches"]:
                subject = match["subject"]
                if self.elongate_for_global:
                    elongate_match_parts_till_global(
                        match["match_parts"],
                        query_length=query["length"],
                        subject_length=subject["length"],
                        align_completely=SUBJECT,
                    )
                # Accumulate into a list owned by the index, so the list
                # that belongs to the alignment is never extended in place.
                parts_for_query = indexed_match_parts.setdefault(
                    query["name"], []
                )
                parts_for_query.extend(match["match_parts"])
        return indexed_match_parts
    finally:
        blast_fhand.close()
def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
    """Look for the oligos in the given sequence file.

    The sequence file is rewritten as fasta inside a temporary directory
    and that copy is handed to ``_do_blast_2`` as the database, with the
    oligos as queries and ``seqs_type`` as the database type.  When
    ``self.filters`` is not None the alignments are passed through
    ``filter_alignments``.  When ``self.elongate_for_global`` is true the
    match parts of every match are elongated in place with
    ``align_completely=QUERY``.

    Returns a dict that maps every subject (read) name to a list with all
    the match parts found on that read, accumulated over all the oligos.
    """
    # we need to keep the blast_fhands, because they're temp files and
    # otherwise they might be removed
    temp_dir = TemporaryDir()
    try:
        dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
        # Both handles are closed explicitly, so the fasta file is fully
        # written to disk before the blast reads it.
        with open(seq_fpath) as in_fhand, open(dbpath, 'w') as db_fhand:
            seqio([in_fhand], db_fhand, out_format='fasta',
                  copy_if_same_format=False)

        blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                          params=self.params,
                                          program=self.program,
                                          dbtype=seqs_type)
        try:
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            # Which are the regions covered in each sequence?
            indexed_match_parts = {}
            for blast in blasts:
                oligo = blast['query']
                for match in blast['matches']:
                    read = match['subject']
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match['match_parts'],
                            query_length=oligo['length'],
                            subject_length=read['length'],
                            align_completely=QUERY)
                    # Accumulate into a list owned by the index, so the
                    # list that belongs to the alignment is never extended
                    # in place.
                    parts_for_read = indexed_match_parts.setdefault(
                        read['name'], [])
                    parts_for_read.extend(match['match_parts'])
            return indexed_match_parts
        finally:
            blast_fhand.close()
    finally:
        temp_dir.close()
def _look_for_blast_matches(self, seq_fpath, oligos):
    """Look for the oligos in the given sequence file.

    The sequence file is rewritten as fasta inside a temporary directory
    and that copy is handed to ``_do_blast_2`` as the database, with the
    oligos as queries.  When ``self.filters`` is not None the alignments
    are passed through ``filter_alignments``.  When
    ``self.elongate_for_global`` is true the match parts of every match are
    elongated in place with ``align_completely=QUERY``.

    Returns a dict keyed by subject (read) name.  With exactly one oligo
    the value is the match-part list of the last match seen for that read;
    with several oligos the value accumulates the match parts of all the
    matches on that read.
    """
    # we need to keep the blast_fhands, because they're temp files and
    # otherwise they might be removed
    temp_dir = TemporaryDir()
    try:
        dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
        # Both handles are closed explicitly, so the fasta file is fully
        # written to disk before the blast reads it.
        with open(seq_fpath) as in_fhand, open(dbpath, "w") as db_fhand:
            seqio(
                [in_fhand],
                [db_fhand],
                out_format="fasta",
                copy_if_same_format=False,
            )

        blasts, blast_fhand = _do_blast_2(
            dbpath, oligos, params=self.params, program=self.program
        )
        try:
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            # Which are the regions covered in each sequence?
            indexed_match_parts = {}
            one_oligo = len(oligos) == 1
            for blast in blasts:
                oligo = blast["query"]
                for match in blast["matches"]:
                    read = match["subject"]
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match["match_parts"],
                            query_length=oligo["length"],
                            subject_length=read["length"],
                            align_completely=QUERY,
                        )
                    match_parts = match["match_parts"]
                    if one_oligo:
                        # A single oligo: the entry is replaced, not
                        # accumulated.
                        indexed_match_parts[read["name"]] = match_parts
                    else:
                        # Accumulate into a list owned by the index, so the
                        # list that belongs to the alignment is never
                        # extended in place.
                        parts_for_read = indexed_match_parts.setdefault(
                            read["name"], []
                        )
                        parts_for_read.extend(match_parts)
            return indexed_match_parts
        finally:
            blast_fhand.close()
    finally:
        temp_dir.close()
def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
    """Look for the oligos in the given sequence file.

    A fasta copy of the sequence file is written into a temporary
    directory and used as the blast database for ``_do_blast_2``; the
    oligos are the queries and ``seqs_type`` is passed as the database
    type.  Alignments go through ``filter_alignments`` when
    ``self.filters`` is not None, and the match parts are elongated in
    place with ``align_completely=QUERY`` when ``self.elongate_for_global``
    is true.

    Returns a dict that maps every subject (read) name to a list with all
    the match parts found on that read, accumulated over all the oligos.
    """
    # we need to keep the blast_fhands, because they're temp files and
    # otherwise they might be removed
    temp_dir = TemporaryDir()
    try:
        dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
        # Closing both handles here guarantees the database file is
        # flushed to disk before the blast is launched on it.
        with open(seq_fpath) as in_fhand, open(dbpath, 'w') as db_fhand:
            seqio([in_fhand], db_fhand, out_format='fasta',
                  copy_if_same_format=False)

        blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                          params=self.params,
                                          program=self.program,
                                          dbtype=seqs_type)
        try:
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            # Which are the regions covered in each sequence?
            indexed_match_parts = {}
            for blast in blasts:
                oligo = blast['query']
                for match in blast['matches']:
                    read = match['subject']
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match['match_parts'],
                            query_length=oligo['length'],
                            subject_length=read['length'],
                            align_completely=QUERY)
                    # The index owns its lists: extending them must not
                    # modify the list stored in the alignment itself.
                    parts_for_read = indexed_match_parts.setdefault(
                        read['name'], [])
                    parts_for_read.extend(match['match_parts'])
            return indexed_match_parts
        finally:
            blast_fhand.close()
    finally:
        temp_dir.close()
def test_match_part_elongation(self):
    'The alignments get elongated till they are like global alignments'
    # Every scenario is a tuple with:
    #   (match part, subject length, query length,
    #    sequence to align completely, expected match part)
    # The match part is modified in place by the elongation.
    scenarios = [
        # elongate on the right
        ({'query_start': 10, 'query_end': 13,
          'subject_start': 0, 'subject_end': 3},
         5, 20, SUBJECT,
         {'query_start': 10, 'query_end': 14,
          'subject_start': 0, 'subject_end': 4, 'elongated': 1}),
        # elongate on the left
        ({'query_start': 3, 'query_end': 5,
          'subject_start': 3, 'subject_end': 5},
         7, 20, SUBJECT,
         {'subject_start': 0, 'query_start': 0,
          'query_end': 6, 'subject_end': 6, 'elongated': 4}),
        # reversed
        ({'query_end': 3, 'query_start': 5,
          'subject_end': 3, 'subject_start': 5},
         7, 20, SUBJECT,
         {'subject_end': 0, 'query_end': 0,
          'query_start': 6, 'subject_start': 6, 'elongated': 4}),
        # partial elongation
        ({'query_start': 1, 'query_end': 3,
          'subject_start': 2, 'subject_end': 4},
         7, 5, SUBJECT,
         {'subject_start': 1, 'query_start': 0,
          'query_end': 4, 'subject_end': 5, 'elongated': 2}),
        # No elongation
        ({'query_start': 1, 'query_end': 3,
          'subject_start': 0, 'subject_end': 2},
         3, 30, SUBJECT,
         {'subject_start': 0, 'query_start': 1,
          'query_end': 3, 'subject_end': 2}),
        # the query should be completely aligned
        ({'query_start': 1, 'query_end': 40,
          'subject_start': 59, 'subject_end': 98},
         200, 42, QUERY,
         {'query_start': 0, 'query_end': 41,
          'subject_start': 58, 'subject_end': 99, 'elongated': 2}),
    ]
    for part, subj_len, qry_len, complete, expected in scenarios:
        elongate_match_parts_till_global([part],
                                         subject_length=subj_len,
                                         query_length=qry_len,
                                         align_completely=complete)
        assert part == expected