Exemplo n.º 1
0
    def test_build_blast_db_from_seqs(self):
        """build_blast_db_from_seqs builds a db under /tmp that can be blasted

        Also checks that the reported db files exist and can be removed.
        """
        db_path, created_files = build_blast_db_from_seqs(self.in_seqs1,
                                                          output_dir='/tmp')
        self.assertTrue(db_path.startswith('/tmp/Blast_tmp_db'))
        self.assertTrue(db_path.endswith('.fasta'))

        # The reported files must be exactly the db path plus each suffix
        suffixes = ['.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log']
        wanted_files = set(db_path + suffix for suffix in suffixes)
        self.assertEqual(set(created_files), wanted_files)

        # Blasting the test sequence against the new db gives one entry
        blast_result = blastn(self.test_seq, blast_db=db_path)
        self.assertEqual(len(blast_result), 1)

        # Every reported file is present on disk
        for path in created_files:
            self.assertTrue(exists(path))

        # Delete all of the db files
        remove_files(created_files)

        # None of them may be left behind after the removal
        for path in created_files:
            self.assertFalse(exists(path))
Exemplo n.º 2
0
    def test_blast_against_new_db(self):
        """Formatdb: blastall against a newly created DB functions as expected

        Runs FormatDb (working in /tmp) on ``self.in_seqs1_fp``, blasts
        ``self.test_seq`` against that path, and checks that the result
        has an "s1" entry whose first element contains a record with
        SUBJECT ID "11472384".
        """
        fdb = FormatDb(WorkingDir="/tmp")
        result = fdb(self.in_seqs1_fp)
        try:
            blast_res = blastn(self.test_seq, blast_db=self.in_seqs1_fp)
        finally:
            # Clean up the formatdb output even when blastn raises, so a
            # failing run does not leave files behind for later tests.
            result.cleanUp()

        # Test that a blast result was returned
        self.assertTrue("s1" in blast_res, "Not getting any blast results.")
        # Test that the sequence we expect was a good blast hit
        subject_ids = [r["SUBJECT ID"] for r in blast_res["s1"][0]]
        self.assertTrue("11472384" in subject_ids, "Not getting expected blast results.")
Exemplo n.º 3
0
    def test_blast_against_new_db(self):
        """Formatdb: blastall against a newly created DB functions as expected

        Runs FormatDb (working in /tmp) on ``self.in_seqs1_fp``, blasts
        ``self.test_seq`` against that path, and checks that the result
        has an 's1' entry whose first element contains a record with
        SUBJECT ID '11472384'.
        """
        fdb = FormatDb(WorkingDir='/tmp')
        result = fdb(self.in_seqs1_fp)
        try:
            blast_res = blastn(self.test_seq, blast_db=self.in_seqs1_fp)
        finally:
            # Clean up the formatdb output even when blastn raises, so a
            # failing run does not leave files behind for later tests.
            result.cleanUp()

        # Test that a blast result was returned
        self.assertTrue('s1' in blast_res,
                        "Not getting any blast results.")
        # Test that the sequence we expect was a good blast hit
        subject_ids = [r['SUBJECT ID'] for r in blast_res['s1'][0]]
        self.assertTrue('11472384' in subject_ids,
                        "Not getting expected blast results.")
Exemplo n.º 4
0
    def test_build_blast_db_from_fasta_path_aln(self):
        """build_blast_db_from_fasta_path accepts an alignment file path

        The db path must equal the input path, and the reported db files
        must exist, be blastable, and be removable.
        """
        db_path, created_files = build_blast_db_from_fasta_path(self.in_aln1_fp)
        self.assertEqual(db_path, self.in_aln1_fp)

        # The reported files must be exactly the db path plus each suffix
        suffixes = [".nhr", ".nin", ".nsq", ".nsd", ".nsi", ".log"]
        wanted_files = set(db_path + suffix for suffix in suffixes)
        self.assertEqual(set(created_files), wanted_files)

        # Blasting the test sequence against the new db gives one entry
        blast_result = blastn(self.test_seq, blast_db=db_path, e_value=0.0)
        self.assertEqual(len(blast_result), 1)

        # Every reported file is present on disk
        for path in created_files:
            self.assertTrue(exists(path))

        # Delete all of the db files
        remove_files(created_files)

        # None of them may be left behind after the removal
        for path in created_files:
            self.assertFalse(exists(path))
Exemplo n.º 5
0
    def test_build_blast_db_from_fasta_path(self):
        """build_blast_db_from_fasta_path builds a db next to the input fasta

        The db path must equal the input path, and the reported db files
        must exist, be blastable, and be removable.
        """
        db_path, created_files = build_blast_db_from_fasta_path(self.in_seqs1_fp)
        self.assertEqual(db_path, self.in_seqs1_fp)

        # The reported files must be exactly the input path plus each suffix
        suffixes = [".nhr", ".nin", ".nsq", ".nsd", ".nsi", ".log"]
        wanted_files = set(self.in_seqs1_fp + suffix for suffix in suffixes)
        self.assertEqual(set(created_files), wanted_files)

        # Blasting the test sequence against the new db gives one entry
        blast_result = blastn(self.test_seq, blast_db=db_path)
        self.assertEqual(len(blast_result), 1)

        # Every reported file is present on disk
        for path in created_files:
            self.assertTrue(exists(path))

        # Delete all of the db files
        remove_files(created_files)

        # None of them may be left behind after the removal
        for path in created_files:
            self.assertFalse(exists(path))
Exemplo n.º 6
0
    def test_build_blast_db_from_fasta_path_aln(self):
        """build_blast_db_from_fasta_path accepts an alignment file path

        The db path must equal the input path, and the reported db files
        must exist, be blastable, and be removable.
        """
        db_path, created_files = build_blast_db_from_fasta_path(
            self.in_aln1_fp)
        self.assertEqual(db_path, self.in_aln1_fp)

        # The reported files must be exactly the db path plus each suffix
        suffixes = ['.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log']
        wanted_files = set(db_path + suffix for suffix in suffixes)
        self.assertEqual(set(created_files), wanted_files)

        # Blasting the test sequence against the new db gives one entry
        blast_result = blastn(self.test_seq, blast_db=db_path, e_value=0.0)
        self.assertEqual(len(blast_result), 1)

        # Every reported file is present on disk
        for path in created_files:
            self.assertTrue(exists(path))

        # Delete all of the db files
        remove_files(created_files)

        # None of them may be left behind after the removal
        for path in created_files:
            self.assertFalse(exists(path))
Exemplo n.º 7
0
    def test_build_blast_db_from_seqs(self):
        """build_blast_db_from_seqs builds a db under /tmp that can be blasted

        Also checks that the reported db files exist and can be removed.
        """
        db_path, created_files = build_blast_db_from_seqs(
            self.in_seqs1, output_dir="/tmp"
        )
        self.assertTrue(db_path.startswith("/tmp/Blast_tmp_db"))
        self.assertTrue(db_path.endswith(".fasta"))

        # The reported files must be exactly the db path plus each suffix
        suffixes = [".nhr", ".nin", ".nsq", ".nsd", ".nsi", ".log"]
        wanted_files = set(db_path + suffix for suffix in suffixes)
        self.assertEqual(set(created_files), wanted_files)

        # Blasting the test sequence against the new db gives one entry
        blast_result = blastn(self.test_seq, blast_db=db_path)
        self.assertEqual(len(blast_result), 1)

        # Every reported file is present on disk
        for path in created_files:
            self.assertTrue(exists(path))

        # Delete all of the db files
        remove_files(created_files)

        # None of them may be left behind after the removal
        for path in created_files:
            self.assertFalse(exists(path))
Exemplo n.º 8
0
    def test_build_blast_db_from_fasta_file(self):
        """build_blast_db_from_fasta_file works with open files as input

        Builds a db under /tmp/ from an open handle on ``self.in_aln1_fp``,
        then checks the returned db path, the reported db files, that the
        db can be blasted, and that the files can be removed.
        """
        # Use a context manager so the input handle is always closed; it was
        # previously opened inline and never closed.
        # NOTE(review): assumes the builder has finished reading the handle
        # by the time it returns - confirm against its implementation.
        with open(self.in_aln1_fp) as fasta_file:
            blast_db, db_files = build_blast_db_from_fasta_file(
                fasta_file, output_dir="/tmp/"
            )
        self.assertTrue(blast_db.startswith("/tmp/BLAST_temp_db"))
        self.assertTrue(blast_db.endswith(".fasta"))
        # The reported files include the db path itself plus each suffix
        expected_db_files = set(
            [blast_db] + [blast_db + ext for ext in [".nhr", ".nin", ".nsq", ".nsd", ".nsi", ".log"]]
        )
        self.assertEqual(set(db_files), expected_db_files)
        # result returned when blasting against new db
        self.assertEqual(len(blastn(self.test_seq, blast_db=blast_db, e_value=0.0)), 1)

        # Make sure all db_files exist
        for fp in db_files:
            self.assertTrue(exists(fp))

        # Remove all db_files
        remove_files(db_files)

        # Make sure nothing weird happened in the remove
        for fp in db_files:
            self.assertFalse(exists(fp))
Exemplo n.º 9
0
def pick_parents(blastdb, ref_seqs,target_sequences,nhits=10,params=None):
    """Pick the most likely parents for a chimeric sequence

    blastdb : pre-formatted db of all parents; joined onto the current
        working directory before use
    ref_seqs : aligned parent sequences, indexed by blast SUBJECT ID
    target_sequences : a dict mapping seq_id to sequence, or any iterable
        of (seq_id, seq) pairs
    nhits : number of "best" parents to take
    params : not used by this function

    Yields one (ref_db, seq_id, seq) tuple per target sequence that has at
    least one hit, where ref_db maps each hit SUBJECT ID to its entry in
    ref_seqs. Targets without any hit are skipped.
    """
    # The docstring promises dict support, but iterating a dict directly
    # yields only its keys; walk its (seq_id, seq) items instead.
    if isinstance(target_sequences, dict):
        target_sequences = target_sequences.items()

    # Loop invariant: resolve the db location once instead of per target.
    blastdb = os.path.join(os.getcwd(), blastdb)

    for seq_id, seq in target_sequences:
        # Each fragment of the target is blasted as its own numbered query
        query_seqs = [("%d" % i, s) for i, s in
                      enumerate(fractionate_sequence(seq))]
        result = blastn(query_seqs, blastdb)

        ref_db = {}
        for _, best_hits in result.bestHitsByQuery(n=nhits):
            for rec in best_hits:
                id_ = rec['SUBJECT ID']
                ref_db[id_] = ref_seqs[id_]

        if ref_db:
            yield (ref_db,seq_id, seq)
Exemplo n.º 10
0
def pick_parents(blastdb, ref_seqs, target_sequences, nhits=10, params=None):
    """Pick the most likely parents for a chimeric sequence

    blastdb : pre-formatted db of all parents; joined onto the current
        working directory before use
    ref_seqs : aligned parent sequences, indexed by blast SUBJECT ID
    target_sequences : a dict mapping seq_id to sequence, or any iterable
        of (seq_id, seq) pairs
    nhits : number of "best" parents to take
    params : not used by this function

    Yields one (ref_db, seq_id, seq) tuple per target sequence that has at
    least one hit, where ref_db maps each hit SUBJECT ID to its entry in
    ref_seqs. Targets without any hit are skipped.
    """
    # The docstring promises dict support, but iterating a dict directly
    # yields only its keys; walk its (seq_id, seq) items instead.
    if isinstance(target_sequences, dict):
        target_sequences = target_sequences.items()

    # Loop invariant: resolve the db location once instead of per target.
    blastdb = os.path.join(os.getcwd(), blastdb)

    for seq_id, seq in target_sequences:
        # Each fragment of the target is blasted as its own numbered query
        query_seqs = [("%d" % i, s) for i, s in
                      enumerate(fractionate_sequence(seq))]
        result = blastn(query_seqs, blastdb)

        ref_db = {}
        for _, best_hits in result.bestHitsByQuery(n=nhits):
            for rec in best_hits:
                id_ = rec['SUBJECT ID']
                ref_db[id_] = ref_seqs[id_]

        if ref_db:
            yield (ref_db, seq_id, seq)
Exemplo n.º 11
0
    def test_build_blast_db_from_fasta_file(self):
        """build_blast_db_from_fasta_file works with open files as input

        Builds a db under /tmp/ from an open handle on ``self.in_aln1_fp``,
        then checks the returned db path, the reported db files, that the
        db can be blasted, and that the files can be removed.
        """
        # Use a context manager so the input handle is always closed; it was
        # previously opened inline and never closed.
        # NOTE(review): assumes the builder has finished reading the handle
        # by the time it returns - confirm against its implementation.
        with open(self.in_aln1_fp) as fasta_file:
            blast_db, db_files = build_blast_db_from_fasta_file(
                fasta_file, output_dir='/tmp/')
        self.assertTrue(blast_db.startswith('/tmp/BLAST_temp_db'))
        self.assertTrue(blast_db.endswith('.fasta'))
        # The reported files include the db path itself plus each suffix
        expected_db_files = set([blast_db] + [
            blast_db + ext
            for ext in ['.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log']])
        self.assertEqual(set(db_files), expected_db_files)
        # result returned when blasting against new db
        self.assertEqual(
            len(blastn(self.test_seq, blast_db=blast_db, e_value=0.0)), 1)

        # Make sure all db_files exist
        for fp in db_files:
            self.assertTrue(exists(fp))

        # Remove all db_files
        remove_files(db_files)

        # Make sure nothing weird happened in the remove
        for fp in db_files:
            self.assertFalse(exists(fp))
Exemplo n.º 12
0
    def test_build_blast_db_from_fasta_path(self):
        """build_blast_db_from_fasta_path builds a db next to the input fasta

        The db path must equal the input path, and the reported db files
        must exist, be blastable, and be removable.
        """
        db_path, created_files = build_blast_db_from_fasta_path(
            self.in_seqs1_fp)
        self.assertEqual(db_path, self.in_seqs1_fp)

        # The reported files must be exactly the input path plus each suffix
        suffixes = ['.nhr', '.nin', '.nsq', '.nsd', '.nsi', '.log']
        wanted_files = set(self.in_seqs1_fp + suffix for suffix in suffixes)
        self.assertEqual(set(created_files), wanted_files)

        # Blasting the test sequence against the new db gives one entry
        blast_result = blastn(self.test_seq, blast_db=db_path)
        self.assertEqual(len(blast_result), 1)

        # Every reported file is present on disk
        for path in created_files:
            self.assertTrue(exists(path))

        # Delete all of the db files
        remove_files(created_files)

        # None of them may be left behind after the removal
        for path in created_files:
            self.assertFalse(exists(path))