示例#1
0
 def test_blast_record_set(self):
     """Check a batch BLAST run with several query records.

     Writes ``self.db_records`` to ``self.db_file`` and reloads them, calls
     ``blasting.make_blastDB`` with the ``'nucl'`` type argument, then runs
     ``blasting.blast_record_set`` with ``self.multi_records`` and checks
     the first match reported for every query record.
     """
     # prepare database: the records must survive a write/load round trip
     seqfile_ops.write_fasta(self.db_file, self.db_records)
     db_records_list = seqfile_ops.load_multifasta(self.db_file)
     for index, record in enumerate(db_records_list):
         expected = self.db_records[index]
         self.assertEqual(record.id, expected.id)
         self.assertEqual(str(record.seq), str(expected.seq))
     # make database
     self.dbfile_path, db_report = blasting.make_blastDB(self.temp_dir,
                                                         self.db_name,
                                                         self.db_file,
                                                         'nucl')
     # compare by value: an identity check on an int only passes thanks to
     # CPython's small-integer cache
     self.assertEqual(db_report['status'], 0)
     self.assertEqual(db_report['message'], 'database exists')
     # run local blast batch (with multiple queries)
     matches_multi = blasting.blast_record_set(self.dbfile_path,
                                               self.multi_records,
                                               self.prefs)
     self.assertEqual(len(matches_multi), 3)
     for record in self.multi_records:
         # results are keyed by query id; only the first match is inspected
         first_match = matches_multi[record.id][0]
         self.assertEqual(first_match['contig_id'], record.id)
         self.assertEqual(first_match['details']['match_p100'], 100)
示例#2
0
 def test_local_blastn(self):
     """Check a local blastn run with a single query and its parsed output.

     Round-trips the query record and the database records through FASTA
     files, calls ``blasting.make_blastDB`` with the ``'nucl'`` type
     argument, runs ``blasting.local_blastn`` and finally parses the output
     file with ``blasting.parse_blast_out6``.
     """
     # prepare query
     seqfile_ops.write_fasta(self.single_q_file, self.single_record)
     query_record = seqfile_ops.load_fasta(self.single_q_file)
     self.assertEqual(query_record.id, self.record_1.id)
     self.assertEqual(str(query_record.seq), str(self.record_1.seq))
     # prepare database: the records must survive a write/load round trip
     seqfile_ops.write_fasta(self.db_file, self.db_records)
     records_list = seqfile_ops.load_multifasta(self.db_file)
     for index, record in enumerate(records_list):
         expected = self.db_records[index]
         self.assertEqual(record.id, expected.id)
         self.assertEqual(str(record.seq), str(expected.seq))
     # make database
     self.dbfile_path, db_report = blasting.make_blastDB(self.temp_dir,
                                                         self.db_name,
                                                         self.db_file,
                                                         'nucl')
     # compare by value: an identity check on an int only passes thanks to
     # CPython's small-integer cache
     self.assertEqual(db_report['status'], 0)
     self.assertEqual(db_report['message'], 'database exists')
     # run local blast with single query
     self.status = blasting.local_blastn(self.single_q_file,
                                         self.single_out_file,
                                         self.dbfile_path,
                                         self.prefs)
     self.assertEqual(self.status['output'], '')
     self.assertIsNone(self.status['error'])
     # parse blast output
     matches_single = blasting.parse_blast_out6(self.single_out_file,
                                                self.prefs)
     self.assertEqual(len(matches_single), 1)
     first_match = matches_single[0]
     self.assertEqual(first_match['contig_id'], self.single_record.id)
     self.assertEqual(first_match['details']['match_p100'], 100)
示例#3
0
 def test_write_and_load_multifasta(self):
     """Check that records written to a multifasta file load back in order.

     ``seqfile_ops.write_fasta`` is expected to return the number of records
     written (3 here); the ids loaded back by
     ``seqfile_ops.load_multifasta`` must match ``self.three_records``
     position by position.
     """
     count = seqfile_ops.write_fasta(self.fas_filename,
                                     self.three_records)
     # compare by value rather than identity
     self.assertEqual(count, 3)
     fas_records = seqfile_ops.load_multifasta(self.fas_filename)
     # walk over every expected record; a fixed range(0, 2) would leave the
     # third record unchecked
     for index, expected in enumerate(self.three_records):
         self.assertEqual(fas_records[index].id, expected.id)
示例#4
0
def seq_subset_load(infile, subset_mode, subset_args):
    """Load a subset of sequence segments from a sequence file.

    Args:
        infile: sequence file handed to the loader functions.
        subset_mode: 'flatfile', 'coordinates', 'features' or 'size'.
        subset_args: mode-specific arguments. For 'coordinates', a mapping
            with the keys 'file', 'header' and 'columns'; for 'features',
            the value passed to ``feat_collect`` as its mode; for 'size', a
            mapping with the keys 'size' and 'chop_mode'. Not read in
            'flatfile' mode.

    Returns:
        A ``(subset, subset_file)`` tuple. In 'flatfile' mode
        ``subset_file`` is ``infile`` itself; in the other modes the subset
        is written to ``<seq_record.id>_subset.fas`` and that file name is
        returned.

    Exceptions raised by the helper functions propagate unchanged.
    """
    from analysis.sequence_ops import feat_collect, feature_coords, \
        coord_chop, get_seq_subset_by_coords
    from analysis.seqfile_ops import load_multifasta, surefmt_load, \
        write_fasta
    from analysis.text_manipulation import adaptive_list_load

    # Modes are compared with ``==``: ``is`` on a string literal only works
    # when both strings happen to be interned.
    # Messages use single-argument print(...) so the output is the same
    # under the Python 2 print statement and the Python 3 print function.
    if subset_mode == 'flatfile':
        # in this case the sequence file MUST be multifasta
        subset = load_multifasta(infile)
        print("set of %s sequence segments" % len(subset))
        return subset, infile

    # load the query single sequence file (convert format if necessary)
    seq_record = surefmt_load(infile, 'fasta', 'generic_dna')
    print("query sequence loaded from %s" % (infile,))

    # load or generate coordinate pairs for target segments
    if subset_mode == 'coordinates':
        coords_file = subset_args['file']
        header = subset_args['header']
        columns = subset_args['columns']
        coords_list = adaptive_list_load(coords_file, header, columns)
        # NOTE(review): the message reports infile although the coordinates
        # come from coords_file -- confirm which one is intended.
        print("%s segments loaded from %s" % (len(coords_list), infile))
    elif subset_mode == 'features':
        feat_mode = subset_args
        features = feat_collect(infile, feat_mode)
        coords_list = feature_coords(features)
        print(coords_list)
        print("%s features loaded from %s" % (len(coords_list), infile))
    elif subset_mode == 'size':
        size = subset_args['size']
        chop_mode = subset_args['chop_mode']
        coords_list = coord_chop(len(seq_record.seq), size, chop_mode)
        print("%s segments generated to fit %s" % (len(coords_list), size))
    else:
        # NOTE(review): execution continues with coords_list = None after
        # this message; what get_seq_subset_by_coords does with None is not
        # visible here -- consider raising ValueError instead.
        print("ERROR: A mode MUST be specified.")
        coords_list = None

    # collect subset of sequence segments using resulting coords_list
    subset = get_seq_subset_by_coords(seq_record, coords_list)
    print("subset of %s sequence segments" % len(subset))

    # save subset to multifasta file for later use or reference
    subset_file = seq_record.id+'_subset.fas'
    write_fasta(subset_file, subset)
    print("subset written to fasta file %s" % (subset_file,))
    return subset, subset_file