def test_surefmt_load_gbk2fas(self):
    """Write the fixture record as GenBank, load it as 'fasta', compare ids."""
    count = seqfile_ops.write_genbank(self.gbk_filename, self.record)
    # Compare by value: identity checks on ints only pass by virtue of
    # interpreter-level small-int caching.
    self.assertEqual(count, 1)
    fas_record = seqfile_ops.surefmt_load(self.gbk_filename, 'fasta',
                                          generic_dna)
    self.assertEqual(fas_record.id, self.record.id)
def seq_subset_load(infile, subset_mode, subset_args):
    """Load a subset of sequence segments from a sequence file.

    Args:
        infile: path of the sequence file to read.
        subset_mode: selects how the segments are obtained; one of
            'flatfile', 'coordinates', 'features' or 'size'.
        subset_args: mode-specific arguments:
            - 'flatfile': not used.
            - 'coordinates': mapping with keys 'file', 'header' and
              'columns', forwarded to adaptive_list_load.
            - 'features': passed unchanged to feat_collect as its mode
              argument.
            - 'size': mapping with keys 'size' and 'chop_mode', forwarded
              to coord_chop together with the sequence length.

    Returns:
        A (subset, subset_file) tuple. In 'flatfile' mode subset is the
        result of load_multifasta(infile) and subset_file is infile itself.
        In every other mode subset is the result of
        get_seq_subset_by_coords and subset_file is the name of the fasta
        file the subset was written to (<record id> + '_subset.fas').

    Raises:
        Whatever the underlying loaders/writers raise (including KeyError
        for missing subset_args keys); nothing is caught here.
    """
    from analysis.sequence_ops import feat_collect, feature_coords, \
        coord_chop, get_seq_subset_by_coords
    from analysis.seqfile_ops import load_multifasta, surefmt_load, \
        write_fasta
    from analysis.text_manipulation import adaptive_list_load

    # Modes are compared with ==, not `is`: string identity depends on
    # interning and fails for equal strings built at runtime.
    if subset_mode == 'flatfile':
        # in this case the sequence file MUST be multifasta
        subset = load_multifasta(infile)
        print("set of %s sequence segments" % len(subset))
        return subset, infile

    # load the query single sequence file (convert format if necessary)
    seq_record = surefmt_load(infile, 'fasta', 'generic_dna')
    print("query sequence loaded from %s" % infile)

    # load or generate coordinate pairs for target segments
    if subset_mode == 'coordinates':
        coords_file = subset_args['file']
        header = subset_args['header']
        columns = subset_args['columns']
        coords_list = adaptive_list_load(coords_file, header, columns)
        print("%s segments loaded from %s" % (len(coords_list), infile))
    elif subset_mode == 'features':
        feat_mode = subset_args
        features = feat_collect(infile, feat_mode)
        coords_list = feature_coords(features)
        print(coords_list)
        print("%s features loaded from %s" % (len(coords_list), infile))
    elif subset_mode == 'size':
        size = subset_args['size']
        chop_mode = subset_args['chop_mode']
        coords_list = coord_chop(len(seq_record.seq), size, chop_mode)
        print("%s segments generated to fit %s" % (len(coords_list), size))
    else:
        print("ERROR: A mode MUST be specified.")
        # NOTE(review): execution continues with coords_list = None, so the
        # outcome for an unknown mode is decided by get_seq_subset_by_coords.
        coords_list = None

    # collect subset of sequence segments using resulting coords_list
    subset = get_seq_subset_by_coords(seq_record, coords_list)
    print("subset of %s sequence segments" % len(subset))

    # save subset to multifasta file for later use or reference
    subset_file = seq_record.id + '_subset.fas'
    write_fasta(subset_file, subset)
    print("subset written to fasta file %s" % subset_file)
    return subset, subset_file
def test_surefmt_load_gbk2gbk(self):
    """Write the fixture record as GenBank, reload it as 'genbank'.

    Checks that the record id and the type of the first feature match
    the fixture record.
    """
    count = seqfile_ops.write_genbank(self.gbk_filename, self.record)
    # Compare by value: identity checks on ints only pass by virtue of
    # interpreter-level small-int caching.
    self.assertEqual(count, 1)
    gbk_record = seqfile_ops.surefmt_load(self.gbk_filename, 'genbank',
                                          generic_dna)
    self.assertEqual(gbk_record.id, self.record.id)
    # check features (only the first one is compared)
    self.assertEqual(gbk_record.features[0].type,
                     self.record.features[0].type)
def test_surefmt_load_fas2gbk(self):
    """Write the fixture record as FASTA, load it as 'genbank', compare ids."""
    count = seqfile_ops.write_fasta(self.fas_filename, self.record)
    # Compare by value: identity checks on ints only pass by virtue of
    # interpreter-level small-int caching.
    self.assertEqual(count, 1)
    gbk_record = seqfile_ops.surefmt_load(self.fas_filename, 'genbank',
                                          generic_dna)
    self.assertEqual(gbk_record.id, self.record.id)