Exemplo n.º 1
0
def seq_subset_load(infile, subset_mode, subset_args):
    """Load a subset of sequence segments from a sequence file.

    The behaviour depends on ``subset_mode``:

    * ``'flatfile'``    -- ``infile`` is loaded with ``load_multifasta`` and
      returned as the subset; ``infile`` itself is returned as the subset
      file and nothing is written.
    * ``'coordinates'`` -- coordinate pairs are read with
      ``adaptive_list_load`` using ``subset_args['file']``,
      ``subset_args['header']`` and ``subset_args['columns']``.
    * ``'features'``    -- ``subset_args`` is passed unchanged to
      ``feat_collect`` as the feature mode, and the collected features are
      turned into coordinates by ``feature_coords``.
    * ``'size'``        -- coordinates are generated by ``coord_chop`` from
      the sequence length, ``subset_args['size']`` and
      ``subset_args['chop_mode']``.

    For every mode except ``'flatfile'`` the query is loaded with
    ``surefmt_load``, the segments are extracted with
    ``get_seq_subset_by_coords`` and written with ``write_fasta`` to
    ``<seq_record.id>_subset.fas``.

    Any other ``subset_mode`` prints an error message and passes
    ``coords_list = None`` on to ``get_seq_subset_by_coords``.

    Returns a ``(subset, subset_file)`` tuple. Exceptions raised by the
    helper functions propagate to the caller unchanged.
    """
    from analysis.sequence_ops import feat_collect, feature_coords, \
        coord_chop, get_seq_subset_by_coords
    from analysis.seqfile_ops import load_multifasta, surefmt_load, \
        write_fasta
    from analysis.text_manipulation import adaptive_list_load

    # Modes are compared with ``==``: identity (``is``) only works when both
    # strings happen to be interned, which is not guaranteed for values read
    # from a config file or the command line.
    if subset_mode == 'flatfile':
        # in this case the sequence file MUST be multifasta
        subset = load_multifasta(infile)
        print("set of %s sequence segments" % (len(subset),))
        return subset, infile

    # load the query single sequence file (convert format if necessary)
    seq_record = surefmt_load(infile, 'fasta', 'generic_dna')
    print("query sequence loaded from %s" % (infile,))

    # load or generate coordinate pairs for target segments
    if subset_mode == 'coordinates':
        coords_file = subset_args['file']
        header = subset_args['header']
        columns = subset_args['columns']
        coords_list = adaptive_list_load(coords_file, header, columns)
        # report the file the coordinates were actually read from
        print("%s segments loaded from %s" % (len(coords_list), coords_file))
    elif subset_mode == 'features':
        feat_mode = subset_args
        features = feat_collect(infile, feat_mode)
        coords_list = feature_coords(features)
        # dump of the coordinates, kept from the original implementation
        print(coords_list)
        print("%s features loaded from %s" % (len(coords_list), infile))
    elif subset_mode == 'size':
        size = subset_args['size']
        chop_mode = subset_args['chop_mode']
        coords_list = coord_chop(len(seq_record.seq), size, chop_mode)
        print("%s segments generated to fit %s" % (len(coords_list), size))
    else:
        # NOTE(review): execution continues with coords_list = None, as in
        # the original; how get_seq_subset_by_coords treats None is not
        # visible from here -- confirm whether this should raise instead.
        print("ERROR: A mode MUST be specified.")
        coords_list = None

    # collect subset of sequence segments using resulting coords_list
    subset = get_seq_subset_by_coords(seq_record, coords_list)
    print("subset of %s sequence segments" % (len(subset),))

    # save subset to multifasta file for later use or reference
    subset_file = seq_record.id + '_subset.fas'
    write_fasta(subset_file, subset)
    print("subset written to fasta file %s" % (subset_file,))
    return subset, subset_file
Exemplo n.º 2
0
 def test_feat_collect_gene_by_product(self):
     # Collect "gene" features filtered on two locus_tag values and expect
     # exactly one match.
     # NOTE(review): despite the test name, the filter uses "locus_tag", not
     # "product" -- confirm which was intended.
     # NOTE(review): ("gene") is a plain string, not a one-element tuple;
     # left unchanged because feat_collect's handling is not visible here.
     # assertEqual compares values; assertIs would test object identity,
     # which for integers only holds by interpreter implementation detail.
     self.assertEqual(self.count, 1)
     feat_mode = {"types": ("gene"), "tags": {"locus_tag": ["locustag 1", "locustag 4"]}}
     collected = sequence_ops.feat_collect(self.filename, feat_mode)
     self.assertEqual(len(collected), 1)
Exemplo n.º 3
0
 def test_feat_collect_mixed(self):
     # Collect features of type "CDS" or "gene" filtered on a locus_tag and
     # a product value, and expect exactly two matches.
     # NOTE(review): ("locustag 3") and (("product 2")) are plain strings,
     # not tuples; left unchanged because feat_collect's handling is not
     # visible here.
     # assertEqual compares values; assertIs would test object identity,
     # which for integers only holds by interpreter implementation detail.
     self.assertEqual(self.count, 1)
     feat_mode = {"types": ("CDS", "gene"), "tags": {"locus_tag": ("locustag 3"), "product": (("product 2"))}}
     collected = sequence_ops.feat_collect(self.filename, feat_mode)
     self.assertEqual(len(collected), 2)
Exemplo n.º 4
0
 def test_feat_collect_all_genes(self):
     # Collect features with an empty tag filter and expect exactly two
     # matches.
     # NOTE(review): ("genes") is the plain string "genes", not a tuple;
     # left unchanged because switching to a tuple could change what
     # feat_collect matches -- confirm the intended type value.
     # assertEqual compares values; assertIs would test object identity,
     # which for integers only holds by interpreter implementation detail.
     self.assertEqual(self.count, 1)
     feat_mode = {"types": ("genes"), "tags": {}}
     collected = sequence_ops.feat_collect(self.filename, feat_mode)
     self.assertEqual(len(collected), 2)