示例#1
0
    def test_extract_seqs_by_sample_id(self):
        """extract_seqs_by_sample_id: selects, negates, and accepts dict ids"""
        records = [
            ('Samp1_109', 'ACGG'),
            ('Samp1_110', 'CCGG'),
            ('samp1_109', 'GCGG'),
            ('S2', 'AA'),
            ('S3', 'CC'),
            ('S4', 'GG'),
            ('S44', 'TT'),
            ('S4', 'TAAT'),
        ]
        wanted = ['Samp1', 'S44']

        # Default mode: only the records belonging to the requested samples
        # come back, in input order. 'samp1_109' (lower case) and the 'S4'
        # records are expected to be left out.
        kept = list(extract_seqs_by_sample_id(records, wanted))
        self.assertEqual(
            kept,
            [('Samp1_109', 'ACGG'), ('Samp1_110', 'CCGG'), ('S44', 'TT')])

        # negate=True: the complement of the selection above is expected.
        dropped = list(
            extract_seqs_by_sample_id(records, wanted, negate=True))
        self.assertEqual(
            dropped,
            [('samp1_109', 'GCGG'),
             ('S2', 'AA'),
             ('S3', 'CC'),
             ('S4', 'GG'),
             ('S4', 'TAAT')])

        # A dict keyed by sample id is accepted in place of a list.
        from_dict = list(extract_seqs_by_sample_id(records, {'samp1': 25}))
        self.assertEqual(from_dict, [('samp1_109', 'GCGG')])
示例#2
0
 def test_extract_seqs_by_sample_id(self):
     """extract_seqs_by_sample_id: selects, negates, and accepts dict ids"""
     fasta_records = [
         ('Samp1_109', 'ACGG'),
         ('Samp1_110', 'CCGG'),
         ('samp1_109', 'GCGG'),
         ('S2', 'AA'),
         ('S3', 'CC'),
         ('S4', 'GG'),
         ('S44', 'TT'),
         ('S4', 'TAAT'),
     ]
     requested = ['Samp1', 'S44']

     # Plain selection: records of the requested samples, in input order.
     # The lower-case 'samp1_109' and both 'S4' records must not appear.
     selected = list(extract_seqs_by_sample_id(fasta_records, requested))
     self.assertEqual(
         selected,
         [('Samp1_109', 'ACGG'), ('Samp1_110', 'CCGG'), ('S44', 'TT')])

     # Negated selection: everything the plain selection left behind.
     remainder = list(
         extract_seqs_by_sample_id(fasta_records, requested, negate=True))
     self.assertEqual(
         remainder,
         [('samp1_109', 'GCGG'),
          ('S2', 'AA'),
          ('S3', 'CC'),
          ('S4', 'GG'),
          ('S4', 'TAAT')])

     # The sample ids may also be given as a dict keyed by sample id.
     via_dict = list(
         extract_seqs_by_sample_id(fasta_records, {'samp1': 25}))
     self.assertEqual(via_dict, [('samp1_109', 'GCGG')])
def main():
    """Script entry point: write the sequences selected by sample id.

    Reads the options produced by parse_command_line_parameters(), works
    out the list of sample ids (either a comma-separated list given
    directly, or a metadata query resolved against a mapping file), and
    writes every record yielded by extract_seqs_by_sample_id() to the
    output file in FASTA format.

    Raises:
        ValueError: a mapping file was given but the query matched no
            samples.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    sample_ids = opts.sample_ids
    mapping_fp = opts.mapping_fp
    input_fasta_fp = opts.input_fasta_fp
    output_fasta_fp = opts.output_fasta_fp

    if not mapping_fp:
        sample_ids = sample_ids.split(',')
    else:
        # Keep the raw query string: sample_ids is rebound to the query
        # result below, and the error message must report what the user
        # actually searched for. (The name valid_states was previously used
        # in the message without ever being defined, so this path raised
        # NameError instead of the intended ValueError.)
        valid_states = sample_ids
        map_data, map_header, map_comments = parse_mapping_file(mapping_fp)
        sample_ids = get_sample_ids(
            map_data,
            map_header,
            parse_metadata_state_descriptions(valid_states))
        if len(sample_ids) == 0:
            raise ValueError(
                "No samples match the search criteria: %s" %
                valid_states)

    if opts.verbose:
        # This is useful when using the --valid_states feature so you can
        # find out if a search query didn't work as you expected before a
        # lot of time is spent
        # Parenthesized single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Extracting samples: %s" % ', '.join(sample_ids))

    try:
        input_fasta_f = open(input_fasta_fp)
        seqs = parse_fasta(input_fasta_f)
    except IOError:
        option_parser.error(
            'Cannot open %s. Does it exist? Do you have read access?' %
            input_fasta_fp)
        # NOTE(review): presumably unreachable if option_parser.error()
        # terminates the process itself -- confirm before removing.
        exit(1)

    try:
        output_fasta_f = open(output_fasta_fp, 'w')
    except IOError:
        input_fasta_f.close()
        option_parser.error(
            "Cannot open %s. Does path exist? Do you have write access?" %
            output_fasta_fp)
        exit(1)

    # Close both handles even if parsing or writing fails part-way through.
    try:
        for r in extract_seqs_by_sample_id(seqs, sample_ids, negate):
            output_fasta_f.write('>%s\n%s\n' % r)
    finally:
        output_fasta_f.close()
        input_fasta_f.close()
def main():
    """Script entry point: write the sequences selected by sample id.

    Reads the options produced by parse_command_line_parameters(), works
    out the list of sample ids (either a comma-separated list given
    directly, or a metadata query resolved against a mapping file), and
    writes every record yielded by extract_seqs_by_sample_id() to the
    output file in FASTA format.

    Raises:
        ValueError: a mapping file was given but the query matched no
            samples.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    sample_ids = opts.sample_ids
    mapping_fp = opts.mapping_fp
    input_fasta_fp = opts.input_fasta_fp
    output_fasta_fp = opts.output_fasta_fp

    if not mapping_fp:
        sample_ids = sample_ids.split(',')
    else:
        # Remember the query exactly as the user typed it. sample_ids is
        # overwritten with the matching ids below, and the original code
        # formatted the error with an undefined name (valid_states), which
        # turned the intended ValueError into a NameError.
        search_criteria = sample_ids
        map_data, map_header, map_comments = parse_mapping_file(mapping_fp)
        sample_ids = get_sample_ids(
            map_data,
            map_header,
            parse_metadata_state_descriptions(search_criteria))
        if len(sample_ids) == 0:
            raise ValueError(
                "No samples match the search criteria: %s" %
                search_criteria)

    if opts.verbose:
        # This is useful when using the --valid_states feature so you can
        # find out if a search query didn't work as you expected before a
        # lot of time is spent
        # Parenthesized single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Extracting samples: %s" % ', '.join(sample_ids))

    try:
        input_fasta_f = open(input_fasta_fp)
        seqs = parse_fasta(input_fasta_f)
    except IOError:
        option_parser.error(
            'Cannot open %s. Does it exist? Do you have read access?' %
            input_fasta_fp)
        # NOTE(review): presumably unreachable if option_parser.error()
        # terminates the process itself -- confirm before removing.
        exit(1)

    try:
        output_fasta_f = open(output_fasta_fp, 'w')
    except IOError:
        input_fasta_f.close()
        option_parser.error(
            "Cannot open %s. Does path exist? Do you have write access?" %
            output_fasta_fp)
        exit(1)

    # Release both file handles whether or not extraction succeeds.
    try:
        for r in extract_seqs_by_sample_id(seqs, sample_ids, negate):
            output_fasta_f.write('>%s\n%s\n' % r)
    finally:
        output_fasta_f.close()
        input_fasta_f.close()