def test_extract_seqs_by_sample_id(self):
    """extract_seqs_by_sample_id: functions as expected

    Exercises three calls on the same input records: the default
    selection, the negated selection, and a selection where the sample
    ids are supplied as a dict instead of a list.
    """
    records = [
        ('Samp1_109', 'ACGG'),
        ('Samp1_110', 'CCGG'),
        ('samp1_109', 'GCGG'),
        ('S2', 'AA'),
        ('S3', 'CC'),
        ('S4', 'GG'),
        ('S44', 'TT'),
        ('S4', 'TAAT'),
    ]
    wanted_ids = ['Samp1', 'S44']

    # Default selection: the expected output excludes the lower-case
    # 'samp1_109' record and both 'S4' records even though 'Samp1' and
    # 'S44' were requested.
    kept = [
        ('Samp1_109', 'ACGG'),
        ('Samp1_110', 'CCGG'),
        ('S44', 'TT'),
    ]
    self.assertEqual(
        list(extract_seqs_by_sample_id(records, wanted_ids)),
        kept)

    # Negated selection: everything the default call left out, in input
    # order.
    dropped = [
        ('samp1_109', 'GCGG'),
        ('S2', 'AA'),
        ('S3', 'CC'),
        ('S4', 'GG'),
        ('S4', 'TAAT'),
    ]
    self.assertEqual(
        list(extract_seqs_by_sample_id(records, wanted_ids, negate=True)),
        dropped)

    # A dict of sample ids is accepted as well.
    ids_as_dict = {'samp1': 25}
    self.assertEqual(
        list(extract_seqs_by_sample_id(records, ids_as_dict)),
        [('samp1_109', 'GCGG')])
def test_extract_seqs_by_sample_id(self):
    """extract_seqs_by_sample_id: functions as expected

    Covers list-based selection, the negate=True variant, and passing the
    sample ids as a dict.
    """
    input_seqs = [('Samp1_109', 'ACGG'),
                  ('Samp1_110', 'CCGG'),
                  ('samp1_109', 'GCGG'),
                  ('S2', 'AA'),
                  ('S3', 'CC'),
                  ('S4', 'GG'),
                  ('S44', 'TT'),
                  ('S4', 'TAAT')]

    def extract(ids, **kwargs):
        # Materialise the result so it can be compared against a list.
        return list(extract_seqs_by_sample_id(input_seqs, ids, **kwargs))

    id_list = ['Samp1', 'S44']

    # plain selection
    observed = extract(id_list)
    self.assertEqual(observed, [('Samp1_109', 'ACGG'),
                                ('Samp1_110', 'CCGG'),
                                ('S44', 'TT')])

    # negated
    observed = extract(id_list, negate=True)
    self.assertEqual(observed, [('samp1_109', 'GCGG'),
                                ('S2', 'AA'),
                                ('S3', 'CC'),
                                ('S4', 'GG'),
                                ('S4', 'TAAT')])

    # OK if user passes dict of sample ids
    observed = extract({'samp1': 25})
    self.assertEqual(observed, [('samp1_109', 'GCGG')])
def main():
    """Write the sequences selected by sample id from one FASTA file to another.

    Options come from parse_command_line_parameters(**script_info). Without a
    mapping file, opts.sample_ids is split on commas and used directly. With
    a mapping file, opts.sample_ids is passed through
    parse_metadata_state_descriptions and resolved with get_sample_ids.

    Each record yielded by extract_seqs_by_sample_id is written to the output
    file as '>id' followed by the sequence on the next line.

    Raises:
        ValueError: if the mapping-file lookup yields no sample ids.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    # Keep the raw option string under its own name: sample_ids is rebound to
    # the resolved ids below, and the error message needs the original query.
    sample_id_query = opts.sample_ids
    mapping_fp = opts.mapping_fp
    input_fasta_fp = opts.input_fasta_fp
    output_fasta_fp = opts.output_fasta_fp

    if not mapping_fp:
        sample_ids = sample_id_query.split(',')
    else:
        map_data, map_header, map_comments = parse_mapping_file(mapping_fp)
        sample_ids = get_sample_ids(
            map_data,
            map_header,
            parse_metadata_state_descriptions(sample_id_query))
        # str.split always returns at least one element, so only the
        # mapping-file lookup can come back empty.
        if len(sample_ids) == 0:
            raise ValueError(
                "No samples match the search criteria: %s" % sample_id_query)

    if opts.verbose:
        # This is useful when using the --valid_states feature so you can
        # find out if a search query didn't work as you expected before a
        # lot of time is spent
        print("Extracting samples: %s" % ', '.join(sample_ids))

    try:
        input_fasta_f = open(input_fasta_fp)
    except IOError:
        option_parser.error(
            'Cannot open %s. Does it exist? Do you have read access?' %
            input_fasta_fp)
        # NOTE(review): option_parser.error presumably exits on its own (as
        # optparse's does); exit(1) is kept from the original as a safeguard.
        exit(1)

    try:
        output_fasta_f = open(output_fasta_fp, 'w')
    except IOError:
        # Do not leave the already-opened input handle behind.
        input_fasta_f.close()
        option_parser.error(
            "Cannot open %s. Does path exist? Do you have write access?" %
            output_fasta_fp)
        exit(1)

    # Both handles are closed when the block exits, including when parsing or
    # writing raises part-way through.
    with input_fasta_f, output_fasta_f:
        seqs = parse_fasta(input_fasta_f)
        for r in extract_seqs_by_sample_id(seqs, sample_ids, negate):
            output_fasta_f.write('>%s\n%s\n' % r)