def get_sam_ids(map_data, map_header, colorby, cat, primary_state, secondary_state):
    """Return the sample ids for a primary and a secondary metadata state.

    colorby: metadata category used to restrict the samples, e.g.
        'Country'; pass None to apply no colorby:cat restriction
    cat: value of the colorby category to keep, e.g. 'USA'
    primary_state: state description string, e.g. 'AgeCategory:Child'
    secondary_state: state description string like primary_state, or
        None, in which case the second list holds every candidate sample
        (all samples, or only the colorby:cat ones) not in the first list

    Returns a tuple of two de-duplicated lists:
        (primary state sample ids, secondary state sample ids).
    Both lists pass through a set, so their order is arbitrary.
    """
    restrict_to_cat = colorby is not None

    def _ids_for_state(state_description):
        # Keys are category labels (e.g. AgeCategory) and values are the
        # accepted values for that category (e.g. Adult).
        states = parse_metadata_state_descriptions(state_description)
        if restrict_to_cat:
            states[colorby] = [cat]
        return get_sample_ids(map_data, map_header, states)

    # Candidate pool the secondary group is carved out of when no explicit
    # secondary state was given.
    if restrict_to_cat:
        candidate_ids = get_sample_ids(map_data, map_header, {colorby: [cat]})
    else:
        candidate_ids = [row[0] for row in map_data]

    primary_ids = _ids_for_state(primary_state)

    if secondary_state is not None:
        secondary_ids = _ids_for_state(secondary_state)
    else:
        secondary_ids = set(candidate_ids).difference(set(primary_ids))

    return list(set(primary_ids)), list(set(secondary_ids))
def get_sam_ids(map_data, map_header, colorby, cat, primary_state, secondary_state):
    """Return all sample ids matching the state strings and colorby:cat.

    colorby: metadata category to filter on, e.g. 'Country'; pass None to
        skip the colorby:cat filter
    cat: value of the colorby category to keep, e.g. 'USA'
    primary_state: state description string, e.g. 'AgeCategory:Child'
    secondary_state: None, or a state description like primary_state.
        When None, the secondary ids are all candidate samples that are
        not in the primary state.

    Returns two de-duplicated lists (primary ids, secondary ids). The
    lists are built from sets, so their order is arbitrary.
    """
    # Identity checks against None: '== None' would dispatch to any
    # user-defined __eq__ on the argument.
    if colorby is None:
        sample_ids = [sam[0] for sam in map_data]
    else:
        sample_ids = get_sample_ids(map_data, map_header, {colorby: [cat]})

    # primary key is the category label, e.g. AgeCategory
    # value is the val for that category, e.g. Adult
    primary_states = parse_metadata_state_descriptions(primary_state)
    if colorby is not None:
        primary_states[colorby] = [cat]
    state1_samids = get_sample_ids(map_data, map_header, primary_states)

    if secondary_state is None:
        # No explicit secondary state: use every remaining candidate.
        state2_samids = set(sample_ids).difference(set(state1_samids))
    else:
        secondary_states = parse_metadata_state_descriptions(secondary_state)
        if colorby is not None:
            secondary_states[colorby] = [cat]
        state2_samids = get_sample_ids(map_data, map_header, secondary_states)

    return list(set(state1_samids)), list(set(state2_samids))
def main():
    """Extract sequences for the selected samples into a new FASTA file.

    Reads the command line options, resolves the sample ids (either a
    comma-separated list, or a metadata state description evaluated
    against the mapping file when one is given), then writes every record
    produced by extract_seqs_by_sample_id to the output FASTA path.

    Raises ValueError when a mapping file is given and no sample matches
    the search criteria.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    sample_ids = opts.sample_ids
    mapping_fp = opts.mapping_fp
    input_fasta_fp = opts.input_fasta_fp
    output_fasta_fp = opts.output_fasta_fp

    if not mapping_fp:
        sample_ids = sample_ids.split(',')
    else:
        map_data, map_header, _ = parse_mapping_file(mapping_fp)
        sample_ids = get_sample_ids(
            map_data, map_header,
            parse_metadata_state_descriptions(sample_ids))
        if len(sample_ids) == 0:
            # Report the query as the user typed it; sample_ids has been
            # rebound to the (empty) lookup result at this point.
            raise ValueError(
                "No samples match the search criteria: %s" % opts.sample_ids)

    if opts.verbose:
        # This is useful when using the --valid_states feature so you can
        # find out if a search query didn't work as you expected before a
        # lot of time is spent
        # Single-argument parenthesised form prints the same text under
        # Python 2 and is also valid Python 3 syntax.
        print("Extracting samples: %s" % ', '.join(sample_ids))

    input_fasta_f = None
    output_fasta_f = None
    try:
        try:
            input_fasta_f = open(input_fasta_fp)
            seqs = parse_fasta(input_fasta_f)
        except IOError:
            # NOTE(review): option_parser.error() presumably terminates
            # the process itself (optparse-style); exit(1) is kept as a
            # fallback in case it returns.
            option_parser.error(
                'Cannot open %s. Does it exist? Do you have read access?' %
                input_fasta_fp)
            exit(1)

        try:
            output_fasta_f = open(output_fasta_fp, 'w')
        except IOError:
            option_parser.error(
                "Cannot open %s. Does path exist? Do you have write access?" %
                output_fasta_fp)
            exit(1)

        # Each record is interpolated into a two-field FASTA template.
        for r in extract_seqs_by_sample_id(seqs, sample_ids, negate):
            output_fasta_f.write('>%s\n%s\n' % r)
    finally:
        # Close whichever handles were opened, including on the error and
        # exit paths above.
        for handle in (output_fasta_f, input_fasta_f):
            if handle is not None:
                handle.close()