def main():
    """Filter a FASTA/FASTQ file by one user-selected criterion.

    Exactly one selector must be supplied: an OTU map, a sequence-id file, a
    subject FASTA file, a sequence-id prefix, a BIOM file, a sample-id file,
    or a mapping file together with valid states.  Any other combination is
    reported through ``option_parser.error``.

    The input is handled by ``filter_fastq`` when its path ends in ``.fastq``
    and by ``filter_fasta`` otherwise.  ``opts.negate`` is forwarded unchanged
    to the filter function.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    error_msg = ("Must pass exactly one of -a, -b, -s, -p, -m, "
                 "or --valid_states and --mapping_fp.")

    selectors = [opts.otu_map,
                 opts.seq_id_fp,
                 opts.subject_fasta_fp,
                 opts.seq_id_prefix,
                 opts.biom_fp,
                 opts.sample_id_fp,
                 opts.mapping_fp and opts.valid_states]
    if sum(map(bool, selectors)) != 1:
        option_parser.error(error_msg)

    def _has_prefix(seq_label):
        return seq_label.startswith(opts.seq_id_prefix)

    def _sample_id_in_lookup(seq_label):
        # Assumes labels look like "<sample id>_<n> ..." (TODO confirm).
        # Only the final "_<n>" is stripped so that sample ids which
        # themselves contain underscores still match the lookup.
        return seq_label.split()[0].rsplit('_', 1)[0] in seqs_to_keep_lookup

    seqid_f = None
    if opts.otu_map:
        with open(opts.otu_map, 'U') as otu_map_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_otu_map(otu_map_f)
    elif opts.seq_id_fp:
        with open(opts.seq_id_fp, 'U') as seq_id_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_seq_id_file(seq_id_f)
    elif opts.subject_fasta_fp:
        with open(opts.subject_fasta_fp, 'U') as subject_fasta_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_fasta_file(subject_fasta_f)
    elif opts.seq_id_prefix:
        # Prefix filtering needs no lookup; the predicate does all the work.
        seqs_to_keep_lookup = None
        seqid_f = _has_prefix
    elif opts.mapping_fp and opts.valid_states:
        with open(opts.mapping_fp, 'U') as mapping_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_mapping_file(
                    mapping_f, opts.valid_states)
        seqid_f = _sample_id_in_lookup
    elif opts.biom_fp:
        seqs_to_keep_lookup = \
            get_seqs_to_keep_lookup_from_biom(opts.biom_fp)
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            # Skip blank lines: split()[0] would raise IndexError on them.
            sample_ids = set(
                [e.split()[0] for e in sample_id_f if e.strip()])
        seqs_to_keep_lookup = \
            get_seqs_to_keep_lookup_from_sample_ids(sample_ids)
        seqid_f = _sample_id_in_lookup
    else:
        # Defensive: the selector count above should make this unreachable.
        option_parser.error(error_msg)

    if opts.input_fasta_fp.endswith('.fastq'):
        filter_fp_f = filter_fastq
    else:
        filter_fp_f = filter_fasta

    # Context managers guarantee both handles are closed (and the output
    # flushed) even if filtering raises.
    with open(opts.input_fasta_fp, 'U') as input_fasta_f:
        with open(opts.output_fasta_fp, 'w') as output_fasta_f:
            filter_fp_f(input_fasta_f,
                        output_fasta_f,
                        seqs_to_keep_lookup,
                        negate,
                        seqid_f=seqid_f)
def main():
    """Write a distance matrix restricted to a chosen set of samples.

    The samples to keep come from, in order of precedence, an OTU table
    (its ``SampleIds``), a sample-id file, or a mapping file combined with
    valid states.  If none is supplied the problem is reported through
    ``option_parser.error``.

    The output file is only opened once the filtered matrix has been
    computed, so a usage or parsing error does not leave behind an empty
    (or truncated) output file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.otu_table_fp:
        with open(opts.otu_table_fp, 'U') as otu_table_f:
            otu_table = parse_biom_table(otu_table_f)
        samples_to_keep = otu_table.SampleIds
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            samples_to_keep = \
                get_seqs_to_keep_lookup_from_seq_id_file(sample_id_f)
    elif opts.mapping_fp and opts.valid_states:
        try:
            with open(opts.mapping_fp, 'U') as mapping_f:
                samples_to_keep = sample_ids_from_metadata_description(
                    mapping_f, opts.valid_states)
        except ValueError as e:
            # Surface a bad metadata description as a usage error rather
            # than a traceback.
            option_parser.error(str(e))
    else:
        option_parser.error(
            'must pass either --sample_id_fp, -t, or -m and -s')

    # note that negate gets a little weird here. The function we're calling
    # removes the specified samples from the distance matrix, but the other
    # QIIME filter scripts keep these samples specified. So, the interface of
    # this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    with open(opts.input_distance_matrix, 'U') as input_dm_f:
        d = filter_samples_from_distance_matrix(
            parse_distmat(input_dm_f),
            samples_to_keep,
            negate=not opts.negate)

    with open(opts.output_distance_matrix, 'w') as output_f:
        output_f.write(d)
def main():
    """Filter a FASTA file by one user-selected criterion.

    Exactly one selector must be supplied: an OTU map, a sequence-id file, a
    subject FASTA file, or a sequence-id prefix.  Any other combination is
    reported through ``option_parser.error``.  The resulting lookup, together
    with ``opts.negate``, is handed to ``filter_fasta_fp`` along with the
    input and output paths.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    # One message for both error sites; the fallback branch previously
    # omitted -p from the list of accepted options.
    error_msg = "Must pass exactly one of -a, -s, -p, or -m."

    selectors = [opts.otu_map,
                 opts.seq_id_fp,
                 opts.subject_fasta_fp,
                 opts.seq_id_prefix]
    if sum(map(bool, selectors)) != 1:
        option_parser.error(error_msg)

    if opts.otu_map:
        with open(opts.otu_map, 'U') as otu_map_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_otu_map(otu_map_f)
    elif opts.seq_id_fp:
        with open(opts.seq_id_fp, 'U') as seq_id_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_seq_id_file(seq_id_f)
    elif opts.subject_fasta_fp:
        with open(opts.subject_fasta_fp, 'U') as subject_fasta_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_fasta_file(subject_fasta_f)
    elif opts.seq_id_prefix:
        with open(opts.input_fasta_fp) as input_fasta_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_prefix(
                    input_fasta_f, opts.seq_id_prefix)
    else:
        # Defensive: the selector count above should make this unreachable.
        option_parser.error(error_msg)

    filter_fasta_fp(opts.input_fasta_fp,
                    opts.output_fasta_fp,
                    seqs_to_keep_lookup,
                    negate)
def main():
    """Write a distance matrix restricted to a chosen set of samples.

    The samples to keep come from, in order of precedence, an OTU table
    (``load_table(...).ids()``), a sample-id file, or a mapping file combined
    with valid states.  A ``ValueError`` raised while interpreting the
    mapping file, or the absence of any selector, is reported through
    ``option_parser.error``.

    The output file is only opened once the filtered matrix has been
    computed, so a usage or parsing error does not leave behind an empty
    (or truncated) output file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.otu_table_fp:
        otu_table = load_table(opts.otu_table_fp)
        samples_to_keep = otu_table.ids()
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            samples_to_keep = \
                get_seqs_to_keep_lookup_from_seq_id_file(sample_id_f)
    elif opts.mapping_fp and opts.valid_states:
        try:
            with open(opts.mapping_fp, 'U') as mapping_f:
                samples_to_keep = sample_ids_from_metadata_description(
                    mapping_f, opts.valid_states)
        except ValueError as e:
            # str(e) instead of the deprecated e.message attribute.
            option_parser.error(str(e))
    else:
        option_parser.error('must pass either --sample_id_fp, -t, or -m and '
                            '-s')

    # note that negate gets a little weird here. The function we're calling
    # removes the specified samples from the distance matrix, but the other
    # QIIME filter scripts keep these samples specified. So, the interface of
    # this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    with open(opts.input_distance_matrix, 'U') as input_dm_f:
        d = filter_samples_from_distance_matrix(
            parse_distmat(input_dm_f),
            samples_to_keep,
            negate=not opts.negate)

    with open(opts.output_distance_matrix, 'w') as output_f:
        output_f.write(d)
def main():
    """Write a copy of a mapping file containing only selected samples.

    The accepted sample ids come from a sample-id file when one is given,
    otherwise from the input mapping file combined with valid states.  If
    neither is available the problem is reported through
    ``option_parser.error`` (previously this fell through to a NameError).

    Rows of the parsed mapping file whose first field is among the accepted
    ids are kept, formatted with the original headers, and written to the
    output path.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    mapping_fp = opts.input_fp
    out_mapping_fp = opts.output_fp
    valid_states = opts.valid_states

    if opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            valid_sample_ids = \
                get_seqs_to_keep_lookup_from_seq_id_file(sample_id_f)
    elif mapping_fp and valid_states:
        with open(mapping_fp, 'U') as mapping_f:
            valid_sample_ids = sample_ids_from_metadata_description(
                mapping_f, valid_states)
    else:
        option_parser.error(
            'Must pass either a sample id file or valid states.')

    with open(mapping_fp, 'U') as mapping_f:
        data, headers, _ = parse_mapping_file(mapping_f)

    # The first column of each mapping row is compared against the ids.
    good_mapping_file = [line for line in data
                         if line[0] in valid_sample_ids]
    lines = format_mapping_file(headers, good_mapping_file)

    with open(out_mapping_fp, 'w') as fd:
        fd.write(lines)
def main():
    """Filter a FASTA/FASTQ file by one user-selected criterion.

    Exactly one selector must be supplied: an OTU map, a sequence-id file, a
    subject FASTA file, a sequence-id prefix, a BIOM file, a sample-id file,
    or a mapping file together with valid states.  Any other combination is
    reported through ``option_parser.error``.

    The input is handled by ``filter_fastq`` when its path ends in ``.fastq``
    and by ``filter_fasta`` otherwise.  ``opts.negate`` is forwarded unchanged
    to the filter function.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    error_msg = ("Must pass exactly one of -a, -b, -s, -p, -m, "
                 "or --valid_states and --mapping_fp.")

    selectors = [opts.otu_map,
                 opts.seq_id_fp,
                 opts.subject_fasta_fp,
                 opts.seq_id_prefix,
                 opts.biom_fp,
                 opts.sample_id_fp,
                 opts.mapping_fp and opts.valid_states]
    if sum(map(bool, selectors)) != 1:
        option_parser.error(error_msg)

    if opts.otu_map:
        with open(opts.otu_map, 'U') as otu_map_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_otu_map(otu_map_f)
    elif opts.seq_id_fp:
        with open(opts.seq_id_fp, 'U') as seq_id_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_seq_id_file(seq_id_f)
    elif opts.subject_fasta_fp:
        with open(opts.subject_fasta_fp, 'U') as subject_fasta_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_fasta_file(subject_fasta_f)
    elif opts.seq_id_prefix:
        with open(opts.input_fasta_fp) as prefix_scan_f:
            seqs_to_keep_lookup = \
                get_seqs_to_keep_lookup_from_prefix(
                    prefix_scan_f, opts.seq_id_prefix)
    elif opts.mapping_fp and opts.valid_states:
        with open(opts.input_fasta_fp, 'U') as fasta_scan_f:
            with open(opts.mapping_fp, 'U') as mapping_f:
                seqs_to_keep_lookup = \
                    get_seqs_to_keep_lookup_from_mapping_file(
                        fasta_scan_f, mapping_f, opts.valid_states)
    elif opts.biom_fp:
        seqs_to_keep_lookup = \
            get_seqs_to_keep_lookup_from_biom(opts.biom_fp)
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, 'U') as sample_id_f:
            # Skip blank lines: split()[0] would raise IndexError on them.
            sample_ids = set(
                [e.split()[0] for e in sample_id_f if e.strip()])
        with open(opts.input_fasta_fp) as fasta_scan_f:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_sample_ids(
                fasta_scan_f, sample_ids)
    else:
        # Defensive: the selector count above should make this unreachable.
        option_parser.error(error_msg)

    if opts.input_fasta_fp.endswith('.fastq'):
        filter_fp_f = filter_fastq
    else:
        filter_fp_f = filter_fasta

    # Context managers guarantee both handles are closed (and the output
    # flushed) even if filtering raises.
    with open(opts.input_fasta_fp, 'U') as input_fasta_f:
        with open(opts.output_fasta_fp, 'w') as output_fasta_f:
            filter_fp_f(input_fasta_f,
                        output_fasta_f,
                        seqs_to_keep_lookup,
                        negate)
def main():
    """Filter a FASTA/FASTQ file by one user-selected criterion.

    Exactly one selector must be supplied: an OTU map, a sequence-id file, a
    subject FASTA file, a sequence-id prefix, a BIOM file, a sample-id file,
    or a mapping file together with valid states.  Any other combination is
    reported through ``option_parser.error``.

    The input is handled by ``filter_fastq`` when its path ends in ``.fastq``
    and by ``filter_fasta`` otherwise.  ``opts.negate`` is forwarded unchanged
    to the filter function.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    negate = opts.negate
    error_msg = "Must pass exactly one of -a, -b, -s, -p, -m, or --valid_states and --mapping_fp."

    selectors = [
        opts.otu_map,
        opts.seq_id_fp,
        opts.subject_fasta_fp,
        opts.seq_id_prefix,
        opts.biom_fp,
        opts.sample_id_fp,
        opts.mapping_fp and opts.valid_states,
    ]
    if sum(map(bool, selectors)) != 1:
        option_parser.error(error_msg)

    def _has_prefix(seq_label):
        return seq_label.startswith(opts.seq_id_prefix)

    def _sample_id_in_lookup(seq_label):
        # Assumes labels look like "<sample id>_<n> ..." (TODO confirm).
        # Only the final "_<n>" is stripped so that sample ids which
        # themselves contain underscores still match the lookup.
        return seq_label.split()[0].rsplit("_", 1)[0] in seqs_to_keep_lookup

    seqid_f = None
    if opts.otu_map:
        with open(opts.otu_map, "U") as otu_map_f:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_otu_map(otu_map_f)
    elif opts.seq_id_fp:
        with open(opts.seq_id_fp, "U") as seq_id_f:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_seq_id_file(seq_id_f)
    elif opts.subject_fasta_fp:
        with open(opts.subject_fasta_fp, "U") as subject_fasta_f:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_fasta_file(subject_fasta_f)
    elif opts.seq_id_prefix:
        # Prefix filtering needs no lookup; the predicate does all the work.
        seqs_to_keep_lookup = None
        seqid_f = _has_prefix
    elif opts.mapping_fp and opts.valid_states:
        with open(opts.mapping_fp, "U") as mapping_f:
            seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_mapping_file(mapping_f, opts.valid_states)
        seqid_f = _sample_id_in_lookup
    elif opts.biom_fp:
        seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_biom(opts.biom_fp)
    elif opts.sample_id_fp:
        with open(opts.sample_id_fp, "U") as sample_id_f:
            # Skip blank lines: split()[0] would raise IndexError on them.
            sample_ids = set([e.split()[0] for e in sample_id_f if e.strip()])
        seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_sample_ids(sample_ids)
        seqid_f = _sample_id_in_lookup
    else:
        # Defensive: the selector count above should make this unreachable.
        option_parser.error(error_msg)

    if opts.input_fasta_fp.endswith(".fastq"):
        filter_fp_f = filter_fastq
    else:
        filter_fp_f = filter_fasta

    # Context managers guarantee both handles are closed (and the output
    # flushed) even if filtering raises.
    with open(opts.input_fasta_fp, "U") as input_fasta_f:
        with open(opts.output_fasta_fp, "w") as output_fasta_f:
            filter_fp_f(input_fasta_f, output_fasta_f, seqs_to_keep_lookup, negate, seqid_f=seqid_f)
def main():
    """Parse options and determine which sample ids to keep.

    The ids come from an OTU table's ``SampleIds``, a sample-id file, or a
    mapping file combined with valid states; a ``ValueError`` from the last
    of these is reported through ``option_parser.error``.

    NOTE(review): the visible definition stops after the ValueError handler.
    ``output_f`` is opened but never written or closed here, the input
    distance matrix is never read, and there is no fallback branch when no
    selector option is given -- this looks truncated; confirm against the
    complete script.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # Opened (and therefore truncated) before the options are validated.
    output_f = open(opts.output_distance_matrix,'w')
    if opts.otu_table_fp:
        otu_table = parse_biom_table(open(opts.otu_table_fp,'U'))
        samples_to_keep = otu_table.SampleIds
        #samples_to_keep = \
        # sample_ids_from_otu_table(open(opts.otu_table_fp,'U'))
    elif opts.sample_id_fp:
        samples_to_keep = \
         get_seqs_to_keep_lookup_from_seq_id_file(open(opts.sample_id_fp,'U'))
    elif opts.mapping_fp and opts.valid_states:
        try:
            samples_to_keep = sample_ids_from_metadata_description(
                open(opts.mapping_fp,'U'),opts.valid_states)
        # Python 2-only except syntax.
        except ValueError, e:
            option_parser.error(e.message)
def main():
    """Parse options and determine which sample ids to keep.

    The ids come from an OTU table's ``SampleIds``, a sample-id file, or a
    mapping file combined with valid states; a ``ValueError`` from the last
    of these is reported through ``option_parser.error``.

    NOTE(review): the visible definition stops after the ValueError handler.
    ``output_f`` is opened but never written or closed here, the input
    distance matrix is never read, and there is no fallback branch when no
    selector option is given -- this looks truncated; confirm against the
    complete script.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # Opened (and therefore truncated) before the options are validated.
    output_f = open(opts.output_distance_matrix, 'w')
    if opts.otu_table_fp:
        otu_table = parse_biom_table(open(opts.otu_table_fp, 'U'))
        samples_to_keep = otu_table.SampleIds
        #samples_to_keep = \
        # sample_ids_from_otu_table(open(opts.otu_table_fp,'U'))
    elif opts.sample_id_fp:
        samples_to_keep = \
            get_seqs_to_keep_lookup_from_seq_id_file(open(opts.sample_id_fp,'U'))
    elif opts.mapping_fp and opts.valid_states:
        try:
            samples_to_keep = sample_ids_from_metadata_description(
                open(opts.mapping_fp, 'U'), opts.valid_states)
        # Python 2-only except syntax.
        except ValueError, e:
            option_parser.error(e.message)
def main():
    """Write a tree filtered to a chosen set of tips.

    The tips to keep are read from a tips file when ``opts.tips_fp`` is
    given, otherwise from a FASTA file when ``opts.fasta_fp`` is given; if
    neither is set the problem is reported through ``option_parser.error``.
    When ``opts.negate`` is set the tip collection is first passed through
    ``negate_tips_to_keep`` together with the parsed tree.  The result of
    ``filter_tree`` is written to ``opts.output_tree_fp``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_tree_fp = opts.input_tree_fp
    tips_fp = opts.tips_fp
    fasta_fp = opts.fasta_fp
    output_tree_fp = opts.output_tree_fp

    if tips_fp is not None:
        with open(tips_fp, 'U') as tips_f:
            tips_to_keep = get_seqs_to_keep_lookup_from_seq_id_file(tips_f)
    elif fasta_fp is not None:
        with open(fasta_fp, 'U') as fasta_f:
            tips_to_keep = get_seqs_to_keep_lookup_from_fasta_file(fasta_f)
    else:
        option_parser.error("Must provide either -t or -f.")

    with open(input_tree_fp, 'U') as tree_f:
        tree = DndParser(tree_f)

    if opts.negate:
        tips_to_keep = negate_tips_to_keep(tips_to_keep, tree)
    filtered_tree = filter_tree(tree, tips_to_keep)
    filtered_tree.writeToFile(output_tree_fp)
def main():
    """Write a tree filtered to a chosen set of tips.

    The tips to keep are read from a tips file when ``opts.tips_fp`` is
    given, otherwise from a FASTA file when ``opts.fasta_fp`` is given; if
    neither is set the problem is reported through ``option_parser.error``.
    When ``opts.negate`` is set the tip collection is first passed through
    ``negate_tips_to_keep`` together with the parsed tree.  The result of
    ``filter_tree`` is written to ``opts.output_tree_fp``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_tree_fp = opts.input_tree_fp
    tips_fp = opts.tips_fp
    fasta_fp = opts.fasta_fp
    output_tree_fp = opts.output_tree_fp

    # Identity comparison is the idiomatic (and override-proof) None test.
    if tips_fp is not None:
        with open(tips_fp, 'U') as tips_f:
            tips_to_keep = get_seqs_to_keep_lookup_from_seq_id_file(
                tips_f)
    elif fasta_fp is not None:
        with open(fasta_fp, 'U') as fasta_f:
            tips_to_keep = get_seqs_to_keep_lookup_from_fasta_file(
                fasta_f)
    else:
        option_parser.error("Must provide either -t or -f.")

    # Close the tree file as soon as it has been parsed.
    with open(input_tree_fp, 'U') as tree_f:
        tree = DndParser(tree_f)

    if opts.negate:
        tips_to_keep = negate_tips_to_keep(tips_to_keep, tree)
    filtered_tree = filter_tree(tree, tips_to_keep)
    filtered_tree.writeToFile(output_tree_fp)