def main():
    """Write the selector lines computed from a mapping file and BIOM table.

    Reads the mapping file (``opts.mapping``) and the BIOM table
    (``opts.biom_file``), restricts the mapping data to the sample ids
    present in the BIOM table, and writes the lines returned by
    ``make_selectors`` to ``opts.clean_fp`` below a tab-separated header.

    Raises:
        ValueError: if ``opts.subject_name`` is not one of the headers of
            the filtered mapping file.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    map_fp = opts.mapping
    biom_fp = opts.biom_file
    min_seqs_sample = opts.min_seqs_sample
    subject_category = opts.subject_name
    cleaned_fp = opts.clean_fp
    verbose = opts.verbose

    # Parse inside ``with`` so the input handles are closed as soon as the
    # parsers return instead of lingering until garbage collection.
    # NOTE(review): assumes both parsers read the handle eagerly -- confirm.
    with open(map_fp, 'U') as map_f:
        map_data, headers, comments = parse_mapping_file(map_f)
    with open(biom_fp, 'U') as biom_f:
        biom_table = parse_biom_table(biom_f)

    # getting valid samples from biom file
    real_map_headers, real_map_data = filter_mapping_file(
        map_data, headers, biom_table.SampleIds, include_repeat_cols=False)

    if subject_category not in real_map_headers:
        # Call form of ``raise`` is valid on both Python 2 and Python 3.
        raise ValueError(
            'This column: %s is not in the mapping file, try %s' %
            (subject_category, real_map_headers))

    sorted_counts_per_sample = get_sorted_counts_per_sample(biom_table)

    mapping_file_tuple = (real_map_data, real_map_headers)

    # calculate the available subjects at each rarefaction level
    # NOTE(review): another main() in this source unpacks make_selectors()
    # into (results, main_map_cat); confirm which return shape applies here.
    results = make_selectors(sorted_counts_per_sample, min_seqs_sample,
                             mapping_file_tuple, subject_category,
                             verbose=verbose)

    # save the output; ``with`` closes the file even if a write fails
    with open(cleaned_fp, 'w') as fout:
        fout.write('#Sequences\tSubjects\tSamples\tMetadata\n')
        fout.write('\n'.join(results))
def main():
    """Build the selector files, then run beta diversity and alpha rarefaction.

    Steps, in order:

    1. Create ``opts.output_dir`` (an existing directory is only accepted
       when ``opts.force`` is set).
    2. Compute the selectors from the mapping file and BIOM table and write
       ``selectors.txt``, ``mapping_file.txt`` and ``study_preferences.txt``
       into the output directory.
    3. Write ``filtered_otu_table_for_alpha.biom``, the BIOM table filtered
       with ``min_count=seqs_per_sample``.
    4. Load the QIIME parameters from ``opts.parameter_fp`` or, when that is
       not given, build a default set derived from ``seqs_per_sample`` and
       the categories returned by ``make_selectors``.
    5. Copy the input table to ``raw.biom``, call
       ``run_beta_diversity_through_plots`` on the unfiltered table and
       ``run_alpha_rarefaction`` on the filtered one (output in ``alpha/``).

    Invalid input is reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    verbose = opts.verbose
    print_only = opts.print_only
    seqs_per_sample = int(opts.seqs_per_sample)
    parallel = opts.parallel
    min_seqs_sample = opts.min_seqs_sample
    subject_category = opts.subject_name

    try:
        makedirs(output_dir)
    except OSError:
        if not opts.force:
            # Since the analysis can take quite a while, this check helps
            # users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                                " a different directory, or force overwrite with -f.")

    ## ******************** make_evident_selectors ********************
    ## The code for make_evident_selectors.py is here and has to go before
    ## the params validation as we need to know the main cats before
    ## creating the params file

    # Parse inside ``with`` so the input handles are closed right away.
    # NOTE(review): assumes both parsers read the handle eagerly -- confirm.
    with open(mapping_fp, 'U') as mapping_f:
        map_data, headers, comments = parse_mapping_file(mapping_f)
    with open(otu_table_fp, 'U') as otu_table_f:
        biom_table = parse_biom_table(otu_table_f)

    # getting valid samples from biom file
    real_map_headers, real_map_data = filter_mapping_file(
        map_data, headers, biom_table.SampleIds, include_repeat_cols=False)

    if subject_category not in real_map_headers:
        option_parser.error(
            'This column: %s is not in the mapping file, try %s' %
            (subject_category, real_map_headers))

    sorted_counts_per_sample = get_sorted_counts_per_sample(biom_table)

    mapping_file_tuple = (real_map_data, real_map_headers)

    # calculate the available subjects at each rarefaction level
    results, main_map_cat = make_selectors(
        sorted_counts_per_sample, min_seqs_sample, mapping_file_tuple,
        subject_category, verbose=verbose)

    with open(join(output_dir, 'selectors.txt'), 'w') as fout:
        fout.write('#Sequences\tSubjects\tSamples\tMetadata\n')
        fout.write('\n'.join(results))

    with open(join(output_dir, 'mapping_file.txt'), 'w') as fout:
        fout.write(format_mapping_file(real_map_headers, real_map_data))
    ## ******************** make_evident_selectors ********************

    with open(join(output_dir, 'study_preferences.txt'), 'w') as fout:
        fout.write('%d\n' % seqs_per_sample)
        fout.write('%s\n' % subject_category)

    ## ******************** filter_samples_from_otu_table ********************
    ## Filtering original biom file to only have samples above the max
    ## length to avoid ugly plots
    alpha_biom_file = join(output_dir, 'filtered_otu_table_for_alpha.biom')
    sample_ids_to_keep = biom_table.SampleIds
    # Filter before opening the output so a failing filter does not leave an
    # empty, truncated file behind.
    filtered_otu_table = filter_samples_from_otu_table(
        biom_table, sample_ids_to_keep,
        min_count=seqs_per_sample, max_count=inf)
    with open(alpha_biom_file, 'w') as fout:
        fout.write(format_biom_table(filtered_otu_table))
    ## ******************** filter_samples_from_otu_table ********************

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            # NOTE(review): relies on option_parser.error() not returning
            # (optparse exits the process); otherwise parameter_f is unbound.
            option_parser.error("Can't open parameters file (%s). Does it exist? "
                                "Do you have read access?" % opts.parameter_fp)
        with parameter_f:
            params = parse_qiime_parameters(parameter_f)
    else:
        # The same quarter-depth value is used as both the minimum depth and
        # the step of multiple_rarefactions.
        # NOTE(review): this is 0 when seqs_per_sample < 4 -- confirm the
        # downstream script tolerates a zero min/step.
        quarter_depth = int(seqs_per_sample/4)
        params = parse_qiime_parameters([
            'beta_diversity:metrics unweighted_unifrac',
            'make_rarefaction_plots:prefs_path %s' % join(output_dir, 'prefs.txt'),
            'make_rarefaction_plots:colorby %s' % ','.join(main_map_cat),
            'make_rarefaction_plots:output_type memory',
            'multiple_rarefactions:min %d' % quarter_depth,
            'multiple_rarefactions:max %d' % (seqs_per_sample+1),
            'multiple_rarefactions:step %d' % quarter_depth,
            'multiple_rarefactions:num-reps 4',
        ])

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel,
                                   option_parser)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    # Keep an untouched copy of the input table next to the results.
    copyfile(otu_table_fp, join(output_dir, 'raw.biom'))

    run_beta_diversity_through_plots(
        otu_table_fp=otu_table_fp,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        color_by_interesting_fields_only=False,
        sampling_depth=seqs_per_sample,
        histogram_categories=None,
        tree_fp=tree_fp,
        parallel=parallel,
        suppress_3d_plots=True,
        suppress_2d_plots=True,
        status_update_callback=status_update_callback)

    # Alpha rarefaction results go into their own sub-directory and use the
    # depth-filtered table written above.
    output_dir = join(output_dir, 'alpha')
    # NOTE(review): min_rare_depth/max_rare_depth are hard-coded to 10/20 and
    # do not depend on seqs_per_sample -- confirm this is intended.
    run_alpha_rarefaction(
        otu_table_fp=alpha_biom_file,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        tree_fp=tree_fp,
        num_steps=4,
        parallel=parallel,
        min_rare_depth=10,
        max_rare_depth=20,
        status_update_callback=status_update_callback,
        plot_stderr_and_stddev=True)