def get_sorted_counts_per_sample(biom_table, reverse=False):
    """Return (count, sample id) pairs for a BIOM table, ordered by count.

    inputs:
        biom_table: biom table object, passed unchanged to
            compute_seqs_per_library_stats
        reverse: when true, order the pairs from max to min instead of
            the default min to max

    outputs:
        list of (seqs/sample, sampleId) tuples. The pairs are sorted as
        whole tuples, so equal counts fall back to comparing sample ids.
    """
    # Element 4 of the stats result is used as the per-sample mapping
    # (sample id -> count).
    per_sample = compute_seqs_per_library_stats(biom_table)[4]
    ordered = sorted(
        (count, sample_id) for sample_id, count in per_sample.items())
    return ordered[::-1] if reverse else ordered
def get_sorted_counts_per_sample(biom_table, reverse=False):
    """List the per-sample counts of a BIOM table in sorted order.

    inputs:
        biom_table: biom table object (forwarded as-is to
            compute_seqs_per_library_stats)
        reverse: false (default) gives min-to-max ordering; any true
            value gives max-to-min

    outputs:
        sorted list of tuples [(seqs/sample, sampleId), ...]; the count is
        the primary sort key because it is the first tuple element
    """
    # The fifth item returned by the stats helper is consumed here as a
    # mapping of sample id -> count.
    counts_by_sample = compute_seqs_per_library_stats(biom_table)[4]

    pairs = [(count, sample_id)
             for sample_id, count in counts_by_sample.items()]
    pairs.sort()
    if not reverse:
        return pairs
    return list(reversed(pairs))
def main():
    """Print per-sample count statistics for a BIOM table.

    Options are read from the command line via
    parse_command_line_parameters(**script_info):

        opts.otu_table_fp: path of the BIOM table to summarise
        opts.num_otus: forwarded to compute_seqs_per_library_stats; also
            switches the report labels from "Seqs/sample" to "OTUs/sample"
            and suppresses the total observation count
        opts.mapping_fp: optional mapping file to annotate
        opts.output_mapping_fp: where the annotated mapping file is written

    The summary (min, max, median, mean, std. dev., median absolute
    deviation, suggested even sampling depth) and the per-sample detail are
    printed to stdout. When a mapping file is given, a "NumIndividuals"
    column holding each sample's count ("na" for samples missing from the
    table) is inserted and the result is written to opts.output_mapping_fp.

    Raises:
        RuntimeError: opts.mapping_fp was supplied without
            opts.output_mapping_fp. This is only checked after the report
            has been printed.
    """
    # NOTE: every print below uses the single-argument parenthesised form,
    # which produces the same output under the Python 2 print statement.
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = parse_biom_table(qiime_open(opts.otu_table_fp))
    (min_counts, max_counts, median_counts, mean_counts,
     counts_per_sample) = compute_seqs_per_library_stats(
        otu_table, opts.num_otus)
    num_otus = len(otu_table.ObservationIds)

    counts_per_sample_values = counts_per_sample.values()
    med_abs_dev = median_absolute_deviation(counts_per_sample_values)[0]
    even_sampling_depth = guess_even_sampling_depth(counts_per_sample_values)

    num_samples = len(counts_per_sample)
    print("Num samples: %s" % str(num_samples))
    print("Num otus: %s" % str(num_otus))
    if not opts.num_otus:
        num_observations = sum(counts_per_sample_values)
        print("Num observations (sequences): %s" % str(num_observations))
        # TODO: port table density reporting to a tested function. The
        # naive num_observations / (num_samples * num_otus) is wrong: the
        # numerator must be the count of non-zero cells, not the number
        # of observations.
    print("")

    if opts.num_otus:
        print("OTUs/sample summary:")
    else:
        print("Seqs/sample summary:")
    print(" Min: %s" % str(min_counts))
    print(" Max: %s" % str(max_counts))
    print(" Median: %s" % str(median_counts))
    print(" Mean: %s" % str(mean_counts))
    print(" Std. dev.: %s" % (str(std(counts_per_sample_values))))
    print(" Median Absolute Deviation: %s" % str(med_abs_dev))
    print(" Default even sampling depth in\n core_qiime_analyses.py "
          "(just a suggestion): %s" % str(even_sampling_depth))
    print("")

    if opts.num_otus:
        print("OTUs/sample detail:")
    else:
        print("Seqs/sample detail:")
    # Sort on (count, sample id) so the detail runs from the smallest
    # sample to the largest.
    sorted_counts_per_sample = [(v, k) for k, v in counts_per_sample.items()]
    sorted_counts_per_sample.sort()
    for v, k in sorted_counts_per_sample:
        print(" %s: %s" % (k, str(v)))

    if opts.mapping_fp:
        if not opts.output_mapping_fp:
            raise RuntimeError("input mapping file supplied, but no path to"
                               " output file")
        # Context managers close the handles even when parsing or writing
        # raises.
        with open(opts.mapping_fp, "U") as mapping_f:
            mapping_lines, headers, comments = parse_mapping_file(mapping_f)

        if len(headers) == 1:
            # Only the sample id column exists: the new data goes last.
            endoffset = 0
        else:
            # Usually make this data the penultimate column.
            endoffset = 1
        headers.insert(len(headers) - endoffset, "NumIndividuals")
        for map_line in mapping_lines:
            # The first field of each mapping row is used as the sample id.
            try:
                depth = str(counts_per_sample[map_line[0]])
            except KeyError:
                depth = "na"
            map_line.insert(len(map_line) - endoffset, depth)

        new_map_str = format_mapping_file(headers, mapping_lines, comments)
        with open(opts.output_mapping_fp, "w") as output_f:
            output_f.write(new_map_str)
def main():
    """Print per-sample count statistics and metadata info for a BIOM table.

    Options are read from the command line via
    parse_command_line_parameters(**script_info):

        opts.otu_table_fp: path of the BIOM table to summarise
        opts.num_otus: forwarded to compute_seqs_per_library_stats; also
            switches the report labels from 'Seqs/sample' to 'OTUs/sample'
            and suppresses the observation count and table density lines
        opts.mapping_fp: optional mapping file to annotate
        opts.output_mapping_fp: where the annotated mapping file is written

    The summary (min, max, median, mean, std. dev., median absolute
    deviation, suggested even sampling depth, sample and observation
    metadata categories) and the per-sample detail are printed to stdout.
    When a mapping file is given, a 'SequenceCount' column holding each
    sample's count ('na' for samples missing from the table) is inserted
    and the result is written to opts.output_mapping_fp.

    Raises:
        RuntimeError: opts.mapping_fp was supplied without
            opts.output_mapping_fp. This is only checked after the report
            has been printed.
    """
    # NOTE: every print below uses the single-argument parenthesised form,
    # which produces the same output under the Python 2 print statement.
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = parse_biom_table(qiime_open(opts.otu_table_fp))
    (min_counts, max_counts, median_counts, mean_counts,
     counts_per_sample) = compute_seqs_per_library_stats(
        otu_table, opts.num_otus)
    num_otus = len(otu_table.ObservationIds)

    counts_per_sample_values = counts_per_sample.values()
    med_abs_dev = median_absolute_deviation(counts_per_sample_values)[0]
    even_sampling_depth = guess_even_sampling_depth(counts_per_sample_values)

    # A TypeError from subscripting the metadata is treated as "this table
    # carries no metadata" (e.g. the attribute is None).
    try:
        sample_md_keys = otu_table.SampleMetadata[0].keys()
    except TypeError:
        sample_md_keys = ["None provided"]
    try:
        observation_md_keys = otu_table.ObservationMetadata[0].keys()
    except TypeError:
        observation_md_keys = ["None provided"]

    num_samples = len(counts_per_sample)
    print('Num samples: %s' % str(num_samples))
    print('Num otus: %s' % str(num_otus))
    if not opts.num_otus:
        num_observations = sum(counts_per_sample_values)
        print('Num observations (sequences): %s' % str(num_observations))
        print('Table density (fraction of non-zero values): %1.4f' %
              otu_table.getTableDensity())
    print('')

    if opts.num_otus:
        print('OTUs/sample summary:')
    else:
        print('Seqs/sample summary:')
    print(' Min: %s' % str(min_counts))
    print(' Max: %s' % str(max_counts))
    print(' Median: %s' % str(median_counts))
    print(' Mean: %s' % str(mean_counts))
    print(' Std. dev.: %s' % (str(std(counts_per_sample_values))))
    print(' Median Absolute Deviation: %s' % str(med_abs_dev))
    print(' Default even sampling depth in\n core_qiime_analyses.py '
          '(just a suggestion): %s' % str(even_sampling_depth))
    print(' Sample Metadata Categories: %s' % '; '.join(sample_md_keys))
    print(' Observation Metadata Categories: %s' %
          '; '.join(observation_md_keys))
    print('')

    if opts.num_otus:
        print('OTUs/sample detail:')
    else:
        print('Seqs/sample detail:')
    # Sort on (count, sample id) so the detail runs from the smallest
    # sample to the largest.
    sorted_counts_per_sample = [(v, k) for k, v in counts_per_sample.items()]
    sorted_counts_per_sample.sort()
    for v, k in sorted_counts_per_sample:
        print(' %s: %s' % (k, str(v)))

    if opts.mapping_fp:
        if not opts.output_mapping_fp:
            raise RuntimeError('input mapping file supplied, but no path to'
                               ' output file')
        # Context managers close the handles even when parsing or writing
        # raises.
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping_lines, headers, comments = parse_mapping_file(mapping_f)

        if len(headers) == 1:
            # Only the sample id column exists: the new data goes last.
            endoffset = 0
        else:
            # Usually make this data the penultimate column.
            endoffset = 1
        headers.insert(len(headers) - endoffset, 'SequenceCount')
        for map_line in mapping_lines:
            # The first field of each mapping row is used as the sample id.
            try:
                depth = str(counts_per_sample[map_line[0]])
            except KeyError:
                depth = 'na'
            map_line.insert(len(map_line) - endoffset, depth)

        new_map_str = format_mapping_file(headers, mapping_lines, comments)
        with open(opts.output_mapping_fp, 'w') as output_f:
            output_f.write(new_map_str)
def main():
    """Print per-sample count statistics for a BIOM table.

    Options are read from the command line via
    parse_command_line_parameters(**script_info):

        opts.otu_table_fp: path of the BIOM table to summarise
        opts.num_otus: forwarded to compute_seqs_per_library_stats; also
            switches the report labels from 'Seqs/sample' to 'OTUs/sample'
            and suppresses the total observation count
        opts.mapping_fp: optional mapping file to annotate
        opts.output_mapping_fp: where the annotated mapping file is written

    The summary (min, max, median, mean, std. dev., median absolute
    deviation, suggested even sampling depth) and the per-sample detail are
    printed to stdout. When a mapping file is given, a 'NumIndividuals'
    column holding each sample's count ('na' for samples missing from the
    table) is inserted and the result is written to opts.output_mapping_fp.

    Raises:
        RuntimeError: opts.mapping_fp was supplied without
            opts.output_mapping_fp. This is only checked after the report
            has been printed.
    """
    # NOTE: every print below uses the single-argument parenthesised form,
    # which produces the same output under the Python 2 print statement.
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = parse_biom_table(qiime_open(opts.otu_table_fp))
    (min_counts, max_counts, median_counts, mean_counts,
     counts_per_sample) = compute_seqs_per_library_stats(
        otu_table, opts.num_otus)
    num_otus = len(otu_table.ObservationIds)

    counts_per_sample_values = counts_per_sample.values()
    med_abs_dev = median_absolute_deviation(counts_per_sample_values)[0]
    even_sampling_depth = guess_even_sampling_depth(counts_per_sample_values)

    num_samples = len(counts_per_sample)
    print('Num samples: %s' % str(num_samples))
    print('Num otus: %s' % str(num_otus))
    if not opts.num_otus:
        num_observations = sum(counts_per_sample_values)
        print('Num observations (sequences): %s' % str(num_observations))
        # TODO: port table density reporting to a tested function. The
        # naive num_observations / (num_samples * num_otus) is wrong: the
        # numerator must be the count of non-zero cells, not the number
        # of observations.
    print('')

    if opts.num_otus:
        print('OTUs/sample summary:')
    else:
        print('Seqs/sample summary:')
    print(' Min: %s' % str(min_counts))
    print(' Max: %s' % str(max_counts))
    print(' Median: %s' % str(median_counts))
    print(' Mean: %s' % str(mean_counts))
    print(' Std. dev.: %s' % (str(std(counts_per_sample_values))))
    print(' Median Absolute Deviation: %s' % str(med_abs_dev))
    print(' Default even sampling depth in\n core_qiime_analyses.py '
          '(just a suggestion): %s' % str(even_sampling_depth))
    print('')

    if opts.num_otus:
        print('OTUs/sample detail:')
    else:
        print('Seqs/sample detail:')
    # Sort on (count, sample id) so the detail runs from the smallest
    # sample to the largest.
    sorted_counts_per_sample = [(v, k) for k, v in counts_per_sample.items()]
    sorted_counts_per_sample.sort()
    for v, k in sorted_counts_per_sample:
        print(' %s: %s' % (k, str(v)))

    if opts.mapping_fp:
        if not opts.output_mapping_fp:
            raise RuntimeError('input mapping file supplied, but no path to'
                               ' output file')
        # Context managers close the handles even when parsing or writing
        # raises.
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping_lines, headers, comments = parse_mapping_file(mapping_f)

        if len(headers) == 1:
            # Only the sample id column exists: the new data goes last.
            endoffset = 0
        else:
            # Usually make this data the penultimate column.
            endoffset = 1
        headers.insert(len(headers) - endoffset, 'NumIndividuals')
        for map_line in mapping_lines:
            # The first field of each mapping row is used as the sample id.
            try:
                depth = str(counts_per_sample[map_line[0]])
            except KeyError:
                depth = 'na'
            map_line.insert(len(map_line) - endoffset, depth)

        new_map_str = format_mapping_file(headers, mapping_lines, comments)
        with open(opts.output_mapping_fp, 'w') as output_f:
            output_f.write(new_map_str)
def run_alpha_rarefaction(otu_table_fp, mapping_fp, output_dir,
                          command_handler, params, qiime_config,
                          tree_fp=None, num_steps=10, parallel=False,
                          logger=None, min_rare_depth=10,
                          max_rare_depth=None, suppress_md5=False,
                          status_update_callback=print_to_stdout,
                          plot_stderr_and_stddev=False):
    """ Run the alpha rarefaction workflow of Qiime

        The steps performed by this function are:
          1) Generate rarefied OTU tables;
          2) Compute alpha diversity metrics for each rarefied OTU table;
          3) Collate alpha diversity results;
          4) Generate alpha rarefaction plots.

        The function only builds the command lines and creates the output
        directories; the commands are executed by command_handler.

        otu_table_fp: path to the input BIOM table
        mapping_fp: path to the mapping file (passed to
         make_rarefaction_plots.py via -m)
        output_dir: directory under which all step directories are created
        command_handler: callable invoked as command_handler(commands,
         status_update_callback, logger=..., close_logger_on_success=...)
        params: mapping of script name -> option mapping; sections that
         are missing are treated as having no options
        qiime_config: mapping that provides 'python_exe_fp'
        tree_fp: optional tree path, passed to alpha diversity as -t
        num_steps: number of rarefaction depths between min and max
        parallel: use the parallel_* scripts and append params['parallel']
        logger: existing logger; when None a WorkflowLogger is created and
         the command handler is asked to close it on success
        min_rare_depth: smallest rarefaction depth
        max_rare_depth: largest rarefaction depth; when None the median
         count returned by compute_seqs_per_library_stats is used
        suppress_md5: skip logging the md5 sums of the input files
        status_update_callback: forwarded to command_handler
        plot_stderr_and_stddev: when true, and no 'std_type' is configured
         for make_rarefaction_plots, plots are generated twice (stddev and
         stderr) instead of once
    """
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    # Only a logger created here is closed on success; a caller-supplied
    # logger stays under the caller's control.
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    if max_rare_depth is None:
        # Default the maximum depth to the median count (element 2 of the
        # stats result). The table is read inside a context manager so the
        # handle is released even if parsing fails.
        with open(otu_table_fp, 'U') as otu_table_f:
            max_rare_depth = compute_seqs_per_library_stats(
                parse_biom_table(otu_table_f))[2]
    # "or 1" keeps the step from collapsing to zero when the depth range
    # is smaller than num_steps.
    step = int((max_rare_depth - min_rare_depth) / num_steps) or 1
    max_rare_depth = int(max_rare_depth)

    # Step 1: rarefied OTU tables
    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    try:
        params_str = get_params_str(params['multiple_rarefactions'])
    except KeyError:
        params_str = ''
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        rarefaction_cmd = (
            '%s %s/parallel_multiple_rarefactions.py -T -i %s -m %s -x %s '
            '-s %s -o %s %s' %
            (python_exe_fp, script_dir, otu_table_fp, min_rare_depth,
             max_rare_depth, step, rarefaction_dir, params_str))
    else:
        rarefaction_cmd = (
            '%s %s/multiple_rarefactions.py -i %s -m %s -x %s -s %s '
            '-o %s %s' %
            (python_exe_fp, script_dir, otu_table_fp, min_rare_depth,
             max_rare_depth, step, rarefaction_dir, params_str))
    commands.append([('Alpha rarefaction', rarefaction_cmd)])

    # Step 2: alpha diversity on every rarefied table
    alpha_diversity_dir = '%s/alpha_div/' % output_dir
    create_dir(alpha_diversity_dir)
    try:
        params_str = get_params_str(params['alpha_diversity'])
    except KeyError:
        params_str = ''
    if tree_fp:
        params_str += ' -t %s' % tree_fp
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        alpha_diversity_cmd = (
            "%s %s/parallel_alpha_diversity.py -T -i %s -o %s %s" %
            (python_exe_fp, script_dir, rarefaction_dir,
             alpha_diversity_dir, params_str))
    else:
        alpha_diversity_cmd = (
            "%s %s/alpha_diversity.py -i %s -o %s %s" %
            (python_exe_fp, script_dir, rarefaction_dir,
             alpha_diversity_dir, params_str))
    commands.append(
        [('Alpha diversity on rarefied OTU tables', alpha_diversity_cmd)])

    # Step 3: collate the alpha diversity results
    alpha_collated_dir = '%s/alpha_div_collated/' % output_dir
    create_dir(alpha_collated_dir)
    try:
        params_str = get_params_str(params['collate_alpha'])
    except KeyError:
        params_str = ''
    alpha_collated_cmd = (
        '%s %s/collate_alpha.py -i %s -o %s %s' %
        (python_exe_fp, script_dir, alpha_diversity_dir,
         alpha_collated_dir, params_str))
    commands.append([('Collate alpha', alpha_collated_cmd)])

    # Step 4: rarefaction plots
    try:
        params_str = get_params_str(params['make_rarefaction_plots'])
    except KeyError:
        params_str = ''
    # A missing section is valid (see the except above), so look it up
    # without raising KeyError.
    plot_params = params.get('make_rarefaction_plots', {})

    if 'std_type' in plot_params or not plot_stderr_and_stddev:
        # Single set of plots: either the user chose the std_type or the
        # stddev/stderr pair was not requested.
        rarefaction_plot_dir = '%s/alpha_rarefaction_plots/' % output_dir
        create_dir(rarefaction_plot_dir)
        make_rarefaction_plot_cmd = (
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s' %
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir, params_str))
        commands.append(
            [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])
    else:
        # One set of plots per error-bar type, each in its own directory.
        rarefaction_plot_dir_stddev = \
            '%s/alpha_rarefaction_plots_stddev/' % output_dir
        rarefaction_plot_dir_stderr = \
            '%s/alpha_rarefaction_plots_stderr/' % output_dir
        create_dir(rarefaction_plot_dir_stddev)
        create_dir(rarefaction_plot_dir_stderr)

        make_rarefaction_plot_cmd = (
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s '
            '--std_type stddev' %
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir_stddev, params_str))
        commands.append(
            [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])

        make_rarefaction_plot_cmd = (
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s '
            '--std_type stderr' %
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir_stderr, params_str))
        commands.append(
            [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
def run_alpha_rarefaction(otu_table_fp, mapping_fp, output_dir,
                          command_handler, params, qiime_config,
                          tree_fp=None, num_steps=10, parallel=False,
                          logger=None, min_rare_depth=10,
                          max_rare_depth=None, suppress_md5=False,
                          status_update_callback=print_to_stdout,
                          plot_stderr_and_stddev=False):
    """ Run the alpha rarefaction workflow of Qiime

        The steps performed by this function are:
          1) Generate rarefied OTU tables;
          2) Compute alpha diversity metrics for each rarefied OTU table;
          3) Collate alpha diversity results;
          4) Generate alpha rarefaction plots.

        Nothing is executed directly: the function creates the output
        directories, assembles one command line per step and hands the
        list to command_handler.

        otu_table_fp: path to the input BIOM table
        mapping_fp: path to the mapping file (passed to
         make_rarefaction_plots.py via -m)
        output_dir: directory under which all step directories are created
        command_handler: callable invoked as command_handler(commands,
         status_update_callback, logger=..., close_logger_on_success=...)
        params: mapping of script name -> option mapping; sections that
         are missing are treated as having no options
        qiime_config: mapping that provides 'python_exe_fp'
        tree_fp: optional tree path, passed to alpha diversity as -t
        num_steps: number of rarefaction depths between min and max
        parallel: use the parallel_* scripts and append params['parallel']
        logger: existing logger; when None a WorkflowLogger is created and
         the command handler is asked to close it on success
        min_rare_depth: smallest rarefaction depth
        max_rare_depth: largest rarefaction depth; when None the median
         count returned by compute_seqs_per_library_stats is used
        suppress_md5: skip logging the md5 sums of the input files
        status_update_callback: forwarded to command_handler
        plot_stderr_and_stddev: when true, and no 'std_type' is configured
         for make_rarefaction_plots, plots are generated twice (stddev and
         stderr) instead of once
    """
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    # Only a logger created here is closed on success; a caller-supplied
    # logger stays under the caller's control.
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    if max_rare_depth is None:
        # Default the maximum depth to the median count (element 2 of the
        # stats result). The table is read inside a context manager so the
        # handle is released even if parsing fails.
        with open(otu_table_fp, 'U') as otu_table_f:
            max_rare_depth = compute_seqs_per_library_stats(
                parse_biom_table(otu_table_f))[2]
    # "or 1" keeps the step from collapsing to zero when the depth range
    # is smaller than num_steps.
    step = int((max_rare_depth - min_rare_depth) / num_steps) or 1
    max_rare_depth = int(max_rare_depth)

    # Step 1: rarefied OTU tables
    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    try:
        params_str = get_params_str(params['multiple_rarefactions'])
    except KeyError:
        params_str = ''
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        rarefaction_cmd = (
            '%s %s/parallel_multiple_rarefactions.py -T -i %s -m %s -x %s '
            '-s %s -o %s %s' %
            (python_exe_fp, script_dir, otu_table_fp, min_rare_depth,
             max_rare_depth, step, rarefaction_dir, params_str))
    else:
        rarefaction_cmd = (
            '%s %s/multiple_rarefactions.py -i %s -m %s -x %s -s %s '
            '-o %s %s' %
            (python_exe_fp, script_dir, otu_table_fp, min_rare_depth,
             max_rare_depth, step, rarefaction_dir, params_str))
    commands.append([('Alpha rarefaction', rarefaction_cmd)])

    # Step 2: alpha diversity on every rarefied table
    alpha_diversity_dir = '%s/alpha_div/' % output_dir
    create_dir(alpha_diversity_dir)
    try:
        params_str = get_params_str(params['alpha_diversity'])
    except KeyError:
        params_str = ''
    if tree_fp:
        params_str += ' -t %s' % tree_fp
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        alpha_diversity_cmd = (
            "%s %s/parallel_alpha_diversity.py -T -i %s -o %s %s" %
            (python_exe_fp, script_dir, rarefaction_dir,
             alpha_diversity_dir, params_str))
    else:
        alpha_diversity_cmd = (
            "%s %s/alpha_diversity.py -i %s -o %s %s" %
            (python_exe_fp, script_dir, rarefaction_dir,
             alpha_diversity_dir, params_str))
    commands.append(
        [('Alpha diversity on rarefied OTU tables', alpha_diversity_cmd)])

    # Step 3: collate the alpha diversity results
    alpha_collated_dir = '%s/alpha_div_collated/' % output_dir
    create_dir(alpha_collated_dir)
    try:
        params_str = get_params_str(params['collate_alpha'])
    except KeyError:
        params_str = ''
    alpha_collated_cmd = (
        '%s %s/collate_alpha.py -i %s -o %s %s' %
        (python_exe_fp, script_dir, alpha_diversity_dir,
         alpha_collated_dir, params_str))
    commands.append([('Collate alpha', alpha_collated_cmd)])

    # Step 4: rarefaction plots
    try:
        params_str = get_params_str(params['make_rarefaction_plots'])
    except KeyError:
        params_str = ''
    # A missing section is valid (see the except above), so look it up
    # without raising KeyError.
    plot_params = params.get('make_rarefaction_plots', {})

    if 'std_type' in plot_params or not plot_stderr_and_stddev:
        # Single set of plots: either the user chose the std_type or the
        # stddev/stderr pair was not requested.
        rarefaction_plot_dir = '%s/alpha_rarefaction_plots/' % output_dir
        create_dir(rarefaction_plot_dir)
        make_rarefaction_plot_cmd = (
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s' %
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir, params_str))
        commands.append(
            [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])
    else:
        # One set of plots per error-bar type, each in its own directory.
        rarefaction_plot_dir_stddev = \
            '%s/alpha_rarefaction_plots_stddev/' % output_dir
        rarefaction_plot_dir_stderr = \
            '%s/alpha_rarefaction_plots_stderr/' % output_dir
        create_dir(rarefaction_plot_dir_stddev)
        create_dir(rarefaction_plot_dir_stderr)

        make_rarefaction_plot_cmd = (
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s '
            '--std_type stddev' %
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir_stddev, params_str))
        commands.append(
            [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])

        make_rarefaction_plot_cmd = (
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s '
            '--std_type stderr' %
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir_stderr, params_str))
        commands.append(
            [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)