def generate_alpha_rarefaction_data_from_point_in_omega(biom_object, metrics,
                                                        sequences, iterations,
                                                        tree_object=None):
    """Generate alpha rarefaction data from a biom table

    Inputs:
    biom_object: OTU table to be rarefied and used to compute alpha diversity
    metrics: list of metrics, phylogenetic or non phylogenetic
    sequences: maximum number of sequences for the rarefaction plots
    iterations: number of repetitions per rarefaction
    tree_object: tree to perform the phylogenetic operations, default is None

    Output:
    alpha_rarefaction_data: dictionary where the keys are alpha diversity
    metrics and the values are tuples; in these tuples the first element is a
    list of column headers for an alpha diversity file, the second element is
    a list of row headers for an alpha diversity file and the third element is
    a list of lists containing the alpha diversity data computed at multiple
    rarefaction depths and as many iterations as specified.

    Raises an IndexError if get_rarefactions returns no rarefied tables.
    """
    # The minimum depth is the maximum depth divided by the number of steps,
    # rounded up. The divisor is made a float on purpose: dividing two
    # integers floors the quotient under Python 2, which would leave ceil
    # with nothing to round.
    steps = 4
    min_depth = int(ceil(sequences / float(steps)))

    # each element is indexed below as [0] and [1] (used to tag the result)
    # and [2] (the rarefied table handed to single_object_alpha)
    rarefied_bioms_list = get_rarefactions(biom_object, min_depth, sequences,
                                           iterations, steps)

    # compute and parse the alpha diversity of every rarefied table exactly
    # once; the parsed matrices are reused for every metric further down
    alpha_filenames = []
    parsed_alphas = []
    for rarefied_biom in rarefied_bioms_list:
        # this tag contains data about the iteration and the depth
        identifier = 'alpha_rare_%s_%s' % (str(rarefied_biom[0]),
                                           str(rarefied_biom[1]))
        alpha_values = single_object_alpha(rarefied_biom[2], metrics,
                                           tree_object)
        alpha_filenames.append(identifier)
        parsed_alphas.append(parse_matrix(alpha_values.split('\n')))

    # the first rarefaction is the reference for the metric and sample
    # headers; its result is already available so it is not recomputed
    all_metrics, all_samples, _ = parsed_alphas[0]
    num_samples = len(all_samples)

    # lookup by identifier; on a repeated identifier the last result wins
    alpha_rs = dict(zip(alpha_filenames, parsed_alphas))

    # build a dictionary with the data for each of the metrics specified
    metrics_data = {}
    for metric in all_metrics:
        per_metric_data = []
        for filename in alpha_filenames:
            f_metrics, f_samples, f_data = alpha_rs[filename]
            per_metric_data.append(
                make_output_row(f_metrics, metric, f_samples, f_data,
                                filename, num_samples, all_samples))
        metrics_data[metric] = per_metric_data

    # now format the dictionary to make it compatible with make_averages
    return _format_rarefactions(metrics_data, all_samples)
def generate_alpha_rarefaction_data_from_point_in_omega(biom_object, metrics,
                                                        sequences, iterations,
                                                        tree_object=None):
    """Generate alpha rarefaction data from a biom table

    Inputs:
    biom_object: OTU table to be rarefied and used to compute alpha diversity
    metrics: list of metrics, phylogenetic or non phylogenetic
    sequences: maximum number of sequences for the rarefaction plots
    iterations: number of repetitions per rarefaction
    tree_object: tree to perform the phylogenetic operations, default is None

    Output:
    alpha_rarefaction_data: dictionary where the keys are alpha diversity
    metrics and the values are tuples; in these tuples the first element is a
    list of column headers for an alpha diversity file, the second element is
    a list of row headers for an alpha diversity file and the third element is
    a list of lists containing the alpha diversity data computed at multiple
    rarefaction depths and as many iterations as specified.

    Raises an IndexError if get_rarefactions returns no rarefied tables.
    """
    # The minimum depth is the maximum depth divided by the number of steps,
    # rounded up. The divisor is made a float on purpose: dividing two
    # integers floors the quotient under Python 2, which would leave ceil
    # with nothing to round.
    steps = 4
    min_depth = int(ceil(sequences / float(steps)))

    # each element is indexed below as [0] and [1] (used to tag the result)
    # and [2] (the rarefied table handed to single_object_alpha)
    rarefied_bioms_list = get_rarefactions(biom_object, min_depth, sequences,
                                           iterations, steps)

    # compute and parse the alpha diversity of every rarefied table exactly
    # once; the parsed matrices are reused for every metric further down
    alpha_filenames = []
    parsed_alphas = []
    for rarefied_biom in rarefied_bioms_list:
        # this tag contains data about the iteration and the depth
        identifier = 'alpha_rare_%s_%s' % (str(rarefied_biom[0]),
                                           str(rarefied_biom[1]))
        alpha_values = single_object_alpha(rarefied_biom[2], metrics,
                                           tree_object)
        alpha_filenames.append(identifier)
        parsed_alphas.append(parse_matrix(alpha_values.split('\n')))

    # the first rarefaction is the reference for the metric and sample
    # headers; its result is already available so it is not recomputed
    all_metrics, all_samples, _ = parsed_alphas[0]
    num_samples = len(all_samples)

    # lookup by identifier; on a repeated identifier the last result wins
    alpha_rs = dict(zip(alpha_filenames, parsed_alphas))

    # build a dictionary with the data for each of the metrics specified
    metrics_data = {}
    for metric in all_metrics:
        per_metric_data = []
        for filename in alpha_filenames:
            f_metrics, f_samples, f_data = alpha_rs[filename]
            per_metric_data.append(
                make_output_row(f_metrics, metric, f_samples, f_data,
                                filename, num_samples, all_samples))
        metrics_data[metric] = per_metric_data

    # now format the dictionary to make it compatible with make_averages
    return _format_rarefactions(metrics_data, all_samples)
def test_make_output_rows(self):
    """Check the row make_output_row returns for one metric and two samples"""
    sample_ids = ['s1', 's2']

    observed = make_output_row(
        ['met1'],                      # metrics present in the input
        'met1',                        # metric being collated
        sample_ids,                    # samples present in the input
        numpy.array([[0.4], [0.8]]),   # one value per sample for the metric
        'alpha_rarefaction_10_7',      # name of the input being processed
        2,                             # number of columns
        list(sample_ids))              # samples expected in the output

    expected = ['alpha_rarefaction_10_7', 10, 7, '0.4', '0.8']
    self.assertEqual(observed, expected)
def test_make_output_rows(self):
    """make_output_row output for a single metric measured on two samples"""
    samples = ['s1', 's2']
    alpha_matrix = numpy.array([[0.4], [0.8]])

    # positional arguments in the order make_output_row is called with:
    # input metrics, metric, input samples, data, file name, number of
    # columns and the samples of the output
    call_args = (['met1'], 'met1', samples, alpha_matrix,
                 'alpha_rarefaction_10_7', len(samples), list(samples))

    self.assertEqual(make_output_row(*call_args),
                     ['alpha_rarefaction_10_7', 10, 7, '0.4', '0.8'])
def main():
    """Collate the alpha diversity files of a directory, one output per metric

    Every non-hidden file in opts.input_path is parsed with parse_matrix. For
    each metric found in the example file, one row per input file is built
    with make_output_row and the rows are handed to write_output_file.

    When opts.example_path is not given, the example file is the input file
    with the smallest value in the second field returned by
    parse_rarefaction_fname (the sequences per sample).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if args:
        # report through the parser returned above; there is no local named
        # "parser" in this function
        option_parser.error(
            "Positional argument detected. make sure all" +
            ' parameters are identified.' +
            '\ne.g.: include the \"-m\" in \"-m MINIMUM_LENGTH\"')

    input_dir = opts.input_path
    output_dir = opts.output_path
    example_filepath = opts.example_path

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # hidden files are not part of the input
    file_names = [fname for fname in os.listdir(input_dir)
                  if not fname.startswith('.')]

    if example_filepath is None:
        # table row is base_name, seqs_per_sam, iters, ext; a real list is
        # needed because .index is called on it below
        file_name_table = [parse_rarefaction_fname(fname)
                           for fname in file_names]
        # sort on seqs/sam
        sorted_fname_table = sorted(file_name_table,
                                    key=operator.itemgetter(1))
        # now map back to file name
        example_fname = file_names[
            file_name_table.index(sorted_fname_table[0])]
        example_filepath = os.path.join(input_dir, example_fname)

    # the context manager closes the file even if parsing fails
    with open(example_filepath, 'U') as f:
        all_metrics, all_samples, example_data = parse_matrix(f)
    num_cols = len(all_samples)

    # make the table 1 row at a time
    # we're building a rarefaction by sample mtx from
    # a sample by metric matrix
    # each metric is one output file
    for metric in all_metrics:
        metric_file_data = []
        for fname in file_names:
            # f_ here refers to the input file currently being processed
            # to distinguish from the output file we're building
            with open(os.path.join(input_dir, fname), 'U') as f:
                f_metrics, f_samples, f_data = parse_matrix(f)
            metric_file_data.append(
                make_output_row(f_metrics, metric, f_samples, f_data,
                                fname, num_cols, all_samples))

        write_output_file(metric_file_data, output_dir, metric, all_samples)