def test_get_clusters(self):
    """get_clusters returns the expected row and column orderings."""
    # One array row per item yielded by iter_data(axis="observation").
    matrix = asarray(list(self.otu_table.iter_data(axis="observation")))

    # Two row orderings are accepted as correct.
    row_order = get_clusters(matrix, axis="row")
    row_order_ok = [0, 1, 2] == row_order or row_order == [1, 2, 0]
    self.assertTrue(row_order_ok)

    # The column ordering must match exactly.
    column_order = get_clusters(matrix, axis="column")
    self.assertEqual(column_order, [2, 3, 1, 4, 0, 5])
def test_get_clusters(self):
    """get_clusters yields the expected order along each axis of the table."""
    # (axis name, expected ordering) pairs, checked in this order so the
    # first failing axis stops the test exactly as separate asserts would.
    expected_by_axis = (
        ('row', [0, 1, 2]),
        ('column', [0, 5, 4, 1, 2, 3]),
    )
    for axis_name, expected_order in expected_by_axis:
        observed_order = get_clusters(self.otu_table, axis=axis_name)
        self.assertEqual(observed_order, expected_order)
def test_get_clusters(self):
    """Check the orderings get_clusters produces for rows and columns."""
    per_observation = list(self.otu_table.iter_data(axis='observation'))
    matrix = asarray(per_observation)

    # Rows: either of the two listed orderings passes.
    rows = get_clusters(matrix, axis='row')
    self.assertTrue([0, 1, 2] == rows or rows == [1, 2, 0])

    # Columns: a single exact ordering is expected.
    columns = get_clusters(matrix, axis='column')
    expected_columns = [2, 3, 1, 4, 0, 5]
    self.assertEqual(columns, expected_columns)
def test_get_clusters(self):
    """Row and column cluster orders match the known answers."""
    # One array row per item yielded by iterObservationData().
    matrix = asarray(list(self.otu_table.iterObservationData()))

    self.assertEqual(get_clusters(matrix, axis='row'), [0, 1, 2])
    self.assertEqual(get_clusters(matrix, axis='column'),
                     [0, 5, 4, 1, 2, 3])
def test_get_clusters(self):
    """get_clusters orders rows and columns of the table data as expected."""
    observation_rows = list(self.otu_table.iterObservationData())
    matrix = asarray(observation_rows)

    expected_row_order = [0, 1, 2]
    expected_column_order = [0, 5, 4, 1, 2, 3]

    row_order = get_clusters(matrix, axis='row')
    self.assertEqual(row_order, expected_row_order)

    column_order = get_clusters(matrix, axis='column')
    self.assertEqual(column_order, expected_column_order)
def main():
    """Build an OTU heatmap PDF from the command-line options.

    Reads the BIOM table named by ``opts.otu_table_fp``, optionally converts
    it to relative abundance and log-transforms it, chooses a sample order
    and an OTU order (tree, mapping file, clustering, or plain table order),
    and hands the re-ordered table to ``plot_heatmap`` together with the
    output path ``<output dir>/heatmap.pdf``.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
        OSError: if ``opts.output_dir`` does not exist and cannot be created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Close the table file as soon as it has been parsed.
    with open(opts.otu_table_fp, 'U') as table_f:
        otu_table = parse_biom_table(table_f)

    # Label source per OTU: its taxonomy when present, otherwise a blank.
    obs_md = otu_table.ObservationMetadata
    if obs_md is None or 'taxonomy' not in obs_md[0]:
        print('\n\nWarning: The lineages are missing from the OTU table. '
              'If you used single_rarefaction.py to create your otu_table, '
              'make sure you included the OTU lineages.\n')
        lineages = [''] * len(otu_table.ObservationIds)
    else:
        lineages = [list(meta['taxonomy'])
                    for _, _, meta in otu_table.iterObservations()]
    otu_labels = make_otu_labels(otu_table.ObservationIds, lineages)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.normObservationBySample()

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % (opts.log_eps,))
            exit(1)
        otu_table = get_log_transform(otu_table, opts.log_eps)

    # Resolve the output directory, creating it when necessary.
    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Previously swallowed, which left dir_path unbound and
                # failed later with a NameError. Only tolerate the failure
                # if the directory exists after all.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as tree_f:
            sample_order = get_order_from_tree(otu_table.SampleIds, tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            metadata = list(parse_mapping_file(map_f.readlines()))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map
        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                otu_table.SampleIds, metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # First column of every mapping row is used as the sample id.
            map_sample_ids = [row[0] for row in metadata[0]]
            table_sample_ids = set(otu_table.SampleIds)
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if sample_id in table_sample_ids]
            sample_order = names_to_indices(otu_table.SampleIds,
                                            ordered_sample_ids)
    # if no tree or mapping file, cluster the samples
    elif not opts.suppress_column_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = arange(len(otu_table.SampleIds))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        # Close the handle even if reading the tree raises.
        with tree_f:
            otu_order = get_order_from_tree(otu_table.ObservationIds, tree_f)
    # if no tree, cluster the OTUs
    elif not opts.suppress_row_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = arange(len(otu_table.ObservationIds))

    # sortObservationOrder/sortSampleOrder take ids rather than indices,
    # so translate both index orders first.
    observation_ids = otu_table.ObservationIds
    sample_ids_in_table = otu_table.SampleIds
    otu_id_order = [observation_ids[i] for i in otu_order]
    sample_id_order = [sample_ids_in_table[i] for i in sample_order]

    otu_table = otu_table.sortObservationOrder(otu_id_order)
    # otu_labels was built in the original table order, so it still needs
    # the permutation applied.
    otu_labels = array(otu_labels)[otu_order]

    otu_table = otu_table.sortSampleOrder(sample_id_order)
    # The sorted table already lists its samples in plotting order; indexing
    # with sample_order again would permute the labels a second time.
    sample_ids = array(otu_table.SampleIds)

    plot_heatmap(otu_table, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'))
def main():
    """Build an OTU heatmap image from the command-line options.

    Loads the table named by ``opts.otu_table_fp``, derives one label per
    observation from the ``opts.obs_md_category`` metadata at
    ``opts.obs_md_level``, optionally normalizes and log-transforms the
    table, chooses a sample order and an OTU order (tree, mapping file,
    clustering, or plain table order), and passes the re-ordered table to
    ``plot_heatmap`` with ``opts.output_fp`` and the image options.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    otu_table = load_table(opts.otu_table_fp)

    obs_md_category = opts.obs_md_category
    if opts.obs_md_level is None:
        # grab the last level if the user didn't specify a level
        obs_md_level = -1
    else:
        # convert to 0-based indexing
        obs_md_level = opts.obs_md_level - 1

    obs_md = otu_table.metadata(axis='observation')
    # obs_md[0] must only be touched after the None check; indexing it up
    # front raised TypeError for tables without observation metadata.
    if obs_md is None or obs_md_category not in obs_md[0]:
        obs_md_labels = [['']] * len(otu_table.ids(axis='observation'))
    else:
        obs_md_labels = []
        for _, _, md in otu_table.iter(axis='observation'):
            current_md = md[obs_md_category]
            if obs_md_level < len(current_md):
                current_md_at_level = current_md[obs_md_level]
            else:
                current_md_at_level = ''
            obs_md_labels.append([current_md_at_level])

    otu_labels = make_otu_labels(otu_table.ids(axis='observation'),
                                 obs_md_labels)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.norm(axis='observation')

    # Get log transform if requested
    if not opts.no_log_transform:
        otu_table = get_log_transform(otu_table)

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as tree_f:
            sample_order = get_order_from_tree(otu_table.ids(), tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            metadata = list(parse_mapping_file(map_f.readlines()))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map
        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                otu_table.ids(), metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # First column of every mapping row is used as the sample id.
            map_sample_ids = [row[0] for row in metadata[0]]
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if otu_table.exists(sample_id)]
            sample_order = names_to_indices(otu_table.ids(),
                                            ordered_sample_ids)
    # if no tree or mapping file, cluster the samples
    elif not opts.suppress_column_clustering:
        data = np.asarray(list(otu_table.iter_data(axis='observation')))
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = np.arange(len(otu_table.ids()))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        # Close the handle even if reading the tree raises.
        with tree_f:
            otu_order = get_order_from_tree(
                otu_table.ids(axis='observation'), tree_f)
    # if no tree, cluster the OTUs
    elif not opts.suppress_row_clustering:
        data = np.asarray(list(otu_table.iter_data(axis='observation')))
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = np.arange(len(otu_table.ids(axis='observation')))

    # sort_order takes ids rather than indices, so translate both index
    # orders first.
    observation_ids = otu_table.ids(axis='observation')
    sample_ids_in_table = otu_table.ids()
    otu_id_order = [observation_ids[i] for i in otu_order]
    sample_id_order = [sample_ids_in_table[i] for i in sample_order]

    otu_table = otu_table.sort_order(otu_id_order, axis='observation')
    # otu_labels was built in the original table order, so it still needs
    # the permutation applied.
    otu_labels = np.array(otu_labels)[otu_order]

    otu_table = otu_table.sort_order(sample_id_order)
    # The sorted table already lists its samples in plotting order; indexing
    # with sample_order again would permute the labels a second time.
    sample_ids = np.array(otu_table.ids())

    plot_heatmap(otu_table, otu_labels, sample_ids, opts.output_fp,
                 imagetype=opts.imagetype, width=opts.width,
                 height=opts.height, dpi=opts.dpi,
                 color_scheme=opts.color_scheme)
def main():
    """Build an OTU heatmap PDF from the command-line options.

    Reads the BIOM table named by ``opts.otu_table_fp``, derives one label
    per observation from the ``opts.obs_md_category`` metadata at
    ``opts.obs_md_level``, optionally converts the table to relative
    abundance and log-transforms it, chooses a sample order and an OTU order
    (tree, mapping file, clustering, or plain table order), and hands the
    re-ordered table to ``plot_heatmap`` together with the output path
    ``<output dir>/heatmap.pdf`` and ``opts.color_scheme``.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
        OSError: if ``opts.output_dir`` does not exist and cannot be created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Close the table file as soon as it has been parsed.
    with open(opts.otu_table_fp, 'U') as table_f:
        otu_table = parse_biom_table(table_f)

    obs_md_category = opts.obs_md_category
    if opts.obs_md_level is None:
        # grab the last level if the user didn't specify a level
        obs_md_level = -1
    else:
        # convert to 0-based indexing
        obs_md_level = opts.obs_md_level - 1

    obs_md = otu_table.ObservationMetadata
    # obs_md[0] must only be touched after the None check; indexing it up
    # front raised TypeError for tables without observation metadata.
    if obs_md is None or obs_md_category not in obs_md[0]:
        obs_md_labels = [['']] * len(otu_table.ObservationIds)
    else:
        obs_md_labels = []
        for _, _, md in otu_table.iterObservations():
            current_md = md[obs_md_category]
            if obs_md_level < len(current_md):
                current_md_at_level = current_md[obs_md_level]
            else:
                current_md_at_level = ''
            obs_md_labels.append([current_md_at_level])

    otu_labels = make_otu_labels(otu_table.ObservationIds, obs_md_labels)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.normObservationBySample()

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % (opts.log_eps,))
            exit(1)
        otu_table = get_log_transform(otu_table, opts.log_eps)

    # Resolve the output directory, creating it when necessary.
    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Previously swallowed, which left dir_path unbound and
                # failed later with a NameError. Only tolerate the failure
                # if the directory exists after all.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as tree_f:
            sample_order = get_order_from_tree(otu_table.SampleIds, tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            metadata = list(parse_mapping_file(map_f.readlines()))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map
        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                otu_table.SampleIds, metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # First column of every mapping row is used as the sample id.
            map_sample_ids = [row[0] for row in metadata[0]]
            table_sample_ids = set(otu_table.SampleIds)
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if sample_id in table_sample_ids]
            sample_order = names_to_indices(otu_table.SampleIds,
                                            ordered_sample_ids)
    # if no tree or mapping file, cluster the samples
    elif not opts.suppress_column_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = arange(len(otu_table.SampleIds))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        # Close the handle even if reading the tree raises.
        with tree_f:
            otu_order = get_order_from_tree(otu_table.ObservationIds, tree_f)
    # if no tree, cluster the OTUs
    elif not opts.suppress_row_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = arange(len(otu_table.ObservationIds))

    # sortObservationOrder/sortSampleOrder take ids rather than indices,
    # so translate both index orders first.
    observation_ids = otu_table.ObservationIds
    sample_ids_in_table = otu_table.SampleIds
    otu_id_order = [observation_ids[i] for i in otu_order]
    sample_id_order = [sample_ids_in_table[i] for i in sample_order]

    otu_table = otu_table.sortObservationOrder(otu_id_order)
    # otu_labels was built in the original table order, so it still needs
    # the permutation applied.
    otu_labels = array(otu_labels)[otu_order]

    otu_table = otu_table.sortSampleOrder(sample_id_order)
    # The sorted table already lists its samples in plotting order; indexing
    # with sample_order again would permute the labels a second time.
    sample_ids = array(otu_table.SampleIds)

    plot_heatmap(otu_table, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'),
                 color_scheme=opts.color_scheme)
def main():
    """Build an OTU heatmap PDF from the command-line options.

    Reads the BIOM table named by ``opts.otu_table_fp``, optionally converts
    it to relative abundance and log-transforms it, chooses a sample order
    and an OTU order (tree, mapping file, clustering, or plain table order),
    and hands the re-ordered table to ``plot_heatmap`` together with the
    output path ``<output dir>/heatmap.pdf`` and ``opts.color_scheme``.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
        OSError: if ``opts.output_dir`` does not exist and cannot be created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Close the table file as soon as it has been parsed.
    with open(opts.otu_table_fp, 'U') as table_f:
        otu_table = parse_biom_table(table_f)

    # Label source per OTU: its taxonomy when present, otherwise a blank.
    obs_md = otu_table.ObservationMetadata
    if obs_md is None or 'taxonomy' not in obs_md[0]:
        print('\n\nWarning: The lineages are missing from the OTU table. '
              'If you used single_rarefaction.py to create your otu_table, '
              'make sure you included the OTU lineages.\n')
        lineages = [''] * len(otu_table.ObservationIds)
    else:
        lineages = [list(meta['taxonomy'])
                    for _, _, meta in otu_table.iterObservations()]
    otu_labels = make_otu_labels(otu_table.ObservationIds, lineages)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.normObservationBySample()

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % (opts.log_eps,))
            exit(1)
        otu_table = get_log_transform(otu_table, opts.log_eps)

    # Resolve the output directory, creating it when necessary.
    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Previously swallowed, which left dir_path unbound and
                # failed later with a NameError. Only tolerate the failure
                # if the directory exists after all.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as tree_f:
            sample_order = get_order_from_tree(otu_table.SampleIds, tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            metadata = list(parse_mapping_file(map_f.readlines()))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map
        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                otu_table.SampleIds, metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # First column of every mapping row is used as the sample id.
            map_sample_ids = [row[0] for row in metadata[0]]
            table_sample_ids = set(otu_table.SampleIds)
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if sample_id in table_sample_ids]
            sample_order = names_to_indices(otu_table.SampleIds,
                                            ordered_sample_ids)
    # if no tree or mapping file, cluster the samples
    elif not opts.suppress_column_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = arange(len(otu_table.SampleIds))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        # Close the handle even if reading the tree raises.
        with tree_f:
            otu_order = get_order_from_tree(otu_table.ObservationIds, tree_f)
    # if no tree, cluster the OTUs
    elif not opts.suppress_row_clustering:
        data = asarray(list(otu_table.iterObservationData()))
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = arange(len(otu_table.ObservationIds))

    # sortObservationOrder/sortSampleOrder take ids rather than indices,
    # so translate both index orders first.
    observation_ids = otu_table.ObservationIds
    sample_ids_in_table = otu_table.SampleIds
    otu_id_order = [observation_ids[i] for i in otu_order]
    sample_id_order = [sample_ids_in_table[i] for i in sample_order]

    otu_table = otu_table.sortObservationOrder(otu_id_order)
    # otu_labels was built in the original table order, so it still needs
    # the permutation applied.
    otu_labels = array(otu_labels)[otu_order]

    otu_table = otu_table.sortSampleOrder(sample_id_order)
    # The sorted table already lists its samples in plotting order; indexing
    # with sample_order again would permute the labels a second time.
    sample_ids = array(otu_table.SampleIds)

    plot_heatmap(otu_table, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'),
                 color_scheme=opts.color_scheme)
def main():
    """Build an OTU heatmap PDF from the command-line options.

    Parses the OTU table named by ``opts.otu_table_fp`` into sample ids,
    OTU ids, a count matrix and lineages, optionally converts each row to
    relative abundance and log-transforms the matrix, chooses a sample order
    (tree, mapping file, or table order) and an OTU order (tree or
    clustering), and hands the re-ordered matrix to ``plot_heatmap`` together
    with the output path ``<output dir>/heatmap.pdf``.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
        OSError: if ``opts.output_dir`` does not exist and cannot be created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Get OTU counts; close the table file as soon as it has been parsed.
    with open(opts.otu_table_fp, 'U') as table_f:
        sample_ids, otu_ids, otus, lineages = \
            list(parse_otu_table(table_f, count_map_f=float))

    # set 'blank' lineages if not supplied
    if lineages == []:
        print('\n\nWarning: The lineages are missing from the OTU table. '
              'If you used single_rarefaction.py to create your otu_table, '
              'make sure you pass the "--lineages_included" option.\n')
        lineages = [''] * len(otu_ids)
    otu_labels = make_otu_labels(otu_ids, lineages)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        for i, row in enumerate(otus):
            row_total = row.sum()
            # All-zero rows are left untouched to avoid dividing by zero.
            if row_total > 0:
                otus[i] = row / row_total

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % (opts.log_eps,))
            exit(1)
        otus = get_log_transform(otus, opts.log_eps)

    # Resolve the output directory, creating it when necessary.
    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Previously swallowed, which left dir_path unbound and
                # failed later with a NameError. Only tolerate the failure
                # if the directory exists after all.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        # Pass an open file, matching how the OTU tree is handed to the
        # same helper below; the bare path string was passed before.
        # NOTE(review): assumes get_order_from_tree expects tree contents,
        # not a path -- confirm against its definition.
        with open(opts.sample_tree, 'U') as tree_f:
            sample_order = get_order_from_tree(sample_ids, tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            metadata = list(parse_mapping_file(map_f.readlines()))
        sample_ids, new_map, otus = \
            get_overlapping_samples(sample_ids, metadata[0], otus)
        metadata[0] = new_map
        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                sample_ids, metadata, opts.category)
            sample_order = get_order_from_categories(otus, category_labels)
        # else: just use the mapping file order
        else:
            # First column of every mapping row is used as the sample id.
            map_sample_ids = [row[0] for row in metadata[0]]
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if sample_id in sample_ids]
            sample_order = names_to_indices(sample_ids, ordered_sample_ids)
    # if no tree or mapping file, keep the OTU table's sample order
    else:
        sample_order = arange(len(sample_ids))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        # Close the handle even if reading the tree raises.
        with tree_f:
            otu_order = get_order_from_tree(otu_ids, tree_f)
    # if no tree, cluster the OTU rows
    else:
        otu_order = get_clusters(otus, axis='row')

    # Re-order the matrix rows and their labels by OTU order, then the
    # matrix columns and the sample ids by sample order.
    otus = otus[otu_order, :]
    otu_labels = array(otu_labels)[otu_order]
    otus = otus[:, sample_order]
    sample_ids = array(sample_ids)[sample_order]

    plot_heatmap(otus, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'))
def main():
    """Build an OTU heatmap image from the command-line options.

    Loads the table named by ``opts.otu_table_fp``, derives one label per
    observation from the ``opts.obs_md_category`` metadata at
    ``opts.obs_md_level``, optionally normalizes and log-transforms the
    table, chooses a sample order and an OTU order (tree, mapping file,
    clustering, or plain table order), and passes the re-ordered table to
    ``plot_heatmap`` with ``opts.output_fp`` and the image options.

    Raises:
        MissingFileError: if ``opts.otu_tree`` is given but cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    otu_table = load_table(opts.otu_table_fp)

    obs_md_category = opts.obs_md_category
    if opts.obs_md_level is None:
        # grab the last level if the user didn't specify a level
        obs_md_level = -1
    else:
        # convert to 0-based indexing
        obs_md_level = opts.obs_md_level - 1

    obs_md = otu_table.metadata(axis='observation')
    # The None check short-circuits before obs_md[0] is evaluated.
    if obs_md is None or obs_md_category not in obs_md[0]:
        obs_md_labels = [['']] * len(otu_table.ids(axis='observation'))
    else:
        obs_md_labels = []
        for _, _, md in otu_table.iter(axis='observation'):
            current_md = md[obs_md_category]
            if obs_md_level < len(current_md):
                current_md_at_level = current_md[obs_md_level]
            else:
                current_md_at_level = ''
            obs_md_labels.append([current_md_at_level])

    otu_labels = make_otu_labels(otu_table.ids(axis='observation'),
                                 obs_md_labels)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.norm(axis='observation')

    # Get log transform if requested
    if not opts.no_log_transform:
        otu_table = get_log_transform(otu_table)

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        # Closed on exit, like the OTU tree handle below.
        with open(opts.sample_tree, 'U') as tree_f:
            sample_order = get_order_from_tree(otu_table.ids(), tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            metadata = list(parse_mapping_file(map_f.readlines()))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map
        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                otu_table.ids(), metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # First column of every mapping row is used as the sample id.
            # A comprehension also copes with an empty mapping, where
            # zip(*rows)[0] raised IndexError.
            map_sample_ids = [row[0] for row in metadata[0]]
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if otu_table.exists(sample_id)]
            sample_order = names_to_indices(otu_table.ids(),
                                            ordered_sample_ids)
    # if no tree or mapping file, cluster the samples
    elif not opts.suppress_column_clustering:
        data = np.asarray(list(otu_table.iter_data(axis='observation')))
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = np.arange(len(otu_table.ids()))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            tree_f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        # Close the handle even if reading the tree raises.
        with tree_f:
            otu_order = get_order_from_tree(
                otu_table.ids(axis='observation'), tree_f)
    # if no tree, cluster the OTUs
    elif not opts.suppress_row_clustering:
        data = np.asarray(list(otu_table.iter_data(axis='observation')))
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = np.arange(len(otu_table.ids(axis='observation')))

    # sort_order takes ids rather than indices, so translate both index
    # orders first.
    observation_ids = otu_table.ids(axis='observation')
    sample_ids_in_table = otu_table.ids()
    otu_id_order = [observation_ids[i] for i in otu_order]
    sample_id_order = [sample_ids_in_table[i] for i in sample_order]

    otu_table = otu_table.sort_order(otu_id_order, axis='observation')
    # otu_labels was built in the original table order, so it still needs
    # the permutation applied.
    otu_labels = np.array(otu_labels)[otu_order]

    otu_table = otu_table.sort_order(sample_id_order)
    # The sorted table already lists its samples in plotting order.
    sample_labels = otu_table.ids()

    plot_heatmap(otu_table, otu_labels, sample_labels, opts.output_fp,
                 imagetype=opts.imagetype, width=opts.width,
                 height=opts.height, dpi=opts.dpi,
                 color_scheme=opts.color_scheme)