def test_get_log_transform(self):
    """Check get_log_transform against log10 of the table with zeros set to eps."""
    # Imported locally so this test does not depend on the module's import list.
    from numpy.testing import assert_almost_equal

    eps = .01
    obs = get_log_transform(self.otu_table, eps=eps)

    # Expected values: replace zeros by eps, then take log10 row by row.
    xform = asarray(self.otu_table, dtype=float64)
    xform[xform == 0] = eps

    for i, row in enumerate(obs):
        # Element-wise float comparison; assertEqual on whole arrays is
        # ambiguous under plain unittest and brittle for floating point.
        assert_almost_equal(row, log10(xform[i]))
def test_get_log_transform(self):
    """Compare get_log_transform output with log10 applied to non-zero entries."""
    transformed = get_log_transform(self.otu_table)

    source_rows = list(self.otu_table.iter_data(axis='observation'))
    expected = asarray(source_rows, dtype=float64)

    observed_rows = transformed.iter_data(axis='observation')
    for row_idx, observed_row in enumerate(observed_rows):
        # Only the non-zero positions of the expected row are log-transformed.
        nz = argwhere(expected[row_idx] != 0)
        expected[row_idx, nz] = log10(expected[row_idx, nz])
        assert_almost_equal(observed_row, expected[row_idx])
def test_get_log_transform(self):
    """Check get_log_transform against log10 of the table with zeros set to eps."""
    # Imported locally so this test does not depend on the module's import list.
    from numpy.testing import assert_almost_equal

    eps = .01
    obs = get_log_transform(self.otu_table, eps=eps)

    # Expected values: per-observation data with zeros replaced by eps.
    data = list(self.otu_table.iterObservationData())
    xform = asarray(data, dtype=float64)
    xform[xform == 0] = eps

    for i, val in enumerate(obs.iterObservationData()):
        # Element-wise float comparison; assertEqual on whole arrays is
        # ambiguous under plain unittest and brittle for floating point.
        assert_almost_equal(val, log10(xform[i]))
def test_get_log_transform(self):
    """Compare get_log_transform output with log10 of the eps-filled table."""
    pseudo_count = .01
    transformed = get_log_transform(self.otu_table, eps=pseudo_count)

    source_rows = list(self.otu_table.iterObservationData())
    expected = asarray(source_rows, dtype=float64)
    # Zeros are replaced by the pseudo-count before taking the logarithm.
    expected[expected == 0] = pseudo_count

    for row_idx, observed_row in enumerate(transformed.iterObservationData()):
        expected_row = log10(expected[row_idx])
        assert_almost_equal(observed_row, expected_row)
def test_get_log_transform(self):
    """Verify each transformed observation equals log10 of its non-zero values."""
    result_table = get_log_transform(self.otu_table)

    raw_rows = list(self.otu_table.iter_data(axis="observation"))
    reference = asarray(raw_rows, dtype=float64)

    row_idx = 0
    for result_row in result_table.iter_data(axis="observation"):
        # Transform the reference row in place, touching non-zero cells only.
        nonzero_positions = argwhere(reference[row_idx] != 0)
        logged = log10(reference[row_idx, nonzero_positions])
        reference[row_idx, nonzero_positions] = logged
        assert_almost_equal(result_row, reference[row_idx])
        row_idx += 1
def main():
    """Build and save an OTU heatmap (heatmap.pdf) from a BIOM OTU table.

    Reads the command line options, optionally converts the table to
    relative abundance and log-transforms it, determines a sample order
    (sample tree, mapping file, clustering or table order) and an OTU order
    (OTU tree, clustering or table order), reorders the table and calls
    plot_heatmap.

    Raises:
        MissingFileError: if the OTU tree file cannot be opened.
        OSError: if the output directory cannot be created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.otu_table_fp, 'U') as otu_table_f:
        otu_table = parse_biom_table(otu_table_f)

    if (otu_table.ObservationMetadata is None or
            'taxonomy' not in otu_table.ObservationMetadata[0]):
        print('\n\nWarning: The lineages are missing from the OTU table. '
              'If you used single_rarefaction.py to create your otu_table, '
              'make sure you included the OTU lineages.\n')
        lineages = [''] * len(otu_table.ObservationIds)
    else:
        lineages = [list(meta['taxonomy'])
                    for _, _, meta in otu_table.iterObservations()]
    otu_labels = make_otu_labels(otu_table.ObservationIds, lineages)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.normObservationBySample()

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % (opts.log_eps,))
            exit(1)
        otu_table = get_log_transform(otu_table, opts.log_eps)

    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Swallowing this used to leave dir_path unbound and fail
                # later with an unrelated NameError. Only tolerate the case
                # where the directory appeared in the meantime.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(otu_table.SampleIds,
                                               sample_tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map
        map_sample_ids = zip(*metadata[0])[0]

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                otu_table.SampleIds, metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if sample_id in otu_table.SampleIds]
            sample_order = names_to_indices(otu_table.SampleIds,
                                            ordered_sample_ids)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_column_clustering:
        data = asarray([i for i in otu_table.iterObservationData()])
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = arange(len(otu_table.SampleIds))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        with f:
            otu_order = get_order_from_tree(otu_table.ObservationIds, f)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_row_clustering:
        data = asarray([i for i in otu_table.iterObservationData()])
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = arange(len(otu_table.ObservationIds))

    # otu_order and sample_order are indices; the table sort methods
    # take ids, so translate them first.
    otu_id_order = [otu_table.ObservationIds[i] for i in otu_order]
    sample_id_order = [otu_table.SampleIds[i] for i in sample_order]

    # otu_labels is still in the original table order, so index it once.
    otu_labels = array(otu_labels)[otu_order]
    otu_table = otu_table.sortObservationOrder(otu_id_order)
    otu_table = otu_table.sortSampleOrder(sample_id_order)
    # The table is already sorted: take its ids as-is. Indexing them with
    # sample_order again would apply the permutation twice.
    sample_ids = array(otu_table.SampleIds)

    plot_heatmap(otu_table, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'))
def main():
    """Build and save an OTU heatmap from a BIOM table.

    Reads the command line options, labels each observation with one level
    of the requested observation metadata category, optionally converts the
    table to relative abundance and log-transforms it, determines a sample
    order (sample tree, mapping file, clustering or table order) and an OTU
    order (OTU tree, clustering or table order), reorders the table and
    calls plot_heatmap.

    Raises:
        MissingFileError: if the OTU tree file cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = load_table(opts.otu_table_fp)
    obs_md_category = opts.obs_md_category
    obs_md_level = opts.obs_md_level
    if obs_md_level is None:
        # grab the last level if the user didn't specify a level
        obs_md_level = -1
    else:
        # convert to 0-based indexing
        obs_md_level -= 1

    obs_md = otu_table.metadata(axis='observation')
    obs_ids = otu_table.ids(axis='observation')
    # Test for None before looking at the first entry; indexing first
    # raised TypeError on tables without observation metadata.
    if obs_md is None or obs_md_category not in obs_md[0]:
        obs_md_labels = [['']] * len(obs_ids)
    else:
        obs_md_labels = []
        for _, _, md in otu_table.iter(axis='observation'):
            current_md = md[obs_md_category]
            if obs_md_level < len(current_md):
                current_md_at_level = current_md[obs_md_level]
            else:
                current_md_at_level = ''
            obs_md_labels.append([current_md_at_level])

    otu_labels = make_otu_labels(obs_ids, obs_md_labels)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.norm(axis='observation')

    # Get log transform if requested
    if not opts.no_log_transform:
        otu_table = get_log_transform(otu_table)

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(otu_table.ids(),
                                               sample_tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map
        map_sample_ids = zip(*metadata[0])[0]

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                otu_table.ids(), metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if otu_table.exists(sample_id)]
            sample_order = names_to_indices(otu_table.ids(),
                                            ordered_sample_ids)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_column_clustering:
        data = np.asarray(
            [i for i in otu_table.iter_data(axis='observation')])
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = np.arange(len(otu_table.ids()))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        with f:
            otu_order = get_order_from_tree(
                otu_table.ids(axis='observation'), f)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_row_clustering:
        data = np.asarray(
            [i for i in otu_table.iter_data(axis='observation')])
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = np.arange(len(otu_table.ids(axis='observation')))

    # otu_order and sample_order are indices; sort_order takes ids.
    otu_id_order = [otu_table.ids(axis='observation')[i] for i in otu_order]
    sample_id_order = [otu_table.ids()[i] for i in sample_order]

    # otu_labels is still in the original table order, so index it once.
    otu_labels = np.array(otu_labels)[otu_order]
    otu_table = otu_table.sort_order(otu_id_order, axis='observation')
    otu_table = otu_table.sort_order(sample_id_order)
    # The table is already sorted: take its ids as-is. Indexing them with
    # sample_order again would apply the permutation twice.
    sample_ids = np.array(otu_table.ids())

    plot_heatmap(otu_table, otu_labels, sample_ids, opts.output_fp,
                 imagetype=opts.imagetype, width=opts.width,
                 height=opts.height, dpi=opts.dpi,
                 color_scheme=opts.color_scheme)
def main():
    """Build and save an OTU heatmap (heatmap.pdf) from a BIOM OTU table.

    Reads the command line options, labels each observation with one level
    of the requested observation metadata category, optionally converts the
    table to relative abundance and log-transforms it, determines a sample
    order (sample tree, mapping file, clustering or table order) and an OTU
    order (OTU tree, clustering or table order), reorders the table and
    calls plot_heatmap.

    Raises:
        MissingFileError: if the OTU tree file cannot be opened.
        OSError: if the output directory cannot be created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.otu_table_fp, 'U') as otu_table_f:
        otu_table = parse_biom_table(otu_table_f)

    obs_md_category = opts.obs_md_category
    obs_md_level = opts.obs_md_level
    if obs_md_level is None:
        # grab the last level if the user didn't specify a level
        obs_md_level = -1
    else:
        # convert to 0-based indexing
        obs_md_level -= 1

    obs_md = otu_table.ObservationMetadata
    # Test for None before looking at the first entry; indexing first
    # raised TypeError on tables without observation metadata.
    if obs_md is None or obs_md_category not in obs_md[0]:
        obs_md_labels = [['']] * len(otu_table.ObservationIds)
    else:
        obs_md_labels = []
        for _, _, md in otu_table.iterObservations():
            current_md = md[obs_md_category]
            if obs_md_level < len(current_md):
                current_md_at_level = current_md[obs_md_level]
            else:
                current_md_at_level = ''
            obs_md_labels.append([current_md_at_level])

    otu_labels = make_otu_labels(otu_table.ObservationIds, obs_md_labels)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.normObservationBySample()

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % (opts.log_eps,))
            exit(1)
        otu_table = get_log_transform(otu_table, opts.log_eps)

    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Swallowing this used to leave dir_path unbound and fail
                # later with an unrelated NameError. Only tolerate the case
                # where the directory appeared in the meantime.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(otu_table.SampleIds,
                                               sample_tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map
        map_sample_ids = zip(*metadata[0])[0]

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                otu_table.SampleIds, metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if sample_id in otu_table.SampleIds]
            sample_order = names_to_indices(otu_table.SampleIds,
                                            ordered_sample_ids)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_column_clustering:
        data = asarray([i for i in otu_table.iterObservationData()])
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = arange(len(otu_table.SampleIds))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        with f:
            otu_order = get_order_from_tree(otu_table.ObservationIds, f)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_row_clustering:
        data = asarray([i for i in otu_table.iterObservationData()])
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = arange(len(otu_table.ObservationIds))

    # otu_order and sample_order are indices; the table sort methods
    # take ids, so translate them first.
    otu_id_order = [otu_table.ObservationIds[i] for i in otu_order]
    sample_id_order = [otu_table.SampleIds[i] for i in sample_order]

    # otu_labels is still in the original table order, so index it once.
    otu_labels = array(otu_labels)[otu_order]
    otu_table = otu_table.sortObservationOrder(otu_id_order)
    otu_table = otu_table.sortSampleOrder(sample_id_order)
    # The table is already sorted: take its ids as-is. Indexing them with
    # sample_order again would apply the permutation twice.
    sample_ids = array(otu_table.SampleIds)

    plot_heatmap(otu_table, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'),
                 color_scheme=opts.color_scheme)
def main():
    """Generate the HTML OTU heatmap and its supporting JavaScript files.

    Reads the command line options, loads the OTU table (optionally
    log-transformed), decides whether the table holds fractional values,
    copies the JavaScript support files into <output_dir>/js, works out the
    optional OTU and sample orderings, and calls generate_heatmap_plots.

    Raises:
        TreeMissingError: if the OTU tree or sample tree cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Open and get coord data
    otu_table = get_otu_counts(opts.otu_table_fp)

    # determine whether fractional values are present in OTU table
    num_otu_hits = opts.num_otu_hits
    if opts.log_transform:
        otu_table = get_log_transform(otu_table)
        num_otu_hits = 0

    fractional_values = False
    max_val = -1
    for val in otu_table.iter_data(axis='observation'):
        max_val = maximum(max_val, val.max())
        # the data cannot be of mixed types: if one is float, all are float
        fractional_values = max_val.dtype.name in ('float32', 'float64')

    if fractional_values and max_val <= 1:
        if num_otu_hits > 0:
            print ("Warning: OTU table appears to be using relative "
                   "abundances and num_otu_hits was set to %d. Setting "
                   "num_otu_hits to 0." % num_otu_hits)
            num_otu_hits = 0

    filepath = opts.otu_table_fp
    filename = filepath.strip().split('/')[-1].split('.')[0]

    dir_path = opts.output_dir
    create_dir(dir_path)

    js_dir_path = os.path.join(dir_path, 'js')
    create_dir(js_dir_path)

    # Copy the JavaScript support files next to the generated HTML.
    qiime_dir = get_qiime_project_dir()
    js_path = os.path.join(qiime_dir, 'qiime/support_files/js')
    for js_file in ('overlib.js', 'otu_count_display.js', 'jquery.js',
                    'jquery.tablednd_0_5.js'):
        shutil.copyfile(os.path.join(js_path, js_file),
                        os.path.join(js_dir_path, js_file))

    # load tree for sorting OTUs
    ordered_otu_names = None
    if opts.tree is not None:
        try:
            f = open(opts.tree, 'U')
        except (TypeError, IOError):
            # Report the path that failed; the name used here before was
            # undefined and turned this into a NameError.
            raise TreeMissingError(
                "Couldn't read tree file at path: %s" % opts.tree)
        with f:
            tree = parse_newick(f, PhyloNode)
        ordered_otu_names = [tip.Name for tip in tree.iterTips()]

    ordered_sample_names = None
    # load tree for sorting Samples
    if opts.sample_tree is not None:
        try:
            f = open(opts.sample_tree, 'U')
        except (TypeError, IOError):
            raise TreeMissingError(
                "Couldn't read tree file at path: %s" % opts.sample_tree)
        with f:
            tree = parse_newick(f, PhyloNode)
        ordered_sample_names = [tip.Name for tip in tree.iterTips()]
    # if there's no sample tree, load sample map for sorting samples
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        mapping_data = parse_mapping_file(lines)[0]
        ordered_sample_names = [row[0] for row in mapping_data]

    try:
        action = generate_heatmap_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(
            num_otu_hits, otu_table, ordered_otu_names,
            ordered_sample_names, dir_path, js_dir_path, filename,
            fractional_values)
def main():
    """Generate the HTML OTU heatmap and its supporting JavaScript files.

    Reads the command line options, loads the OTU table (optionally
    log-transformed), decides whether the table holds fractional values,
    copies the JavaScript support files into <output_dir>/js, works out the
    optional OTU and sample orderings, and calls generate_heatmap_plots.

    Raises:
        TreeMissingError: if the OTU tree or sample tree cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Open and get coord data
    otu_table = get_otu_counts(opts.otu_table_fp)

    # determine whether fractional values are present in OTU table
    num_otu_hits = opts.num_otu_hits
    if opts.log_transform:
        otu_table = get_log_transform(otu_table)
        num_otu_hits = 0

    fractional_values = False
    max_val = -1
    for val in otu_table.iter_data(axis='observation'):
        max_val = maximum(max_val, val.max())
        # the data cannot be of mixed types: if one is float, all are float
        fractional_values = (max_val.dtype.name == 'float32' or
                             max_val.dtype.name == 'float64')

    if fractional_values and max_val <= 1:
        if num_otu_hits > 0:
            print("Warning: OTU table appears to be using relative "
                  "abundances and num_otu_hits was set to %d. Setting "
                  "num_otu_hits to 0." % num_otu_hits)
            num_otu_hits = 0

    filepath = opts.otu_table_fp
    filename = filepath.strip().split('/')[-1].split('.')[0]

    dir_path = opts.output_dir
    create_dir(dir_path)

    js_dir_path = os.path.join(dir_path, 'js')
    create_dir(js_dir_path)

    # Copy the JavaScript support files next to the generated HTML.
    qiime_dir = get_qiime_project_dir()
    js_path = os.path.join(qiime_dir, 'qiime/support_files/js')
    support_files = ['overlib.js', 'otu_count_display.js', 'jquery.js',
                     'jquery.tablednd_0_5.js']
    for support_file in support_files:
        shutil.copyfile(os.path.join(js_path, support_file),
                        os.path.join(js_dir_path, support_file))

    # load tree for sorting OTUs
    ordered_otu_names = None
    if opts.tree is not None:
        try:
            tree_f = open(opts.tree, 'U')
        except (TypeError, IOError):
            # Report the path that failed; the name used here before was
            # undefined and turned this into a NameError.
            raise TreeMissingError("Couldn't read tree file at path: %s" %
                                   opts.tree)
        with tree_f:
            tree = parse_newick(tree_f, PhyloNode)
        ordered_otu_names = [tip.Name for tip in tree.iterTips()]

    ordered_sample_names = None
    # load tree for sorting Samples
    if opts.sample_tree is not None:
        try:
            tree_f = open(opts.sample_tree, 'U')
        except (TypeError, IOError):
            raise TreeMissingError("Couldn't read tree file at path: %s" %
                                   opts.sample_tree)
        with tree_f:
            tree = parse_newick(tree_f, PhyloNode)
        ordered_sample_names = [tip.Name for tip in tree.iterTips()]
    # if there's no sample tree, load sample map for sorting samples
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        mapping_rows = parse_mapping_file(lines)[0]
        ordered_sample_names = [row[0] for row in mapping_rows]

    try:
        action = generate_heatmap_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(num_otu_hits, otu_table, ordered_otu_names,
               ordered_sample_names, dir_path, js_dir_path, filename,
               fractional_values)
def main():
    """Build and save an OTU heatmap (heatmap.pdf) from a BIOM OTU table.

    Reads the command line options, labels OTUs with their 'taxonomy'
    metadata when present, optionally converts the table to relative
    abundance and log-transforms it, determines a sample order (sample
    tree, mapping file, clustering or table order) and an OTU order (OTU
    tree, clustering or table order), reorders the table and calls
    plot_heatmap with the requested color scheme.

    Raises:
        MissingFileError: if the OTU tree file cannot be opened.
        OSError: if the output directory cannot be created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.otu_table_fp, 'U') as biom_f:
        otu_table = parse_biom_table(biom_f)

    obs_metadata = otu_table.ObservationMetadata
    if obs_metadata is None or 'taxonomy' not in obs_metadata[0]:
        print('\n\nWarning: The lineages are missing from the OTU table. '
              'If you used single_rarefaction.py to create your otu_table, '
              'make sure you included the OTU lineages.\n')
        lineages = [''] * len(otu_table.ObservationIds)
    else:
        lineages = [list(meta['taxonomy'])
                    for _, _, meta in otu_table.iterObservations()]
    otu_labels = make_otu_labels(otu_table.ObservationIds, lineages)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.normObservationBySample()

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % (opts.log_eps,))
            exit(1)
        otu_table = get_log_transform(otu_table, opts.log_eps)

    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Swallowing this used to leave dir_path unbound and fail
                # later with an unrelated NameError. Only tolerate the case
                # where the directory appeared in the meantime.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(otu_table.SampleIds,
                                               sample_tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map
        map_sample_ids = zip(*metadata[0])[0]

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                otu_table.SampleIds, metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if sample_id in otu_table.SampleIds]
            sample_order = names_to_indices(otu_table.SampleIds,
                                            ordered_sample_ids)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_column_clustering:
        data = asarray([i for i in otu_table.iterObservationData()])
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = arange(len(otu_table.SampleIds))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        with f:
            otu_order = get_order_from_tree(otu_table.ObservationIds, f)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_row_clustering:
        data = asarray([i for i in otu_table.iterObservationData()])
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = arange(len(otu_table.ObservationIds))

    # otu_order and sample_order are indices; the table sort methods
    # take ids, so translate them first.
    otu_id_order = [otu_table.ObservationIds[i] for i in otu_order]
    sample_id_order = [otu_table.SampleIds[i] for i in sample_order]

    # otu_labels is still in the original table order, so index it once.
    otu_labels = array(otu_labels)[otu_order]
    otu_table = otu_table.sortObservationOrder(otu_id_order)
    otu_table = otu_table.sortSampleOrder(sample_id_order)
    # The table is already sorted: take its ids as-is. Indexing them with
    # sample_order again would apply the permutation twice.
    sample_ids = array(otu_table.SampleIds)

    plot_heatmap(otu_table, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'),
                 color_scheme=opts.color_scheme)
def main():
    """Build and save an OTU heatmap (heatmap.pdf) from a classic OTU table.

    Reads the command line options, parses the OTU table into sample ids,
    OTU ids, a count matrix and lineages, optionally converts each row to
    relative abundance and log-transforms the matrix, determines a sample
    order (sample tree, mapping file or table order) and an OTU order (OTU
    tree or clustering), reorders everything and calls plot_heatmap.

    Raises:
        MissingFileError: if the OTU tree file cannot be opened.
        OSError: if the output directory cannot be created.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Get OTU counts
    with open(opts.otu_table_fp, 'U') as otu_table_f:
        sample_ids, otu_ids, otus, lineages = \
            list(parse_otu_table(otu_table_f, count_map_f=float))

    # set 'blank' lineages if not supplied
    if lineages == []:
        print('\n\nWarning: The lineages are missing from the OTU table. '
              'If you used single_rarefaction.py to create your otu_table, '
              'make sure you pass the "--lineages_included" option.\n')
        lineages = [''] * len(otu_ids)
    otu_labels = make_otu_labels(otu_ids, lineages)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        for i, row in enumerate(otus):
            row_total = row.sum()
            # Leave all-zero rows untouched to avoid dividing by zero.
            if row_total > 0:
                otus[i] = row / row_total

    # Get log transform if requested
    if not opts.no_log_transform:
        if opts.log_eps is not None and opts.log_eps <= 0:
            print("Parameter 'log_eps' must be positive. Value was %s"
                  % (opts.log_eps,))
            exit(1)
        otus = get_log_transform(otus, opts.log_eps)

    if opts.output_dir:
        dir_path = opts.output_dir
        if not os.path.exists(dir_path):
            try:
                os.mkdir(dir_path)
            except OSError:
                # Swallowing this used to leave dir_path unbound and fail
                # later with an unrelated NameError. Only tolerate the case
                # where the directory appeared in the meantime.
                if not os.path.isdir(dir_path):
                    raise
    else:
        dir_path = './'

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        # Pass an open file, as the OTU tree branch below does; the path
        # string was previously handed to the helper directly.
        # NOTE(review): assumes get_order_from_tree expects tree contents,
        # not a path. Confirm against its definition.
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(sample_ids, sample_tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        sample_ids, new_map, otus = \
            get_overlapping_samples(sample_ids, metadata[0], otus)
        metadata[0] = new_map
        map_sample_ids = zip(*metadata[0])[0]

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                sample_ids, metadata, opts.category)
            sample_order = get_order_from_categories(otus, category_labels)
        # else: just use the mapping file order
        else:
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if sample_id in sample_ids]
            sample_order = names_to_indices(sample_ids, ordered_sample_ids)
    # if no tree or mapping file, keep the OTU table's sample order
    else:
        sample_order = arange(len(sample_ids))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        with f:
            otu_order = get_order_from_tree(otu_ids, f)
    # if no tree, use euclidean upgma
    else:
        otu_order = get_clusters(otus, axis='row')

    # Re-order otu table, sampleids, etc. as necessary
    otus = otus[otu_order, :]
    otu_ids = array(otu_ids)[otu_order]
    otu_labels = array(otu_labels)[otu_order]
    otus = otus[:, sample_order]
    sample_ids = array(sample_ids)[sample_order]

    plot_heatmap(otus, otu_labels, sample_ids,
                 filename=join(dir_path, 'heatmap.pdf'))
def main():
    """Build and save an OTU heatmap from a BIOM table.

    Reads the command line options, labels each observation with one level
    of the requested observation metadata category, optionally converts the
    table to relative abundance and log-transforms it, determines a sample
    order (sample tree, mapping file, clustering or table order) and an OTU
    order (OTU tree, clustering or table order), reorders the table and
    calls plot_heatmap.

    Raises:
        MissingFileError: if the OTU tree file cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = load_table(opts.otu_table_fp)
    obs_md_category = opts.obs_md_category
    obs_md_level = opts.obs_md_level
    if obs_md_level is None:
        # grab the last level if the user didn't specify a level
        obs_md_level = -1
    else:
        # convert to 0-based indexing
        obs_md_level -= 1

    obs_md = otu_table.metadata(axis='observation')
    obs_md_labels = []
    if obs_md is None or obs_md_category not in obs_md[0]:
        obs_md_labels = [['']] * len(otu_table.ids(axis='observation'))
    else:
        for _, _, md in otu_table.iter(axis='observation'):
            current_md = md[obs_md_category]
            if obs_md_level < len(current_md):
                current_md_at_level = current_md[obs_md_level]
            else:
                current_md_at_level = ''
            obs_md_labels.append([current_md_at_level])

    otu_labels = make_otu_labels(otu_table.ids(axis='observation'),
                                 obs_md_labels)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.norm(axis='observation')

    # Get log transform if requested
    if not opts.no_log_transform:
        otu_table = get_log_transform(otu_table)

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        # Close the tree file once the order has been computed.
        with open(opts.sample_tree, 'U') as sample_tree_f:
            sample_order = get_order_from_tree(otu_table.ids(),
                                               sample_tree_f)
    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_f:
            lines = map_f.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map
        map_sample_ids = zip(*metadata[0])[0]

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(
                otu_table.ids(), metadata, opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if otu_table.exists(sample_id)]
            sample_order = names_to_indices(otu_table.ids(),
                                            ordered_sample_ids)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_column_clustering:
        data = np.asarray(
            [i for i in otu_table.iter_data(axis='observation')])
        sample_order = get_clusters(data, axis='column')
    # else just use OTU table ordering
    else:
        sample_order = np.arange(len(otu_table.ids()))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        try:
            f = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError(
                "Couldn't read tree file at path: %s" % opts.otu_tree)
        # 'with' closes the file even if the helper raises.
        with f:
            otu_order = get_order_from_tree(
                otu_table.ids(axis='observation'), f)
    # if no tree or mapping file, perform upgma euclidean
    elif not opts.suppress_row_clustering:
        data = np.asarray(
            [i for i in otu_table.iter_data(axis='observation')])
        otu_order = get_clusters(data, axis='row')
    # else just use OTU table ordering
    else:
        otu_order = np.arange(len(otu_table.ids(axis='observation')))

    # otu_order and sample_order are indices; sort_order takes ids.
    otu_id_order = [otu_table.ids(axis='observation')[i] for i in otu_order]
    sample_id_order = [otu_table.ids()[i] for i in sample_order]

    # Re-order otu table, sampleids, etc. as necessary
    otu_table = otu_table.sort_order(otu_id_order, axis='observation')
    otu_labels = np.array(otu_labels)[otu_order]
    otu_table = otu_table.sort_order(sample_id_order)
    # The table is already sorted, so its ids are the final sample labels.
    sample_labels = otu_table.ids()

    plot_heatmap(otu_table, otu_labels, sample_labels, opts.output_fp,
                 imagetype=opts.imagetype, width=opts.width,
                 height=opts.height, dpi=opts.dpi,
                 color_scheme=opts.color_scheme)