def simsam_range(table, tree, simulated_sample_sizes, dissimilarities,
                 mapping_f=None):
    """Run sim_otu_table for every (sample size, dissimilarity) combination.

    table: the input table to simulate samples from
    tree: tree relating the OTUs in the input table
    simulated_sample_sizes: a list of ints, each giving how many output
     samples should be created per input sample
    dissimilarities: a list of floats, the dissimilarities to use when
     simulating tables
    mapping_f: file handle for a metadata mapping file; supply it if a
     mapping file should be generated for the samples of each simulated
     table

    This is a generator. Sample sizes are walked in the outer loop and
     dissimilarities in the inner loop, and each step yields
      (output table, output mapping lines, simulated_sample_size,
       dissimilarity)

    The mapping lines are built once per sample size and the same object is
     yielded for every dissimilarity at that size. When mapping_f is not
     given, that slot of the tuple is None.
    """
    if mapping_f is None:
        source_map_lines = None

        # Stand-in with the same call shape as the real mapping processor,
        # so the loop below never has to test whether a mapping file exists.
        def replicate_map(_lines, _n_replicates, _sample_ids):
            return None
    else:
        # Materialize the handle once; it is re-read for every sample size.
        source_map_lines = list(mapping_f)
        replicate_map = create_replicated_mapping_file

    for n_replicates in simulated_sample_sizes:
        # mapping data depends only on the sample size, not the dissimilarity
        replicated_map = replicate_map(
            source_map_lines, n_replicates, table.ids())

        for dissim in dissimilarities:
            simulated = sim_otu_table(
                table.ids(),
                table.ids(axis="observation").tolist(),
                table.iter(),
                table.metadata(axis="observation"),
                tree,
                n_replicates,
                dissim,
            )
            sim_sample_ids, sim_otu_ids, sim_data, sim_metadata = simulated

            sim_table = Table(
                sim_data,
                sim_otu_ids,
                sim_sample_ids,
                observation_metadata=sim_metadata,
                generated_by=get_generated_by_for_biom_tables(),
                create_date=datetime.now().isoformat(),
            )
            yield (sim_table, replicated_map, n_replicates, dissim)
def make_otu_table(otu_map_f, otu_to_taxonomy=None, delim='_', table_id=None,
                   otu_ids_to_exclude=None, sample_metadata=None,
                   seq_counts=None):
    """Build a BIOM table from an OTU map

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map. Jagged tab-separated file where the first column
        contains the OTU ID and subsequent columns contain sequence IDs
        belonging to that OTU
    otu_to_taxonomy : dict, optional
        Defaults to ``None``. If supplied, maps OTU IDs to taxonomies; OTUs
        missing from the dict get the taxonomy ``["None"]``
    delim : str, optional
        Defaults to "_". The delimiter used in the sequence IDs to join the
        sample ID to the sequence number
    table_id : object, optional
        Defaults to ``None``. The identifier given to the generated BIOM
        table
    otu_ids_to_exclude : iterable, optional
        Defaults to ``None``. If present, these OTUs will not be added to
        the OTU table from the OTU map
    sample_metadata : dict of dicts, optional
        Defaults to ``None``. If supplied, keys in the outer dict should be
        sample IDs, and keys in the inner dicts should be column names.
    seq_counts : dict, optional
        Defaults to ``None``. If supplied, maps seq ids to seq counts; it is
        forwarded to ``parse_otu_map``

    Returns
    -------
    Table
        The table built from the parsed OTU map

    Raises
    ------
    KeyError
        If a sample ID from the OTU map is missing from ``sample_metadata``
    ValueError
        If constructing the table raises ``ValueError``; the original
        message is embedded in the new one
    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f,
        delim=delim,
        otu_ids_to_exclude=otu_ids_to_exclude,
        seq_counts=seq_counts)

    if otu_to_taxonomy is None:
        otu_metadata = None
    else:
        otu_metadata = [
            {'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
            for otu_id in otu_ids
        ]

    if sample_metadata is not None:
        # Re-key the metadata into the OTU map's sample order. Samples that
        # appear only in the metadata are dropped; samples that appear only
        # in the OTU map are an error.
        ordered_metadata = []
        try:
            for sample_id in sample_ids:
                ordered_metadata.append(sample_metadata[sample_id])
        except KeyError:
            raise KeyError("Sample IDs found in OTU map without sample "
                           "metadata")
        sample_metadata = ordered_metadata

    try:
        return Table(
            data,
            otu_ids,
            sample_ids,
            observation_metadata=otu_metadata,
            sample_metadata=sample_metadata,
            table_id=table_id,
            generated_by=get_generated_by_for_biom_tables(),
            create_date=datetime.now().isoformat())
    except ValueError as err:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % (str(err)))
def make_otu_table(otu_map_f, otu_to_taxonomy=None, delim='_', table_id=None,
                   otu_ids_to_exclude=None, sample_metadata=None):
    """Build a BIOM table from an OTU map

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map. Jagged tab-separated file where the first column
        contains the OTU ID and subsequent columns contain sequence IDs
        belonging to that OTU
    otu_to_taxonomy : dict, optional
        Defaults to ``None``. If supplied, maps OTU IDs to taxonomies; OTUs
        missing from the dict get the taxonomy ``["None"]``
    delim : str, optional
        Defaults to "_". The delimiter used in the sequence IDs to join the
        sample ID to the sequence number
    table_id : object, optional
        Defaults to ``None``. The identifier given to the generated BIOM
        table
    otu_ids_to_exclude : iterable, optional
        Defaults to ``None``. If present, these OTUs will not be added to
        the OTU table from the OTU map
    sample_metadata : iterable of dicts, optional
        Defaults to ``None``. Handed to ``Table`` unchanged

    Returns
    -------
    Table
        The table built from the parsed OTU map

    Raises
    ------
    ValueError
        If constructing the table raises ``ValueError``; the original
        message is embedded in the new one
    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f,
        delim=delim,
        otu_ids_to_exclude=otu_ids_to_exclude)

    if otu_to_taxonomy is None:
        otu_metadata = None
    else:
        otu_metadata = [
            {'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
            for otu_id in otu_ids
        ]

    # NOTE: sample_metadata is not reordered or validated against sample_ids
    # here; it goes to Table exactly as the caller supplied it.
    try:
        return Table(
            data,
            otu_ids,
            sample_ids,
            observation_metadata=otu_metadata,
            sample_metadata=sample_metadata,
            table_id=table_id,
            generated_by=get_generated_by_for_biom_tables(),
            create_date=datetime.now().isoformat())
    except ValueError as err:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % (str(err)))
def make_otu_table(otu_map_f, otu_to_taxonomy=None, delim='_', table_id=None,
                   otu_ids_to_exclude=None, sample_metadata=None):
    """Build a BIOM table from an OTU map

    Parameters
    ----------
    otu_map_f : file-like object
        The OTU map. Jagged tab-separated file where the first column
        contains the OTU ID and subsequent columns contain sequence IDs
        belonging to that OTU
    otu_to_taxonomy : dict, optional
        Defaults to ``None``. If supplied, maps OTU IDs to taxonomies; OTUs
        missing from the dict get the taxonomy ``["None"]``
    delim : str, optional
        Defaults to "_". The delimiter used in the sequence IDs to join the
        sample ID to the sequence number
    table_id : object, optional
        Defaults to ``None``. The identifier given to the generated BIOM
        table
    otu_ids_to_exclude : iterable, optional
        Defaults to ``None``. If present, these OTUs will not be added to
        the OTU table from the OTU map
    sample_metadata : dict of dicts, optional
        Defaults to ``None``. If supplied, keys in the outer dict should be
        sample IDs, and keys in the inner dicts should be column names.

    Returns
    -------
    Table
        The table built from the parsed OTU map

    Raises
    ------
    KeyError
        If a sample ID from the OTU map is missing from ``sample_metadata``
    ValueError
        If constructing the table raises ``ValueError``; the original
        message is embedded in the new one
    """
    data, sample_ids, otu_ids = parse_otu_map(
        otu_map_f,
        delim=delim,
        otu_ids_to_exclude=otu_ids_to_exclude)

    if otu_to_taxonomy is None:
        otu_metadata = None
    else:
        otu_metadata = [
            {'taxonomy': otu_to_taxonomy.get(otu_id, ["None"])}
            for otu_id in otu_ids
        ]

    if sample_metadata is not None:
        # Re-key the metadata into the OTU map's sample order. Samples that
        # appear only in the metadata are dropped; samples that appear only
        # in the OTU map are an error.
        ordered_metadata = []
        try:
            for sample_id in sample_ids:
                ordered_metadata.append(sample_metadata[sample_id])
        except KeyError:
            raise KeyError("Sample IDs found in OTU map without sample "
                           "metadata")
        sample_metadata = ordered_metadata

    try:
        return Table(
            data,
            otu_ids,
            sample_ids,
            observation_metadata=otu_metadata,
            sample_metadata=sample_metadata,
            table_id=table_id,
            generated_by=get_generated_by_for_biom_tables(),
            create_date=datetime.now().isoformat())
    except ValueError as err:
        raise ValueError("Couldn't create OTU table. Is your OTU map empty?"
                         " Original error message: %s" % (str(err)))
def simsam_range(table, tree, simulated_sample_sizes, dissimilarities,
                 mapping_f=None):
    """Applies sim_otu_table across a grid of parameters

    table: the input table to simulate samples from
    tree: tree relating the OTUs in the input table
    simulated_sample_sizes: a list of ints, each giving how many output
     samples should be created per input sample
    dissimilarities: a list of floats, the dissimilarities to use when
     simulating tables
    mapping_f: file handle for a metadata mapping file; supply it if a
     mapping file should be generated for the samples of each simulated
     table

    Yields one tuple per (sample size, dissimilarity) pair, iterating
     dissimilarities fastest:
      (output table, output mapping lines, simulated_sample_size,
       dissimilarity)

    Without mapping_f the tuples are:
      (output table, None, simulated_sample_size, dissimilarity)
    """
    def _skip_mapping(mapping_lines, simulated_sample_size, sample_ids):
        # Placeholder used when there is no mapping file, so the main loop
        # can call the processor unconditionally.
        return None

    if mapping_f is not None:
        # Read the handle into a list once so it can be reused for each
        # simulated sample size.
        cached_lines = list(mapping_f)
        map_builder = create_replicated_mapping_file
    else:
        cached_lines = None
        map_builder = _skip_mapping

    for size in simulated_sample_sizes:
        # one set of mapping lines per sample size
        mapping_for_size = map_builder(cached_lines, size, table.ids())

        for dissimilarity in dissimilarities:
            # simulate, then wrap the raw pieces in a Table
            new_sample_ids, new_otu_ids, new_data, new_metadata = \
                sim_otu_table(
                    table.ids(),
                    table.ids(axis='observation').tolist(),
                    table.iter(),
                    table.metadata(axis='observation'),
                    tree,
                    size,
                    dissimilarity)

            result_table = Table(
                new_data,
                new_otu_ids,
                new_sample_ids,
                observation_metadata=new_metadata,
                generated_by=get_generated_by_for_biom_tables(),
                create_date=datetime.now().isoformat())

            yield (result_table, mapping_for_size, size, dissimilarity)