def main(table_in, table_out, pathways, to_classic):
    """Filter a BIOM table down to the KOs that belong to the given pathways.

    Parameters
    ----------
    table_in : str
        Path of the BIOM table to load.
    table_out : str
        Path the filtered table is written to.
    pathways : iterable of str
        Pathway identifiers; only the last five characters of each
        (after stripping whitespace) are used as the lookup key.
    to_classic : bool
        If true, write a tab-delimited table; otherwise write JSON BIOM.

    Raises
    ------
    EmptySetERROR
        If none of the table's observations belong to the pathways.
    KeyError
        If a pathway key is missing from the pathway-to-KO mapping.
    """
    # setup
    table = load_table(table_in)
    pathway_dict = get_pathway2kos()

    # get set of kos from pathways
    pathways_kos = set()
    for pathway in pathways:
        pathways_kos.update(pathway_dict[pathway.strip()[-5:]])

    # get selected kos, keeping the order they have in the input table so
    # the output is reproducible (set iteration order is arbitrary)
    kos_to_keep = [obs for obs in table.ids('observation')
                   if obs in pathways_kos]
    if not kos_to_keep:
        raise EmptySetERROR('Intersection created empty set')

    obs_ids = np.array(kos_to_keep)
    sample_ids = table.ids('sample')
    data = np.empty([len(obs_ids), len(sample_ids)])
    for i, obs in enumerate(obs_ids):
        data[i] = table.data(obs, 'observation')

    # output
    new_table = Table(data, obs_ids, sample_ids, type="OTU table")
    # the context manager guarantees the output is flushed and closed
    with open(table_out, 'w') as out_fh:
        if to_classic:
            # print to tab delimited biom table
            out_fh.write(new_table.to_tsv())
        else:
            # print json biom table
            new_table.to_json("filter_KOs_by_pathway.py", out_fh)
def differentialtest(table: biom.Table, metadata: qiime2.Metadata,
                     variable: str,
                     taxonomy: TSVTaxonomyFormat) -> pd.DataFrame:
    """Run ``differentialtest.R`` on a feature table and return its output.

    The table and metadata are written to a temporary directory, the R
    script is invoked with those paths plus the taxonomy, the variable
    name and an output path, and the tab-separated output is loaded.

    Raises
    ------
    ValueError
        If ``table`` is empty.
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    with tempfile.TemporaryDirectory() as workdir:
        table_path = os.path.join(workdir, 'table.tsv')
        metadata_path = os.path.join(workdir, 'metadata.tsv')
        result_path = os.path.join(workdir, 'data.tsv')

        # Hand the inputs to R as plain files.
        with open(table_path, 'w') as handle:
            handle.write(table.to_tsv())
        metadata.save(metadata_path)

        r_call = ['differentialtest.R', table_path, metadata_path,
                  str(taxonomy), str(variable), result_path]
        run_commands([r_call])

        # Must be read before the temporary directory is removed.
        results = pd.read_csv(result_path, sep='\t')

    results.index.name = 'Feature ID'
    return results
def SRS(table: biom.Table, c_min: int, set_seed: bool = True,
        seed: int = 1) -> biom.Table:
    """Normalize a feature table by calling the ``SRS.R`` script.

    The table is written as TSV to a temporary file, ``SRS.R`` is run with
    that path, ``c_min``, ``set_seed`` and ``seed``, and the same file is
    then read back and wrapped in a new ``biom.Table``.

    Raises
    ------
    ValueError
        If ``table`` is empty.
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    with tempfile.TemporaryDirectory() as workdir:
        tsv_path = os.path.join(workdir, 'table.tsv')
        with open(tsv_path, 'w') as handle:
            handle.write(table.to_tsv())

        run_commands(
            [['SRS.R', tsv_path, str(c_min), str(set_seed), str(seed)]])

        # NOTE(review): the result is read from the input path, so SRS.R
        # presumably overwrites that file in place - confirm.
        normalized = pd.read_csv(tsv_path, sep='\t')

    return biom.Table(data=normalized.values,
                      observation_ids=normalized.index,
                      sample_ids=normalized.columns)
def SRScurve(output_dir: str,
             table: biom.Table,
             metric: str = 'richness',
             step: int = 50,
             sample: int = 0,
             max_sample_size: int = 0,
             rarefy_comparison: bool = False,
             rarefy_repeats: int = 10,
             rarefy_comparison_legend: bool = False,
             srs_color: str = 'black',
             rarefy_color: str = 'red',
             srs_linetype: str = 'solid',
             rarefy_linetype: str = 'longdash',
             label: bool = False) -> None:
    """Run ``SRScurve.R`` on a table and write an ``index.html`` for the plot.

    The table is written as TSV to a temporary directory and passed to the
    R script together with every plotting option (each converted with
    ``str``) and ``output_dir``. An ``index.html`` embedding
    ``SRScurve_plot.png`` is then written into ``output_dir``.

    Raises
    ------
    ValueError
        If ``table`` is empty.
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    # run the R script on the file
    with tempfile.TemporaryDirectory() as temp_dir_name:
        # write the biom table to file
        input_name = os.path.join(temp_dir_name, 'table.tsv')
        with open(input_name, 'w') as fh:
            fh.write(table.to_tsv())

        cmd = [
            'SRScurve.R', input_name,
            str(metric),
            str(step),
            str(sample),
            str(max_sample_size),
            str(rarefy_comparison),
            str(rarefy_repeats),
            str(rarefy_comparison_legend),
            str(srs_color),
            str(rarefy_color),
            str(srs_linetype),
            str(rarefy_linetype),
            str(label),
            str(output_dir)
        ]
        run_commands([cmd])

    # NOTE(review): the page references SRScurve_plot.png; presumably
    # SRScurve.R writes that image into output_dir - confirm.
    index = os.path.join(output_dir, 'index.html')
    with open(index, 'w') as fh:
        fh.write(
            '<!DOCTYPE html><head></head><body><img src="SRScurve_plot.png" style="max-width: 100vw;max-height: 100vh;object-fit: contain" /></body></html>'
        )
def alpha(table: biom.Table) -> AlphaDiversityFormat:
    """Run ``run_new_richness.R`` on a table and return the result format.

    The table is written as TSV to a temporary file; the R script receives
    that path and the string form of a fresh ``AlphaDiversityFormat``,
    which is returned afterwards.

    Raises
    ------
    ValueError
        If ``table`` is empty.
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    result = AlphaDiversityFormat()

    with tempfile.TemporaryDirectory() as workdir:
        tsv_path = os.path.join(workdir, 'table.tsv')
        with open(tsv_path, 'w') as handle:
            handle.write(table.to_tsv())

        run_commands([['run_new_richness.R', tsv_path, str(result)]])

    return result
def generate_per_sample_biom(biom_file, limit):
    """Generate per-sample BIOM files

    Parameters
    ----------
    biom_file : str
        A filepath to a BIOM table
    limit : int or None
        Maximum number of samples to emit; ``None`` means no limit

    Yields
    ------
    str
        The sample ID
    str
        The table in BIOM Format v1.0
    str
        The table in the classic OTU table format
    """
    full_table = load_table(biom_file)
    observation_ids = full_table.ids(axis='observation')
    observation_md = full_table.metadata(axis='observation')

    def _is_present(values, _id, _md):
        # Keep only observations with a positive value in this sample.
        return values > 0

    def _join_lineage(lineage):
        return '; '.join(lineage)

    for emitted, (counts, sample_id, _) in enumerate(full_table.iter()):
        if limit is not None and emitted >= limit:
            break

        # Reshape the sample's vector into a one-column table.
        per_sample = Table(counts[:, np.newaxis], observation_ids,
                           [sample_id], observation_md)
        per_sample.filter(_is_present, axis='observation')

        as_json = per_sample.to_json('AG')
        as_tsv = per_sample.to_tsv(header_key='taxonomy',
                                   header_value='taxonomy',
                                   metadata_formatter=_join_lineage)
        yield (sample_id, as_json, as_tsv)
def cross_validate_map(
        table: biom.Table,
        groups: pd.DataFrame,
        phylogeny: NewickFormat,
        full_phylogeny: NewickFormat,
) -> (biom.Table, pd.DataFrame):
    """Run ``run_crossVmap.R`` and load the table and groups it produces.

    The table and groups are written as TSV files to a temporary
    directory. The R script is called with those paths, both phylogenies
    (as strings) and two output paths, which are then read back.

    Raises
    ------
    Exception
        If the command fails with ``subprocess.CalledProcessError``; the
        message carries the return code.
    """
    with tempfile.TemporaryDirectory() as workdir:
        table_path = os.path.join(workdir, 'table.tsv')
        groups_path = os.path.join(workdir, 'groups.tsv')
        table_out_path = os.path.join(workdir, 'out_table.tsv')
        groups_out_path = os.path.join(workdir, 'groups_out.tsv')

        with open(table_path, 'w') as handle:
            handle.write(table.to_tsv())
        groups.to_csv(groups_path, sep='\t')

        r_call = ['run_crossVmap.R', table_path, groups_path,
                  str(phylogeny), str(full_phylogeny),
                  table_out_path, groups_out_path]

        try:
            print('Running Commands')
            run_commands([r_call])
        except subprocess.CalledProcessError as e:
            message = ("An error was encountered with PhyloFactor"
                       " in R (return code %d), please inspect stdout"
                       " and stderr to learn more.")
            raise Exception(message % e.returncode)

        # Load both outputs before the temporary directory is removed.
        with open(table_out_path) as handle:
            result_table = biom.Table.from_tsv(handle, None, None, None)
        result_groups = pd.read_csv(groups_out_path, sep='\t')

    return result_table, result_groups