def main(table_in, table_out, pathways, to_classic):
    """Write a copy of a table restricted to the KOs of the given pathways.

    Parameters
    ----------
    table_in : str
        Path passed to ``load_table``.
    table_out : str
        Path the filtered table is written to (opened in text write mode).
    pathways : iterable of str
        Pathway names. Each name is stripped and only its last five
        characters are used as the key into the mapping returned by
        ``get_pathway2kos``.
    to_classic : bool
        If true, write the tab-delimited representation; otherwise write
        the JSON representation.

    Raises
    ------
    EmptySetERROR
        If none of the table's observation ids belong to the selected
        pathways.
    """
    # setup
    table = load_table(table_in)
    pathway_dict = get_pathway2kos()

    # get set of kos from pathways
    pathways_kos = set()
    for pathway in pathways:
        pathways_kos |= pathway_dict[pathway.strip()[-5:]]

    # get selected kos; walking the table's own ids (instead of iterating a
    # set intersection) keeps the output row order stable between runs
    kept_ids = [obs for obs in table.ids('observation')
                if obs in pathways_kos]
    if not kept_ids:
        raise EmptySetERROR('Intersection created empty set')

    sample_ids = table.ids('sample')
    obs_ids = np.array(kept_ids)
    data = np.empty([len(obs_ids), len(sample_ids)])
    for i, obs in enumerate(obs_ids):
        data[i] = table.data(obs, 'observation')

    # output; the context manager guarantees the file is flushed and closed
    new_table = Table(data, obs_ids, sample_ids, type="OTU table")
    with open(table_out, 'w') as out_fh:
        if to_classic:
            # print to tab delimited biom table
            out_fh.write(new_table.to_tsv())
        else:
            # print json biom table
            new_table.to_json("filter_KOs_by_pathway.py", out_fh)
# Example #2
# 0
def differentialtest(table: biom.Table, metadata: qiime2.Metadata,
                     variable: str,
                     taxonomy: TSVTaxonomyFormat) -> pd.DataFrame:
    """Run ``differentialtest.R`` on a table and return its tabular output.

    The table (as TSV) and the metadata are written into a temporary
    directory, the R script is invoked through ``run_commands`` with those
    paths plus the stringified ``taxonomy``, ``variable`` and an output
    path, and the output file is read back as a tab-separated DataFrame.

    Raises
    ------
    ValueError
        If ``table.is_empty()`` is true.
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    with tempfile.TemporaryDirectory() as workdir:
        table_path = os.path.join(workdir, 'table.tsv')
        metadata_path = os.path.join(workdir, 'metadata.tsv')
        result_path = os.path.join(workdir, 'data.tsv')

        # Stage the inputs on disk for the R script.
        with open(table_path, 'w') as table_fh:
            table_fh.write(table.to_tsv())
        metadata.save(metadata_path)

        r_call = ['differentialtest.R', table_path, metadata_path]
        r_call.extend(str(arg) for arg in (taxonomy, variable, result_path))
        run_commands([r_call])

        # Must be read before the temporary directory is removed.
        results = pd.read_csv(result_path, sep='\t')
        results.index.name = 'Feature ID'
    return results
# Example #3
# 0
def SRS(table: biom.Table,
        c_min: int,
        set_seed: bool = True,
        seed: int = 1) -> biom.Table:
    """Run ``SRS.R`` on a table and wrap the resulting TSV in a biom table.

    The table is written as TSV into a temporary directory and ``SRS.R`` is
    invoked through ``run_commands`` with that path and the stringified
    ``c_min``, ``set_seed`` and ``seed``. The same path is then read back
    as a tab-separated DataFrame, whose values, index and columns become
    the data, observation ids and sample ids of the returned table.

    Raises
    ------
    ValueError
        If ``table.is_empty()`` is true.
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    with tempfile.TemporaryDirectory() as workdir:
        table_path = os.path.join(workdir, 'table.tsv')
        with open(table_path, 'w') as table_fh:
            table_fh.write(table.to_tsv())

        r_args = [str(value) for value in (c_min, set_seed, seed)]
        run_commands([['SRS.R', table_path] + r_args])

        # NOTE(review): the result is read from the input path, so SRS.R
        # presumably overwrites that file in place -- confirm.
        normalized = pd.read_csv(table_path, sep='\t')

    return biom.Table(data=normalized.values,
                      observation_ids=normalized.index,
                      sample_ids=normalized.columns)
# Example #4
# 0
def SRScurve(output_dir: str,
             table: biom.Table,
             metric: str = 'richness',
             step: int = 50,
             sample: int = 0,
             max_sample_size: int = 0,
             rarefy_comparison: bool = False,
             rarefy_repeats: int = 10,
             rarefy_comparison_legend: bool = False,
             srs_color: str = 'black',
             rarefy_color: str = 'red',
             srs_linetype: str = 'solid',
             rarefy_linetype: str = 'longdash',
             label: bool = False) -> None:
    """Run ``SRScurve.R`` on a table and write an ``index.html`` for it.

    The table is written as TSV into a temporary directory and
    ``SRScurve.R`` is invoked through ``run_commands`` with that path, every
    option stringified in signature order, and finally ``output_dir``.
    Afterwards ``index.html`` is written into ``output_dir``; it embeds an
    image named ``SRScurve_plot.png`` (presumably produced by the R script
    in ``output_dir`` -- confirm against SRScurve.R).

    Raises
    ------
    ValueError
        If ``table.is_empty()`` is true.
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    ## run the R script on the file
    with tempfile.TemporaryDirectory() as temp_dir_name:

        ## write the biom table to file
        input_name = os.path.join(temp_dir_name, 'table.tsv')
        with open(input_name, 'w') as fh:
            fh.write(table.to_tsv())

        options = (metric, step, sample, max_sample_size,
                   rarefy_comparison, rarefy_repeats,
                   rarefy_comparison_legend, srs_color, rarefy_color,
                   srs_linetype, rarefy_linetype, label, output_dir)
        cmd = ['SRScurve.R', input_name] + [str(opt) for opt in options]
        run_commands([cmd])

    index = os.path.join(output_dir, 'index.html')
    with open(index, 'w') as fh:
        fh.write(
            '<!DOCTYPE html><head></head><body>'
            '<img src="SRScurve_plot.png" '
            'style="max-width: 100vw;max-height: 100vh;object-fit: contain" />'
            '</body></html>'
        )
# Example #5
# 0
def alpha(table: biom.Table) -> AlphaDiversityFormat:
    """Run ``run_new_richness.R`` on a table.

    A fresh ``AlphaDiversityFormat`` is created, the table is written as
    TSV into a temporary directory, and the R script is invoked through
    ``run_commands`` with the TSV path and the stringified format object.
    The format object is returned.

    Raises
    ------
    ValueError
        If ``table.is_empty()`` is true.
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    result = AlphaDiversityFormat()

    with tempfile.TemporaryDirectory() as workdir:
        table_path = os.path.join(workdir, 'table.tsv')
        with open(table_path, 'w') as table_fh:
            table_fh.write(table.to_tsv())

        # str(result) is handed to the script as its output argument.
        run_commands([['run_new_richness.R', table_path, str(result)]])

    return result
def generate_per_sample_biom(biom_file, limit):
    """Yield one single-sample table per sample of a BIOM file.

    Parameters
    ----------
    biom_file : str
        A filepath to a BIOM table
    limit : int or None
        Maximum number of samples to emit; ``None`` means no limit

    Yields
    ------
    str
        The sample ID
    str
        The single-sample table serialised with ``to_json('AG')``
    str
        The single-sample table serialised with ``to_tsv`` using a
        ``taxonomy`` header column
    """
    def _has_counts(values, obs_id, md):
        # Keep only observations with a positive value in this sample.
        return values > 0

    source = load_table(biom_file)
    observation_ids = source.ids(axis='observation')
    observation_md = source.metadata(axis='observation')

    for emitted, (values, sample_id, _) in enumerate(source.iter()):
        if limit is not None and emitted >= limit:
            break

        # Reshape the sample's values into a one-column matrix.
        per_sample = Table(values[:, np.newaxis], observation_ids,
                           [sample_id], observation_md)
        per_sample.filter(_has_counts, axis='observation')

        as_json = per_sample.to_json('AG')
        as_tsv = per_sample.to_tsv(header_key='taxonomy',
                                   header_value='taxonomy',
                                   metadata_formatter='; '.join)
        yield (sample_id, as_json, as_tsv)
# Example #7
# 0
def cross_validate_map(
    table: biom.Table,
    groups: pd.DataFrame,
    phylogeny: NewickFormat,
    full_phylogeny: NewickFormat,
) -> (biom.Table, pd.DataFrame):
    """Run ``run_crossVmap.R`` and load the table and groups it writes.

    The table (as TSV) and ``groups`` (as tab-separated CSV) are written
    into a temporary directory. The R script is invoked through
    ``run_commands`` with those two paths, the stringified ``phylogeny``
    and ``full_phylogeny``, and two output paths. Both outputs are read
    back before the temporary directory is removed.

    Returns
    -------
    biom.Table
        Parsed from the script's table output with ``biom.Table.from_tsv``.
    pd.DataFrame
        Parsed from the script's tab-separated groups output.

    Raises
    ------
    Exception
        If ``run_commands`` raises ``subprocess.CalledProcessError``; the
        original error is attached as the cause.
    """
    with tempfile.TemporaryDirectory() as temp_dir_name:
        input_table = os.path.join(temp_dir_name, 'table.tsv')
        with open(input_table, 'w') as fh:
            fh.write(table.to_tsv())

        input_groups = os.path.join(temp_dir_name, 'groups.tsv')
        groups.to_csv(input_groups, sep='\t')

        biom_output = os.path.join(temp_dir_name, 'out_table.tsv')
        group_output = os.path.join(temp_dir_name, 'groups_out.tsv')

        cmd = [
            'run_crossVmap.R', input_table, input_groups,
            str(phylogeny),
            str(full_phylogeny),
            str(biom_output),
            str(group_output)
        ]

        try:
            print('Running Commands')
            run_commands([cmd])
        except subprocess.CalledProcessError as e:
            # Chain explicitly so the traceback shows the failed command as
            # the direct cause of this higher-level error.
            raise Exception("An error was encountered with PhyloFactor"
                            " in R (return code %d), please inspect stdout"
                            " and stderr to learn more."
                            % e.returncode) from e

        with open(biom_output) as fh:
            biom_table = biom.Table.from_tsv(fh, None, None, None)
        group_output_df = pd.read_csv(group_output, sep='\t')

    return biom_table, group_output_df