def main(table_in, table_out, pathways, to_classic):
    """Write a copy of a BIOM table restricted to the KOs of given pathways.

    Parameters
    ----------
    table_in : str
        Path of the table passed to ``load_table``.
    table_out : str
        Path the filtered table is written to.
    pathways : iterable of str
        Pathway names. Only the last five characters of each stripped name
        are used as the lookup key into the pathway-to-KO mapping
        (presumably the numeric pathway id -- confirm against
        ``get_pathway2kos``).
    to_classic : bool
        If true, write the tab-delimited representation (``to_tsv``);
        otherwise write the JSON representation (``to_json``).

    Raises
    ------
    EmptySetERROR
        If no observation id of the table belongs to the requested pathways.

    Notes
    -----
    A pathway key that is missing from the mapping propagates whatever
    lookup error the mapping raises.
    """
    # setup
    table = load_table(table_in)
    pathway_dict = get_pathway2kos()

    # get set of kos from pathways
    pathways_kos = set()
    for pathway in pathways:
        pathways_kos |= pathway_dict[pathway.strip()[-5:]]

    # get selected kos
    kos_to_keep = set(table.ids('observation')) & pathways_kos
    if not kos_to_keep:
        raise EmptySetERROR('Intersection created empty set')

    # Sort so the row order of the output does not depend on set iteration
    # order, which can change from run to run.
    obs_ids = np.array(sorted(kos_to_keep))
    sample_ids = table.ids('sample')
    data = np.empty([len(obs_ids), len(sample_ids)])
    for i, obs in enumerate(obs_ids):
        data[i] = table.data(obs, 'observation')

    # output
    new_table = Table(data, obs_ids, sample_ids, type="OTU table")
    # The context manager guarantees the output is flushed and closed even
    # if serialisation fails part-way through.
    with open(table_out, 'w') as out_fh:
        if to_classic:
            # print to tab delimited biom table
            out_fh.write(new_table.to_tsv())
        else:
            # print json biom table
            new_table.to_json("filter_KOs_by_pathway.py", out_fh)
示例#2
0
def main():
    """Zero out low-abundance entries of a BIOM table and write it as JSON.

    Command-line arguments are read from the module-level ``parser``:
    the input and output table paths, the abundance threshold, and a flag
    selecting whether the threshold is a fraction of each sample's total
    or an absolute count.

    Every entry that is not strictly greater than the threshold is set to
    zero. Observation/sample ids and metadata are carried over unchanged.

    Raises
    ------
    ValueError
        If the threshold is outside [0, 1] in fraction mode, or does not
        look like a non-negative number in absolute-count mode.
    """
    args = parser.parse_args()
    input_fp = args.input_biom
    output_fp = args.output_biom
    threshold = args.abundance_threshold
    as_fraction = args.abundance_as_fraction

    if as_fraction:
        if not 0 <= threshold <= 1:
            raise ValueError("The value passed for -n "
                             "(--abundance_as_fraction) must be in the "
                             "interval [0, 1]")
    else:
        # NOTE(review): this check tolerates a single decimal point, so a
        # fractional value passes it and is then handed to int() -- confirm
        # the type the parser produces for this option.
        if not str(threshold).replace('.', '', 1).isdigit():
            raise ValueError("If you want to express the minimum threshold as "
                             "a fraction of the total sequences in a sample, "
                             "use -n in combination with -f. Otherwise, if "
                             "you want to express the minimum threshold as an "
                             "absolute sequence count minimum, the value "
                             "passed for -n must be an integer.")

        threshold = int(threshold)

    input_table = load_table(input_fp)

    new_data = []
    for abundances in input_table.iter_data():
        values = abundances
        if as_fraction:
            total = abundances.sum()
            # A vector whose entries sum to zero has no defined fractions;
            # skip the scaling so no 0/0 division is performed.
            if total:
                values = abundances.astype(float) / total

        # Boolean-mask assignment replaces the per-index ndarray.itemset
        # calls (itemset was removed in NumPy 2.0). Negating ``>`` instead
        # of using ``<=`` keeps NaN entries on the "zero it" side.
        abundances[~(values > threshold)] = 0

        new_data.append(abundances)

    # iter_data yielded one vector per sample; transpose so observations
    # are rows again.
    new_data = array(new_data).transpose()

    new_table = Table(new_data, input_table.ids('observation'),
                      input_table.ids(),
                      input_table.metadata(axis='observation'),
                      input_table.metadata())

    with open(output_fp, 'w') as output_fd:
        new_table.to_json('one-time generation', output_fd)
示例#3
0
def _1(data: biom.Table) -> BIOMV100Format:
    """Serialise a ``biom.Table`` to JSON inside a new ``BIOMV100Format``.

    The table is first passed through ``_drop_axis_metadata``; the result's
    JSON representation is written to the freshly created format object,
    which is then returned.
    """
    stripped = _drop_axis_metadata(data)

    result = BIOMV100Format()
    with result.open() as handle:
        payload = stripped.to_json(generated_by=_get_generated_by())
        handle.write(payload)
    return result
示例#4
0
def _1(data: biom.Table) -> BIOMV100Format:
    """Write the JSON form of a ``biom.Table`` into a ``BIOMV100Format``.

    ``_drop_axis_metadata`` is applied to the table before serialisation.
    The returned object is the newly created format instance holding the
    JSON text.
    """
    table = _drop_axis_metadata(data)

    fmt = BIOMV100Format()
    with fmt.open() as out:
        json_text = table.to_json(generated_by=_get_generated_by())
        out.write(json_text)
    return fmt
def generate_per_sample_biom(biom_file, limit):
    """Generate per-sample BIOM files

    This is a generator: one tuple is produced for each sample of the
    input table, in the order ``Table.iter()`` visits them.

    Parameters
    ----------
    biom_file : str
        A filepath to a BIOM table
    limit : int or None
        Maximum number of samples to emit; ``None`` means no limit

    Yields
    ------
    str
        The sample ID
    str
        The single-sample table in BIOM Format v1.0 (``to_json`` output)
    str
        The single-sample table in the classic OTU table format
        (``to_tsv`` output with a ``taxonomy`` metadata column)
    """
    full_table = load_table(biom_file)
    observation_ids = full_table.ids(axis='observation')
    observation_md = full_table.metadata(axis='observation')

    # An infinite cap makes the comparison below always false when no limit
    # was requested.
    max_tables = np.inf if limit is None else limit

    # Callbacks do not depend on the sample, so build them once.
    is_present = lambda v_, i, md: v_ > 0
    join_lineage = lambda x: '; '.join(x)

    for emitted, (values, sample_id, _) in enumerate(full_table.iter()):
        if emitted >= max_tables:
            break

        # Reshape the sample's vector into a single-column matrix.
        column = values[:, np.newaxis]
        per_sample = Table(column, observation_ids, [sample_id],
                           observation_md)
        # Filter the observation axis on "value > 0" for this sample.
        per_sample.filter(is_present, axis='observation')

        as_json = per_sample.to_json('AG')
        as_tsv = per_sample.to_tsv(
            header_key='taxonomy',
            header_value='taxonomy',
            metadata_formatter=join_lineage)
        yield (sample_id, as_json, as_tsv)