示例#1
0
def filter_values(table: biom.Table, th: float) -> biom.Table:
    """Zero out low-abundance values per sample, then drop empty features.

    Parameters
    ----------
    table : biom.Table
        Table to filter. The same object is returned; the results of the
        ``transform`` and ``remove_empty`` calls are not captured, so this
        relies on biom performing both in place (its documented default).
    th : float
        Threshold. A value greater than 1 is used directly as the cutoff;
        any other value (including exactly 1) is multiplied by the sample's
        sum to obtain the cutoff.

    Returns
    -------
    biom.Table
        The table that was passed in.
    """
    def _zero_below_cutoff(values, sample_id, metadata):
        # Absolute cutoff when th > 1, otherwise a fraction of this
        # sample's total.
        if th > 1:
            cutoff = th
        else:
            cutoff = values.sum() * th
        # Only values strictly below the cutoff are zeroed.
        values[values < cutoff] = 0
        return values

    table.transform(_zero_below_cutoff, axis='sample')
    # Features left with no non-zero value in any sample are removed.
    table.remove_empty(axis='observation')
    return table
示例#2
0
文件: biom.py 项目: qiyunzhu/woltka
def scale_biom(table: biom.Table, scale: float):
    """Scale every value of a BIOM table by one factor, in place.

    Parameters
    ----------
    table : biom.Table
        BIOM table whose values are multiplied.
    scale : float
        Factor applied to the values.
    """
    def _multiply(values, id_, md):
        # Neither the ID nor the metadata influences the result.
        return values * scale

    table.transform(_multiply, inplace=True)
示例#3
0
文件: biom.py 项目: qiyunzhu/woltka
def divide_biom(table: biom.Table, sizes: dict):
    """Divide each feature's values by that feature's size, in place.

    Parameters
    ----------
    table : biom.Table
        BIOM table whose values are divided.
    sizes : dict
        Mapping from feature (observation) ID to its size. Sizes are looked
        up with ``sizes[id]``, so an ID that is missing from the mapping
        fails on that lookup.
    """
    def _by_size(values, feature_id, metadata):
        return values / sizes[feature_id]

    table.transform(_by_size, axis='observation', inplace=True)
示例#4
0
文件: biom.py 项目: mortonjt/woltka
def round_biom(table: biom.Table):
    """Convert a BIOM table's values to integers and drop empty
    observations in place.

    Each value is passed through ``intize`` (applied element-wise via
    ``np.vectorize``); observations that end up empty are then removed.

    Parameters
    ----------
    table : biom.Table
        BIOM table to round.

    Notes
    -----
    Only the observation axis is cleaned up; empty samples are kept.
    """
    to_int = np.vectorize(intize)

    def _round_row(values, obs_id, metadata):
        return to_int(values)

    table.transform(_round_row, axis='observation')
    table.remove_empty(axis='observation')
示例#5
0
文件: biom.py 项目: qiyunzhu/woltka
def round_biom(table: biom.Table, digits=None):
    """Round a BIOM table's values and drop empty observations in place.

    Each value is passed through ``rounder`` with the given ``digits``
    (applied element-wise via ``np.vectorize``); observations that end up
    empty are then removed.

    Parameters
    ----------
    table : biom.Table
        BIOM table to round.
    digits : int, optional
        Digits after the decimal point, forwarded to ``rounder``.

    Notes
    -----
    Only the observation axis is cleaned up; empty samples are kept.
    """
    round_each = np.vectorize(partial(rounder, digits=digits))

    def _round_row(values, obs_id, metadata):
        return round_each(values)

    table.transform(_round_row, axis='observation')
    table.remove_empty(axis='observation')
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    otu_table = load_table(opts.input_otu_fp)

    ids_to_load = otu_table.ids(axis='observation')

    if(opts.input_count_fp is None):
        #precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name='_'.join(['16S',opts.gg_version,'precalculated.tab.gz'])
        input_count_table=join(get_picrust_project_dir(),'picrust','data',precalc_file_name)
    else:
        input_count_table=opts.input_count_fp

    if opts.verbose:
        print "Loading trait table: ", input_count_table

    ext=path.splitext(input_count_table)[1]

    if (ext == '.gz'):
        count_table_fh = gzip.open(input_count_table,'rb')
    else:
        count_table_fh = open(input_count_table,'U')

    if opts.load_precalc_file_in_biom:
        count_table = load_table(count_table_fh)
    else:
        count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)

    #Need to only keep data relevant to our otu list
    ids=[]
    for x in otu_table.iter(axis='observation'):
        ids.append(str(x[1]))

    ob_id=count_table.ids(axis='observation')[0]

    filtered_otus=[]
    filtered_values=[]
    for x in ids:
        if count_table.exists(x, axis='sample'):
            filtered_otus.append(x)
            filtered_values.append(otu_table.data(x, axis='observation'))

    filtered_otu_table = Table(filtered_values, filtered_otus, otu_table.ids())

    copy_numbers_filtered={}
    for x in filtered_otus:
        value = count_table.get_value_by_ids(ob_id,x)
        try:
            #data can be floats so round them and make them integers
            value = int(round(float(value)))

        except ValueError:
            raise ValueError,\
                  "Invalid type passed as copy number for OTU ID %s. Must be int-able." % (value)
        if value < 1:
            raise ValueError, "Copy numbers must be greater than or equal to 1."

        copy_numbers_filtered[x]={opts.metadata_identifer:value}

    filtered_otu_table.add_metadata(copy_numbers_filtered, axis='observation')

    def metadata_norm(v, i, md):
        return v / float(md[opts.metadata_identifer])
    normalized_table = filtered_otu_table.transform(metadata_norm, axis='observation')

    #move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(otu_table, normalized_table, 'observation')

    make_output_dir_for_file(opts.output_otu_fp)
    write_biom_table(normalized_table, opts.output_otu_fp)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = load_table(opts.input_otu_fp)

    ids_to_load = otu_table.ids(axis="observation")

    if opts.input_count_fp is None:
        # precalc file has specific name (e.g. 16S_13_5_precalculated.tab.gz)
        precalc_file_name = "_".join(["16S", opts.gg_version, "precalculated.tab.gz"])
        input_count_table = join(get_picrust_project_dir(), "picrust", "data", precalc_file_name)
    else:
        input_count_table = opts.input_count_fp

    if opts.verbose:
        print "Loading trait table: ", input_count_table

    ext = path.splitext(input_count_table)[1]

    if ext == ".gz":
        count_table_fh = gzip.open(input_count_table, "rb")
    else:
        count_table_fh = open(input_count_table, "U")

    if opts.load_precalc_file_in_biom:
        count_table = load_table(count_table_fh)
    else:
        count_table = convert_precalc_to_biom(count_table_fh, ids_to_load)

    # Need to only keep data relevant to our otu list
    ids = []
    for x in otu_table.iter(axis="observation"):
        ids.append(str(x[1]))

    ob_id = count_table.ids(axis="observation")[0]

    filtered_otus = []
    filtered_values = []
    for x in ids:
        if count_table.exists(x, axis="sample"):
            filtered_otus.append(x)
            filtered_values.append(otu_table.data(x, axis="observation"))

    filtered_otu_table = Table(filtered_values, filtered_otus, otu_table.ids())

    copy_numbers_filtered = {}
    for x in filtered_otus:
        value = count_table.get_value_by_ids(ob_id, x)
        try:
            # data can be floats so round them and make them integers
            value = int(round(float(value)))

        except ValueError:
            raise ValueError, "Invalid type passed as copy number for OTU ID %s. Must be int-able." % (value)
        if value < 1:
            raise ValueError, "Copy numbers must be greater than or equal to 1."

        copy_numbers_filtered[x] = {opts.metadata_identifer: value}

    filtered_otu_table.add_metadata(copy_numbers_filtered, axis="observation")

    def metadata_norm(v, i, md):
        return v / float(md[opts.metadata_identifer])

    normalized_table = filtered_otu_table.transform(metadata_norm, axis="observation")

    # move Observation Metadata from original to filtered OTU table
    normalized_table = transfer_observation_metadata(otu_table, normalized_table, "observation")

    make_output_dir_for_file(opts.output_otu_fp)
    write_biom_table(normalized_table, opts.output_otu_fp)