示例#1
0
def augment(table: biom.Table, sampling_depth: int, augment_times: int, output_path_metadata: str,
        raw_metadata: qiime2.Metadata, with_replacement: bool = False, rarefy_start: bool = True) -> biom.Table:
    """Augment a feature table with repeated subsampled copies of itself.

    The input table is subsampled ``augment_times`` times along the sample
    axis. Each round's samples get the suffix ``_<k>`` (``k`` = 1-based round
    number) appended to their IDs and are merged into the output table. The
    sample metadata is duplicated with the same suffixes and saved to
    ``output_path_metadata``.

    Parameters
    ----------
    table : biom.Table
        Feature table to augment.
    sampling_depth : int
        Depth forwarded to ``biom.Table.subsample`` for every draw.
    augment_times : int
        Number of subsampled copies to append.
    output_path_metadata : str
        Destination passed to ``qiime2.Metadata.save`` for the combined
        metadata.
    raw_metadata : qiime2.Metadata
        Metadata of the input samples; its index is assumed to hold string
        sample IDs matching the table.
    with_replacement : bool, optional
        Forwarded to ``biom.Table.subsample``.
    rarefy_start : bool, optional
        If true, the un-suffixed starting samples are themselves a subsampled
        draw; otherwise the (sorted) input table is used as the starting
        point unchanged.

    Returns
    -------
    biom.Table
        The starting table merged with all suffixed subsampled copies.

    Raises
    ------
    ValueError
        If the resulting table is empty. The check runs before the metadata
        is saved, so no metadata file is written in that case.
    """
    metadata = raw_metadata.to_dataframe().sort_index()

    # Round-trip through a DataFrame so both axes of the table are sorted.
    all_df = table.to_dataframe().sort_index().sort_index(axis=1)
    table = biom.Table(all_df.values, all_df.index.to_list(), all_df.columns.to_list())

    def _draw() -> biom.Table:
        # One subsampling pass over the sorted input table.
        return table.subsample(sampling_depth, axis='sample', by_id=False,
                               with_replacement=with_replacement)

    if rarefy_start:
        # All-zero table with the input's IDs on both axes. Merging the draw
        # into it presumably keeps any IDs that subsampling dropped (as
        # all-zero entries) -- NOTE(review): confirm against biom merge docs.
        zero_df = all_df[all_df == 0].fillna(0)
        zero_table = biom.Table(zero_df.values, zero_df.index.to_list(),
                                zero_df.columns.to_list())
        output_table = zero_table.merge(_draw())
    else:
        output_table = table

    # Collect the metadata frames and concatenate once after the loop rather
    # than re-copying the growing frame on every iteration.
    metadata_frames = [metadata]

    for num in range(1, augment_times + 1):
        sub_df = _draw().to_dataframe().sort_index().sort_index(axis=1)

        # Suffix the sample IDs so this round's samples do not collide with
        # earlier ones when merged.
        sub_df.columns = [f'{sample_id}_{num}' for sample_id in sub_df.columns.to_list()]
        sub_table = biom.Table(sub_df.values, sub_df.index.to_list(), sub_df.columns.to_list())
        output_table = output_table.merge(sub_table)

        # Mirror the same suffix on a copy of the metadata.
        tmp_metadata = metadata.copy()
        tmp_metadata.index = [f'{sample_id}_{num}' for sample_id in metadata.index.to_list()]
        metadata_frames.append(tmp_metadata)

    # Validate before touching the filesystem.
    if output_table.is_empty():
        raise ValueError('The output table contains no features.')

    output_metadata = pd.concat(metadata_frames)
    output_metadata.index.name = 'sample-id'
    qiime2.metadata.Metadata(output_metadata).save(output_path_metadata)

    return output_table
def subsample(table: biom.Table, subsampling_depth: int,
              axis: str) -> biom.Table:
    """Randomly keep ``subsampling_depth`` IDs along the requested axis.

    When ``axis`` is ``'feature'`` the table is transposed before and after
    the operation (workaround referenced as biocore/biom-format#759), so the
    underlying ``biom.Table.subsample`` call always runs on the sample axis
    with ``by_id=True``. Entries on the opposite axis that sum to zero after
    subsampling are filtered out.

    Raises
    ------
    ValueError
        If ``subsampling_depth`` is larger than the number of IDs on the
        chosen axis, or if the resulting table is empty.
    """
    by_feature = axis == 'feature'

    # biocore/biom-format#759: operate on the transpose for the feature axis
    working = table.transpose() if by_feature else table

    available = len(working.ids())
    if subsampling_depth > available:
        raise ValueError('The subsampling depth exceeds the number of '
                         'elements on the desired axis. The maximum depth '
                         'is: %d.' % available)

    # thanks to the transpose above, the target axis is always 'sample'
    working = working.subsample(subsampling_depth, axis='sample', by_id=True)

    def _has_counts(values, _id, _md):
        # keep only vectors that still carry a non-zero total
        return values.sum() > 0

    # likewise, the opposite axis is always 'observation'
    working.filter(_has_counts, axis='observation')

    if by_feature:
        # undo the transpose required by biocore/biom-format#759
        working = working.transpose()

    if working.is_empty():
        raise ValueError('The subsampled table contains no samples or features'
                         ' (samples/features that sum to zero after filtering'
                         ' are automatically removed). It may be a good idea'
                         ' to double check that your table is valid/nonempty.')

    return working
示例#3
0
def rarefy(table: biom.Table, sampling_depth: int) -> biom.Table:
    """Subsample ``table`` along the sample axis to ``sampling_depth``.

    Delegates to ``biom.Table.subsample`` with ``by_id=False`` and returns
    the result.

    Raises
    ------
    ValueError
        If the subsampled table is empty.
    """
    rarefied = table.subsample(sampling_depth, axis='sample', by_id=False)

    if not rarefied.is_empty():
        return rarefied

    raise ValueError('The rarefied table contains no samples or features. '
                     'Verify your table is valid and that you provided a '
                     'shallow enough sampling depth.')
示例#4
0
def rarefy(table: biom.Table, sampling_depth: int) -> biom.Table:
    """Subsample ``table`` along the sample axis to ``sampling_depth``.

    Thin wrapper around ``biom.Table.subsample`` called with
    ``axis='sample'`` and ``by_id=False``; the result is returned as-is,
    without any emptiness check.
    """
    rarefied = table.subsample(sampling_depth, axis='sample', by_id=False)
    return rarefied
示例#5
0
def rarefy(table: biom.Table, sampling_depth: int) -> biom.Table:
    """Return ``table`` subsampled to ``sampling_depth`` on the sample axis.

    Forwards to ``biom.Table.subsample`` with ``by_id=False``. No validation
    is performed on the returned table.
    """
    # Fixed options for the underlying biom call.
    subsample_options = {'axis': 'sample', 'by_id': False}
    return table.subsample(sampling_depth, **subsample_options)