Python MetadataMap.hasUniqueCategoryValues示例

编程语言: Python

命名空间/包名称: qiime.util

类/类型: MetadataMap

方法/功能: hasUniqueCategoryValues

hotexamples.com的示例: 2

Python MetadataMap.hasUniqueCategoryValues - 共找到2个示例。以下是从开源项目中提取的、评价最高的qiime.util.MetadataMap.hasUniqueCategoryValues真实Python示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示隐藏

parseMetadataMap(5)

MetadataMap(4)

hasSingleCategoryValue(3)

mergeMappingFiles(2)

hasUniqueCategoryValues(1)

isNumericCategory(1)

示例#1

显示文件

文件： util.py 项目： teravest/emperor

def preprocess_mapping_file(data,
                            headers,
                            columns,
                            unique=False,
                            single=False,
                            clones=0):
    """Process a mapping file to expand the data or remove unuseful fields

    Inputs:
    data: mapping file data, a list of rows (lists of strings); the first
    element of each row is treated as the sample identifier
    headers: mapping file headers, one name per element of a row in data
    columns: list of headers to keep; 'SampleID' is prepended when it is not
    already the first element (an empty list therefore selects 'SampleID'
    only). If one of these headers includes two ampersands ('&&'), this
    function will create a new column by concatenating the values of the
    delimited columns.
    unique: if true, remove the columns where all values are unique
    single: if true, remove the columns where all values are the same
    clones: number of times to replicate the metadata; the sample identifiers
    of each replicate are suffixed with '_0', '_1', ...

    Outputs:
    data: processed mapping file data
    headers: processed mapping file headers

    Raises:
    ValueError: if a '&&' delimited name in columns is not present in headers

    The data and headers passed in are not modified by the creation of the
    merged columns, the function works on copies when it needs to add them.
    """

    # The sample ID must always be there, else it's meaningless data. Checking
    # for emptiness first avoids an IndexError when no columns are requested.
    columns = list(columns)
    if not columns or columns[0] != 'SampleID':
        columns = ['SampleID'] + columns

    # process concatenated columns if needed
    merge = [column for column in columns if '&&' in column]
    if merge:
        # copy the rows and the header list so the caller's objects are not
        # extended as a side effect of adding the merged columns
        headers = list(headers)
        data = [list(line) for line in data]

    # each element needs several columns to be merged
    for new_column in merge:
        indices = [
            headers.index(header_name)
            for header_name in new_column.split('&&')
        ]

        # join all the fields of the metadata that are listed in indices
        for line in data:
            line.append(''.join([line[index] for index in indices]))
        headers.append(new_column)

    # remove all unique or singled valued columns
    if unique or single:
        columns_to_remove = []
        metadata = MetadataMap(mapping_file_to_dict(data, headers), [])

        # the first header is skipped so the sample identifiers, which are
        # expected to be unique, are never selected for removal

        # find columns that have values that are all unique
        if unique:
            columns_to_remove += [
                column_name for column_name in headers[1::]
                if metadata.hasUniqueCategoryValues(column_name)
            ]

        # remove categories where there is only one value
        if single:
            columns_to_remove += [
                column_name for column_name in headers[1::]
                if metadata.hasSingleCategoryValue(column_name)
            ]

        # a column can be flagged by both checks, list it only once
        columns_to_remove = list(set(columns_to_remove))

        # remove the single or unique columns
        data, headers = keep_columns_from_mapping_file(data,
                                                       headers,
                                                       columns_to_remove,
                                                       negate=True)

    # remove anything not specified in the input
    data, headers = keep_columns_from_mapping_file(data, headers, columns)

    # sanitize the mapping file data and headers
    data, headers = sanitize_mapping_file(data, headers)

    # clones mean: replicate the metadata retagging the sample ids with a suffix
    if clones:
        out_data = []
        for index in range(0, clones):
            out_data.extend([[element[0] + '_%d' % index] + element[1::]
                             for element in data])
        data = out_data

    return data, headers

示例#2

显示文件

文件： util.py 项目： jessicalmetcalf/emperor

def preprocess_mapping_file(data, headers, columns, unique=False, single=False, clones=0):
    """Process a mapping file to expand the data or remove unuseful fields

    Inputs:
    data: mapping file data, a list of rows (lists of strings); the first
    element of each row is treated as the sample identifier
    headers: mapping file headers, one name per element of a row in data
    columns: list of headers to keep; "SampleID" is prepended when it is not
    already the first element (an empty list therefore selects "SampleID"
    only). If one of these headers includes two ampersands ("&&"), this
    function will create a new column by concatenating the values of the
    delimited columns.
    unique: if true, remove the columns where all values are unique
    single: if true, remove the columns where all values are the same
    clones: number of times to replicate the metadata; the sample identifiers
    of each replicate are suffixed with "_0", "_1", ...

    Outputs:
    data: processed mapping file data
    headers: processed mapping file headers

    Raises:
    ValueError: if a "&&" delimited name in columns is not present in headers

    The data and headers passed in are not modified by the creation of the
    merged columns, the function works on copies when it needs to add them.
    """

    # The sample ID must always be there, else it's meaningless data. The
    # emptiness check prevents an IndexError when no columns are requested.
    columns = list(columns)
    if not columns or columns[0] != "SampleID":
        columns = ["SampleID"] + columns

    # process concatenated columns if needed
    merge = [column for column in columns if "&&" in column]
    if merge:
        # merged columns are appended to copies, never to the caller's own
        # rows or header list
        headers = list(headers)
        data = [list(line) for line in data]

    # each element needs several columns to be merged
    for new_column in merge:
        indices = [headers.index(header_name) for header_name in new_column.split("&&")]

        # join all the fields of the metadata that are listed in indices
        for line in data:
            line.append("".join([line[index] for index in indices]))
        headers.append(new_column)

    # remove all unique or singled valued columns
    if unique or single:
        metadata = MetadataMap(mapping_file_to_dict(data, headers), [])

        # the first header is skipped so the sample identifiers, which are
        # expected to be unique, are never selected for removal
        candidates = headers[1::]

        # a set is used because a column can be flagged by both checks
        columns_to_remove = set()

        # find columns that have values that are all unique
        if unique:
            columns_to_remove.update(
                column_name for column_name in candidates if metadata.hasUniqueCategoryValues(column_name)
            )

        # remove categories where there is only one value
        if single:
            columns_to_remove.update(
                column_name for column_name in candidates if metadata.hasSingleCategoryValue(column_name)
            )

        # remove the single or unique columns
        data, headers = keep_columns_from_mapping_file(data, headers, list(columns_to_remove), negate=True)

    # remove anything not specified in the input
    data, headers = keep_columns_from_mapping_file(data, headers, columns)

    # sanitize the mapping file data and headers
    data, headers = sanitize_mapping_file(data, headers)

    # clones mean: replicate the metadata retagging the sample ids with a suffix
    if clones:
        out_data = []
        for index in range(clones):
            out_data.extend([[element[0] + "_%d" % index] + element[1::] for element in data])
        data = out_data

    return data, headers