Python taxonomy_trim示例，biobakery_workflows.utilities.taxonomy_trim Python示例

示例#1

0

显示文件

def main():
    """Trim the taxonomy field of every data row in a tab-delimited table.

    The first line of ``args.input`` (the header) and every later line that
    starts with "#" are copied to ``args.output`` unchanged. For every other
    line, the field at ``args.taxonomy_column`` is replaced by the first
    element of ``utilities.taxonomy_trim([field])``.

    If ``args.taxonomy_column`` is None, it is set from the first data line
    to the index of the first field containing "k__" and reused for all
    following lines.

    Exits through ``sys.exit`` with an error message when the output file
    cannot be opened, the input file cannot be opened, or no field of the
    first data line contains "k__".
    """

    args=parse_arguments(sys)

    try:
        file_handle_write=open(args.output,"wt")
    except EnvironmentError:
        sys.exit("Error: Unable to open output file: " + args.output)

    # The "with" blocks close both files on every exit path, including the
    # sys.exit calls below (SystemExit propagates through them), so no
    # handle is left open and buffered output is flushed.
    with file_handle_write:
        try:
            file_handle_read=open(args.input,"rt")
        except EnvironmentError:
            sys.exit("Error: Unable to read input file: " + args.input)

        with file_handle_read:
            # write the header to the new file
            file_handle_write.write(file_handle_read.readline())

            # trim the taxonomy
            for line in file_handle_read:
                # lines that are comments are passed through untouched
                if line.startswith("#"):
                    file_handle_write.write(line)
                    continue

                data=line.rstrip().split("\t")
                if args.taxonomy_column is None:
                    # try to figure out which column has the taxonomy data
                    detected_column=next(
                        (index for index, value in enumerate(data) if "k__" in value),
                        None)
                    if detected_column is None:
                        sys.exit("Error unable to find the taxonomy column. Please provide it with the option --taxonomy-column <0>.")
                    args.taxonomy_column=detected_column

                # taxonomy_trim takes and returns a list, so wrap and unwrap
                data[args.taxonomy_column]=utilities.taxonomy_trim([data[args.taxonomy_column]])[0]
                file_handle_write.write("\t".join(data)+"\n")

示例#2

0

显示文件

文件： test_utilities.py 项目： zhaoxia413/biobakery_workflows

    def test_taxonomy_trim(self):
        """ Check utilities.taxonomy_trim against a fixed set of lineages """

        # each case pairs a full lineage string with the trimmed name the
        # function is expected to return for it
        cases = (
            ("k__k3;p__p3;c__c2;o__o3;f__;g__;s__", "o__o3.f__.g__.s__"),
            ("k__k3;p__p3;c__c2;o__o3;f__f1;g__g1;s__", "g__g1.s__"),
            ("k__k3;p__p3;c__c2;o__o3;f__f1;g__g1;s__s1", "g__g1.s__s1"),
            ("k__k3;p__p3;c__c2;o__o3;f__f1;g__;s__", "f__f1.g__.s__"),
        )

        lineages = [full_name for full_name, _ in cases]
        trimmed_names = [short_name for _, short_name in cases]

        # all lineages are trimmed in a single call and compared as a list
        self.assertEqual(utilities.taxonomy_trim(lineages), trimmed_names)

示例#3

0

显示文件

#' ## Terminal Taxa

#+ echo=False

# plot the relative abundance of the top terminal taxa
# NOTE(review): taxonomy, relab_data, max_taxa, samples, document and the
# utilities / visualizations / numpy modules are not defined in this section;
# presumably they are set up earlier in the document -- confirm.

# get the terminal taxa
terminal_taxa_relab, terminal_data_relab = utilities.terminal_taxa(
    taxonomy, relab_data)
# get the top rows of terminal taxa (at most max_taxa, ranked with the
# "average" function)
top_terminal_taxa, top_terminal_data = utilities.top_rows(terminal_taxa_relab,
                                                          terminal_data_relab,
                                                          max_taxa,
                                                          function="average")

# reduce the taxa names to just the most specific identifier
shorted_names = utilities.taxonomy_trim(top_terminal_taxa)

# sort the data with the samples with the top terminal taxa first
# (the sort is driven by the first row of top_terminal_data only;
# sorted_data_terminal is not used in the lines visible here)
sorted_samples_terminal, sorted_data_terminal = utilities.sort_data(
    top_terminal_data[0], samples)
# Reorder every row of top_terminal_data to follow sorted_samples_terminal:
# transpose, pick the entries in the new sample order by looking up each
# sample's position in the original samples list, then transpose back.
# assumes top_terminal_data is laid out taxa x samples -- TODO confirm
transpose_top_terminal_data = numpy.transpose(top_terminal_data)
sorted_top_terminal_data = numpy.transpose([
    transpose_top_terminal_data[samples.index(sample)]
    for sample in sorted_samples_terminal
])

# add the remaining terminal taxa as "other" to the data
shorted_names_plus_other, sorted_top_terminal_data_plus_other = visualizations.fill_taxonomy_other(
    shorted_names, sorted_top_terminal_data)

document.plot_stacked_barchart(sorted_top_terminal_data_plus_other,

示例#4

0

显示文件

def main():
    """Trim taxonomy names in a tab-delimited table and sum rows sharing a name.

    Reads ``args.input`` (through ``gzip`` when the name ends in ".gz").
    The first line is the header; if its first field starts with
    ``BIOM_COMMENT`` that line is dropped and the next line is used instead.
    Lines starting with "#" are copied to ``args.output`` as they are read.

    For every other line, the field at ``args.taxonomy_column`` is trimmed
    with ``utilities.taxonomy_trim`` and removed from the row. Rows that end
    up with the same trimmed name are merged: the first field is kept and
    the remaining fields are added together as floats.

    After the scan, the header and then one row per trimmed name are
    written, with the taxonomy label / trimmed name stored at index
    ``args.end_taxonomy_column`` (which defaults to ``args.taxonomy_column``).

    If ``args.taxonomy_column`` is None, it is set from the first data line
    to the index of the first field containing "k__".

    Exits through ``sys.exit`` with an error message when a file cannot be
    opened or the taxonomy column cannot be determined.
    """

    args = parse_arguments(sys)

    try:
        file_handle_write = open(args.output, "wt")
    except EnvironmentError:
        sys.exit("Error: Unable to open output file: " + args.output)

    # The "with" blocks close the files on every exit path, including the
    # sys.exit calls below (SystemExit propagates through them).
    with file_handle_write:
        try:
            if args.input.endswith(".gz"):
                file_handle_read = gzip.open(args.input, "rt")
            else:
                file_handle_read = open(args.input, "rt")
        except EnvironmentError:
            sys.exit("Error: Unable to read input file: " + args.input)

        with file_handle_read:
            # read the header; it is written only after all rows are scanned
            header = file_handle_read.readline().rstrip().split("\t")

            # ignore comment if present
            if header[0].startswith(BIOM_COMMENT):
                header = file_handle_read.readline().rstrip().split("\t")

            # trim the taxonomy and sum species
            taxonomy_data = {}
            for line in file_handle_read:
                if line.startswith("#"):
                    # NOTE(review): comment lines are written immediately,
                    # so they land in the output ahead of the header --
                    # confirm this ordering is intended.
                    file_handle_write.write(line)
                    continue

                data = line.rstrip().split("\t")
                if args.taxonomy_column is None:
                    # try to figure out which column has the taxonomy data
                    try:
                        args.taxonomy_column = [
                            index for index, value in enumerate(data)
                            if "k__" in value
                        ][0]
                    except IndexError:
                        sys.exit(
                            "Error unable to find the taxonomy column. Please provide it with the option --taxonomy-column <0>."
                        )

                # taxonomy_trim takes and returns a list, so wrap and unwrap
                new_taxonomy = utilities.taxonomy_trim(
                    [data[args.taxonomy_column]])[0]
                data.pop(args.taxonomy_column)
                if new_taxonomy in taxonomy_data:
                    # merge with the row already stored under this name
                    data = [data[0]] + [
                        str(float(a) + float(b))
                        for a, b in zip(taxonomy_data[new_taxonomy][1:], data[1:])
                    ]
                taxonomy_data[new_taxonomy] = data

        # Resolve the column defaults here rather than inside the loop so an
        # input without data rows produces a clear error (or a header-only
        # output) instead of a TypeError from indexing with None.
        if args.taxonomy_column is None:
            sys.exit(
                "Error unable to find the taxonomy column. Please provide it with the option --taxonomy-column <0>."
            )
        if args.end_taxonomy_column is None:
            args.end_taxonomy_column = args.taxonomy_column

        # write the header
        # NOTE(review): the label is assigned over whatever sits at
        # end_taxonomy_column after the pop, not inserted; when that index
        # equals the shortened row length this raises IndexError -- confirm
        # the intended destination column with the caller.
        old_taxon = header.pop(args.taxonomy_column)
        header[args.end_taxonomy_column] = old_taxon

        file_handle_write.write("\t".join(header) + "\n")

        # write the new data
        for taxon, data in taxonomy_data.items():
            data[args.end_taxonomy_column] = taxon
            file_handle_write.write("\t".join(data) + "\n")