def main():
    """Rewrite a tab-delimited table with its taxonomy column trimmed.

    The first line of ``args.input`` is copied to ``args.output`` unchanged
    as the header. Every following line that starts with "#" is also copied
    unchanged. On all other lines the cell at ``args.taxonomy_column`` is
    replaced by the value returned from ``utilities.taxonomy_trim``.

    If ``args.taxonomy_column`` is None it is set, from the first data row,
    to the index of the first cell containing "k__".

    Exits through ``sys.exit`` with an error message if the output file
    cannot be opened, the input file cannot be read, or no taxonomy column
    can be detected.
    """

    args = parse_arguments(sys)

    try:
        file_handle_write = open(args.output, "wt")
    except EnvironmentError:
        sys.exit("Error: Unable to open output file: " + args.output)

    # the context managers close both files on every path out of this
    # function, including the sys.exit calls below
    with file_handle_write:
        try:
            file_handle_read = open(args.input, "rt")
        except EnvironmentError:
            sys.exit("Error: Unable to read input file: " + args.input)

        with file_handle_read:
            # write the header to the new file
            file_handle_write.write(file_handle_read.readline())

            # trim the taxonomy
            for line in file_handle_read:
                # comment lines are passed through untouched
                if line.startswith("#"):
                    file_handle_write.write(line)
                    continue

                data = line.rstrip().split("\t")

                if args.taxonomy_column is None:
                    # try to figure out which column has the taxonomy data
                    args.taxonomy_column = next(
                        (index for index, value in enumerate(data)
                         if "k__" in value),
                        None)
                    if args.taxonomy_column is None:
                        sys.exit("Error unable to find the taxonomy column. Please provide it with the option --taxonomy-column <0>.")

                # taxonomy_trim works on lists, so wrap and unwrap the cell
                data[args.taxonomy_column] = utilities.taxonomy_trim(
                    [data[args.taxonomy_column]])[0]
                file_handle_write.write("\t".join(data) + "\n")
def test_taxonomy_trim(self):
    """
    Check the names returned by the taxonomy trim function
    """

    # each pair is (full lineage passed in, trimmed name expected back)
    cases = (
        ("k__k3;p__p3;c__c2;o__o3;f__;g__;s__", "o__o3.f__.g__.s__"),
        ("k__k3;p__p3;c__c2;o__o3;f__f1;g__g1;s__", "g__g1.s__"),
        ("k__k3;p__p3;c__c2;o__o3;f__f1;g__g1;s__s1", "g__g1.s__s1"),
        ("k__k3;p__p3;c__c2;o__o3;f__f1;g__;s__", "f__f1.g__.s__"),
    )

    taxa = [lineage for lineage, _ in cases]
    expected_taxa = [trimmed for _, trimmed in cases]

    # the function is called once with the whole list, as a caller would
    self.assertEqual(utilities.taxonomy_trim(taxa), expected_taxa)
#' ## Terminal Taxa #+ echo=False # plot the relative abundance of the top terminal taxa # get the terminal taxa terminal_taxa_relab, terminal_data_relab = utilities.terminal_taxa( taxonomy, relab_data) # get the top rows of terminal taxa top_terminal_taxa, top_terminal_data = utilities.top_rows(terminal_taxa_relab, terminal_data_relab, max_taxa, function="average") # reduce the taxa names to just the most specific identifier shorted_names = utilities.taxonomy_trim(top_terminal_taxa) # sort the data with the samples with the top terminal taxa first sorted_samples_terminal, sorted_data_terminal = utilities.sort_data( top_terminal_data[0], samples) transpose_top_terminal_data = numpy.transpose(top_terminal_data) sorted_top_terminal_data = numpy.transpose([ transpose_top_terminal_data[samples.index(sample)] for sample in sorted_samples_terminal ]) # add the remaining terminal taxa as "other" to the data shorted_names_plus_other, sorted_top_terminal_data_plus_other = visualizations.fill_taxonomy_other( shorted_names, sorted_top_terminal_data) document.plot_stacked_barchart(sorted_top_terminal_data_plus_other,
def main():
    """Trim the taxonomy column of a table and sum rows with the same name.

    Reads the tab-delimited file ``args.input`` (opened with ``gzip`` when
    the name ends in ".gz") and writes to ``args.output``:

    * The first line is the header; if it starts with ``BIOM_COMMENT`` the
      next line is used as the header instead.
    * Lines starting with "#" are written to the output as they are read,
      so they end up before the header line.
    * For every other line the taxonomy cell is trimmed with
      ``utilities.taxonomy_trim`` and removed from the row. Rows sharing a
      trimmed name are merged: the first cell comes from the latest row and
      the remaining cells are summed as floats.
    * Finally the header and one line per trimmed name are written, with
      the taxonomy label / trimmed name stored at
      ``args.end_taxonomy_column``.

    If ``args.taxonomy_column`` is None it is set, from the first data row,
    to the index of the first cell containing "k__". If
    ``args.end_taxonomy_column`` is None it defaults to the taxonomy column.

    Exits through ``sys.exit`` with an error message if the output file
    cannot be opened, the input file cannot be read, or the taxonomy column
    cannot be determined (including when the input has no data rows).
    """

    missing_column_error = "Error unable to find the taxonomy column. Please provide it with the option --taxonomy-column <0>."

    args = parse_arguments(sys)

    try:
        file_handle_write = open(args.output, "wt")
    except EnvironmentError:
        sys.exit("Error: Unable to open output file: " + args.output)

    # the context managers close both files on every path out of this
    # function, including the sys.exit calls below
    with file_handle_write:
        try:
            if args.input.endswith(".gz"):
                file_handle_read = gzip.open(args.input, "rt")
            else:
                file_handle_read = open(args.input, "rt")
        except EnvironmentError:
            sys.exit("Error: Unable to read input file: " + args.input)

        with file_handle_read:
            # read the header, it is written after the data has been processed
            header = file_handle_read.readline().rstrip().split("\t")

            # ignore comment if present
            if header[0].startswith(BIOM_COMMENT):
                header = file_handle_read.readline().rstrip().split("\t")

            # trim the taxonomy and sum species
            taxonomy_data = {}
            for line in file_handle_read:
                # comment lines are passed straight through to the output
                if line.startswith("#"):
                    file_handle_write.write(line)
                    continue

                data = line.rstrip().split("\t")

                if args.taxonomy_column is None:
                    # try to figure out which column has the taxonomy data
                    args.taxonomy_column = next(
                        (index for index, value in enumerate(data)
                         if "k__" in value),
                        None)
                    if args.taxonomy_column is None:
                        sys.exit(missing_column_error)

                # taxonomy_trim works on lists, so wrap and unwrap the cell
                new_taxonomy = utilities.taxonomy_trim(
                    [data[args.taxonomy_column]])[0]
                data.pop(args.taxonomy_column)

                if new_taxonomy in taxonomy_data:
                    # add this row to the running totals for the same name;
                    # the first cell is not numeric data and is kept as is
                    running_totals = taxonomy_data[new_taxonomy]
                    data = [data[0]] + [
                        str(float(a) + float(b))
                        for a, b in zip(running_totals[1:], data[1:])
                    ]

                taxonomy_data[new_taxonomy] = data

        # with no data rows there was nothing to detect the column from;
        # report it instead of failing on header.pop(None)
        if args.taxonomy_column is None:
            sys.exit(missing_column_error)

        # resolved once here so it also applies when the taxonomy column
        # was given on the command line
        if args.end_taxonomy_column is None:
            args.end_taxonomy_column = args.taxonomy_column

        # write the header
        # NOTE(review): the label (and the trimmed name below) overwrites
        # the cell at end_taxonomy_column in a row that is already one
        # column shorter; an index past the end of the shortened row will
        # raise IndexError -- confirm the intended values for this option
        old_taxon = header.pop(args.taxonomy_column)
        header[args.end_taxonomy_column] = old_taxon
        file_handle_write.write("\t".join(header) + "\n")

        # write the new data
        for taxon, data in taxonomy_data.items():
            data[args.end_taxonomy_column] = taxon
            file_handle_write.write("\t".join(data) + "\n")