def test_format_otu_table(self):
    """format_otu_table should return tab-delimited table"""
    counts = array([[1, 2, 3], [4, 5, 2718281828459045]])
    sample_ids = ['a', 'b', 'c']
    otu_ids = [1, 2]
    lineages = ['Bacteria', 'Archaea']

    # Non-legacy output without a taxonomy column.
    observed = format_otu_table(sample_ids, otu_ids, counts, legacy=False)
    expected = '# QIIME v%s OTU table\nOTU ID\ta\tb\tc\n1\t1\t2\t3\n2\t4\t5\t2718281828459045' % __version__
    self.assertEqual(observed, expected)

    # Non-legacy output with the lineage column appended to each row.
    observed = format_otu_table(sample_ids, otu_ids, counts, lineages,
                                legacy=False)
    expected = '# QIIME v%s OTU table\nOTU ID\ta\tb\tc\tConsensus Lineage\n1\t1\t2\t3\tBacteria\n2\t4\t5\t2718281828459045\tArchaea' % __version__
    self.assertEqual(observed, expected)

    # Three OTU ids do not match the two rows of the count matrix.
    self.assertRaises(ValueError, format_otu_table,
                      sample_ids, [1, 2, 3], counts)
def test_format_otu_table(self):
    """format_otu_table should return tab-delimited table"""
    matrix = array([[1, 2, 3], [4, 5, 2718281828459045]])
    sample_names = ['a', 'b', 'c']
    otu_names = [1, 2]
    taxonomy = ['Bacteria', 'Archaea']

    # Counts only: comment header, column header, one row per OTU.
    without_taxa = format_otu_table(sample_names, otu_names, matrix)
    self.assertEqual(
        without_taxa,
        '#Full OTU Counts\n#OTU ID\ta\tb\tc\n1\t1\t2\t3\n2\t4\t5\t2718281828459045')

    # With taxonomy: an extra "Consensus Lineage" column is expected.
    with_taxa = format_otu_table(sample_names, otu_names, matrix, taxonomy)
    self.assertEqual(
        with_taxa,
        '#Full OTU Counts\n#OTU ID\ta\tb\tc\tConsensus Lineage\n1\t1\t2\t3\tBacteria\n2\t4\t5\t2718281828459045\tArchaea')

    # A mismatched number of OTU ids must be rejected.
    self.assertRaises(ValueError, format_otu_table,
                      sample_names, [1, 2, 3], matrix)
def filter_samples_from_otu_table(otu_table_lines,
                                  samples_to_discard,
                                  negate=False):
    """ Remove specified samples from OTU table

        Only the first whitespace-delimited token of every entry in
        samples_to_discard is used as a sample id. When negate is true
        the listed samples are the ones kept rather than removed.
        Returns the re-formatted table as a list of lines.
    """
    parsed_table = parse_otu_table(otu_table_lines)
    listed_ids = set([entry.split()[0] for entry in samples_to_discard])
    sample_ids, otu_ids, counts, taxa = parsed_table

    kept_columns = []
    kept_sample_ids = []
    # Transposing yields one row per sample, so rows pair up with sample ids.
    for column, sample_id in zip(counts.transpose(), sample_ids):
        if negate:
            keep = sample_id in listed_ids
        else:
            keep = sample_id not in listed_ids
        if keep:
            kept_columns.append(column)
            kept_sample_ids.append(sample_id)

    # Transpose back to the OTU-by-sample orientation before formatting.
    filtered_counts = array(kept_columns).transpose()
    formatted = format_otu_table(kept_sample_ids,
                                 otu_ids,
                                 filtered_counts,
                                 taxa,
                                 skip_empty=True)
    return formatted.split('\n')
def make_new_otu_counts(otu_ids, sample_ids, otu_counts, consensus,
                        sample_to_subtract, samples_from_subject):
    """make the converted otu table

    otu_ids: ids labelling the rows of otu_counts
    sample_ids: list of ids labelling the columns of otu_counts
    otu_counts: 2-D array indexed as [otu index, sample index]
    consensus: taxonomy passed through unchanged to format_otu_table
    sample_to_subtract: dict mapping each output sample id to the id of
     the sample whose counts are subtracted from it
    samples_from_subject: dict mapping each output sample id to the ids
     of the samples checked for non-zero counts

    Returns the string produced by format_otu_table. Cells whose OTU has
    no positive count in any of the samples listed in
    samples_from_subject are set to the placeholder 999999999.

    Raises ValueError if a sample to subtract is missing from sample_ids.
    """
    # sorted() works on both Python 2 lists and Python 3 dict views,
    # unlike calling .sort() on the result of .keys().
    new_sample_ids = sorted(sample_to_subtract)
    new_otu_counts = zeros([len(otu_ids), len(new_sample_ids)])
    for index1 in range(len(otu_ids)):
        for index2, sample in enumerate(new_sample_ids):
            tpz_sample = sample_to_subtract[sample]
            if tpz_sample not in sample_ids:
                raise ValueError("There are samples in the category mapping file that are not in the otu table, such as sample: " + tpz_sample + ". Removing these samples from the category mapping file will allow you to proceed.")
            tpz_sample_index = sample_ids.index(tpz_sample)
            # get the new count as the value of the otu at the later
            # timepoint minus its value at timepoint zero
            old_sample_index = sample_ids.index(sample)
            new_count = otu_counts[index1, old_sample_index] - \
                otu_counts[index1, tpz_sample_index]
            # make sure that the count is not zero across all of the
            # subject's samples; resolve every index first so an unknown
            # sample id still raises ValueError
            subject_indices = [sample_ids.index(i)
                               for i in samples_from_subject[sample]]
            has_nonzeros = any(otu_counts[index1, j] > 0
                               for j in subject_indices)
            if has_nonzeros:
                new_otu_counts[index1, index2] = new_count
            else:
                new_otu_counts[index1, index2] = 999999999
    return format_otu_table(new_sample_ids, otu_ids, new_otu_counts,
                            consensus)
def filter_otus_from_otu_table(otu_table_lines, otus_to_discard, negate=False):
    """ Remove specified OTUs from otu_table

        otu_table_lines: lines of an OTU table, as accepted by
         parse_otu_table
        otus_to_discard: iterable of strings; the first
         whitespace-delimited token of each is taken as an OTU id
        negate: when true, keep only the listed OTUs instead of
         removing them

        Returns the filtered table as a list of lines.
    """
    sample_ids, otu_ids, otu_table_data, taxa = \
        parse_otu_table(otu_table_lines)
    otu_lookup = set([e.split()[0] for e in otus_to_discard])

    # A table without a taxonomy column has no taxa entries. Zipping
    # against that empty sequence would silently drop every OTU, so pair
    # rows with ids only and leave the taxonomy empty.
    has_taxa = len(taxa) > 0
    if has_taxa:
        rows = zip(otu_table_data, otu_ids, taxa)
    else:
        rows = ((row, otu_id, None)
                for row, otu_id in zip(otu_table_data, otu_ids))

    new_otu_table_data = []
    new_otu_ids = []
    new_taxa = []
    for row, otu_id, taxonomy in rows:
        listed = otu_id in otu_lookup
        keep = listed if negate else not listed
        if keep:
            new_otu_table_data.append(row)
            new_otu_ids.append(otu_id)
            if has_taxa:
                new_taxa.append(taxonomy)

    new_otu_table_data = array(new_otu_table_data)
    result = format_otu_table(sample_ids,
                              new_otu_ids,
                              new_otu_table_data,
                              new_taxa).split('\n')
    return result
def summarize_by_cat(map_lines, otu_sample_lines, category, norm):
    """creates the category otu table

    Returns the table formatted by format_otu_table, with one column per
    value of category. Columns whose counts sum to zero are left out.
    """
    cat_by_sample, sample_by_cat, num_meta, meta_dict, label_lists_dict, \
        num_samples_by_cat = get_sample_cat_info(map_lines, category)
    labels = label_lists_dict[category]

    lines, otus, taxonomy = get_counts_by_cat(
        otu_sample_lines, num_meta, meta_dict, labels, category,
        num_samples_by_cat, norm)

    # Remove columns that sum to 0: the mapping file may list more
    # samples than the OTU table, which leaves columns with no counts.
    new_labels = []
    new_columns = []
    for i, column in enumerate(zip(*lines)):
        total_col = sum([float(x) for x in column])
        if total_col > 0:
            new_columns.append(column)
            new_labels.append(labels[i])

    # Materialise the transposed rows: array() needs a real sequence, and
    # zip() is a lazy iterator on Python 3.
    new_lines = list(zip(*new_columns))

    lines = format_otu_table(new_labels, otus, array(new_lines),
                             taxonomy=taxonomy,
                             comment='Category OTU Counts-%s' % category)
    return lines
def pool_otu_table(otu_infile, otu_outfile,
                   pooled_sample_name, sample_ids_to_pool):
    """pools otu table file according to specified criteria.

    The columns of every sample listed in sample_ids_to_pool are summed
    into one column named pooled_sample_name, placed after the remaining
    samples. The formatted result is written to otu_outfile.
    """
    otu_table = parse_otu_table(otu_infile)
    sample_ids = otu_table[0]
    counts = otu_table[2]

    # Split the column indices into pooled and untouched samples.
    pooled_columns = []
    kept_columns = []
    kept_sample_ids = []
    for column, sample_id in enumerate(sample_ids):
        if sample_id in sample_ids_to_pool:
            pooled_columns.append(column)
        else:
            kept_columns.append(column)
            kept_sample_ids.append(sample_id)
    new_sample_ids = kept_sample_ids + [pooled_sample_name]

    # Untouched samples keep their columns; the last column holds the pool.
    pooled_abundance = counts[:, pooled_columns].sum(1)
    new_shape = (len(counts), len(new_sample_ids))
    pooled_mtx = numpy.zeros(new_shape, dtype=counts.dtype)
    pooled_mtx[:, :-1] = counts[:, kept_columns]
    pooled_mtx[:, -1] = pooled_abundance

    formatted = format_otu_table(new_sample_ids, otu_table[1], pooled_mtx,
                                 taxonomy=otu_table[3])
    otu_outfile.write(formatted)
def main():
    """Sort the samples of an OTU table and write the result.

    The order comes either from a mapping-file field (--sort_field with
    --mapping_fp) or from a file of sample ids (--sorted_sample_ids_fp).
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_table_data = parse_otu_table(open(opts.input_otu_table, 'U'))
    sort_field = opts.sort_field
    mapping_fp = opts.mapping_fp
    sorted_sample_ids_fp = opts.sorted_sample_ids_fp

    if sort_field and mapping_fp:
        mapping_data = parse_mapping_file(open(mapping_fp, 'U'))
        result = sort_otu_table_by_mapping_field(otu_table_data,
                                                 mapping_data,
                                                 sort_field)
    elif sorted_sample_ids_fp:
        sorted_sample_ids = sample_ids_from_f(open(sorted_sample_ids_fp, 'U'))
        result = sort_otu_table(otu_table_data,
                                sorted_sample_ids)
    else:
        # Report through the parser object returned above; the bare name
        # `parser` is not defined in this function.
        option_parser.error("must provide either --sort_field and --mapping_fp OR --sorted_sample_ids_fp")

    # format and write the otu table
    result_str = format_otu_table(result[0], result[1], result[2], result[3])
    with open(opts.output_fp, 'w') as of:
        of.write(result_str)
def reconcile_hosts_symbionts(otu_file, host_dist):
    """Restrict a cOTU table and a host distance matrix to shared samples.

    Returns a StringIO holding the filtered OTU table together with the
    filtered host distance matrix.
    """
    # Keep only the samples named in the first element of host_dist
    # (presumably the labels of the host distance matrix - TODO confirm).
    host_samples = host_dist[0]
    filtered_cotu_table = filter_samples_from_otu_table(otu_file,
                                                        host_samples,
                                                        negate=True)

    # Parse the filtered cOTU table, then filter it again because
    # skip_empty doesn't seem to be working in format_otu_table called
    # from filter_samples_from_otu_table.
    sample_names, taxon_names, data, lineages = parse_otu_table(
        filtered_cotu_table)
    sample_names, taxon_names, data, lineages = filter_otu_table_by_min(
        sample_names, taxon_names, data, lineages, min=1)

    # Trim the host distances to the surviving samples.
    # Note: this requires the modified filter_dist method which returns a
    # native dm tuple rather than a string.
    host_dist_filtered = filter_samples_from_distance_matrix(
        host_dist, sample_names, negate=True)

    table_str = format_otu_table(sample_names, taxon_names, data, lineages)
    return StringIO(table_str), host_dist_filtered
def _filter_table_neg_control(otu_table_lines, samples):
    """removes OTUs from OTU_table that are found in one of the samples
    in the sample list

    Ids in samples that are absent from the table are ignored. The
    remaining table is finally passed through _filter_table_samples with
    a minimum of 1.
    """
    sample_ids, otu_ids, otu_table, lineages = parse_otu_table(otu_table_lines)

    # column indices of the negative-control samples present in the table
    control_columns = [sample_ids.index(name)
                       for name in samples if name in sample_ids]

    kept_rows = []
    kept_otu_ids = []
    kept_lineages = []
    for otu_index, counts in enumerate(otu_table):
        # total count of this OTU across the negative controls
        control_total = sum(counts[column] for column in control_columns)
        # only OTUs never seen in a control make it to the new table
        if control_total == 0:
            if lineages:
                kept_lineages.append(lineages[otu_index])
            kept_rows.append(list(counts))
            kept_otu_ids.append(otu_ids[otu_index])

    formatted = format_otu_table(sample_ids, kept_otu_ids, array(kept_rows),
                                 kept_lineages)
    # remove the samples
    return _filter_table_samples(formatted.split('\n'), 1)
def setUp(self):
    """Build the 19-sample, 9-taxon fixture shared by the tests."""
    # One row per sample, one column per taxon.
    sample_rows = [
        [7, 1, 0, 0, 0, 0, 0, 0, 0],
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ]
    self.l19_data = numpy.array(sample_rows)

    self.l19_sample_names = [
        'sam1', 'sam2', 'sam3', 'sam4', 'sam5', 'sam6', 'sam7', 'sam8',
        'sam9', 'sam_middle', 'sam11', 'sam12', 'sam13', 'sam14', 'sam15',
        'sam16', 'sam17', 'sam18', 'sam19',
    ]
    self.l19_taxon_names = [
        'tax1', 'tax2', 'tax3', 'tax4', 'endbigtaxon',
        'tax6', 'tax7', 'tax8', 'tax9',
    ]
    # Same taxa, except the first name contains an underscore.
    self.l19_taxon_names_w_underscore = [
        'ta_x1', 'tax2', 'tax3', 'tax4', 'endbigtaxon',
        'tax6', 'tax7', 'tax8', 'tax9',
    ]

    # format_otu_table gets the transposed matrix (taxa as rows).
    taxa_by_sample = self.l19_data.T
    self.l19_str = format_otu_table(self.l19_sample_names,
                                    self.l19_taxon_names,
                                    taxa_by_sample)
    self.l19_str_w_underscore = format_otu_table(
        self.l19_sample_names,
        self.l19_taxon_names_w_underscore,
        taxa_by_sample)

    self.l19_tree_str = '((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4, ((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2, (tax9:0.3, endbigtaxon:.08));'
    self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)

    self.files_to_remove = []
    self.folders_to_remove = []
def _filter_table_samples(otu_table_lines, min_seqs_per_sample):
    """removes samples from OTU_table that have less than
    min_seqs_per_sample

    otu_table_lines: lines of an OTU table, as accepted by parse_otu_table
    min_seqs_per_sample: threshold, converted with int() before comparing

    Returns the filtered table as formatted by format_otu_table.
    """
    sample_ids, otu_ids, otu_table, lineages = parse_otu_table(otu_table_lines)

    # NOTE(review): presumably `sum` adds the rows of the OTU-by-sample
    # array together, giving one total per sample column - confirm which
    # `sum` is in scope in this module.
    counts = sum(otu_table)
    big_enough_samples = (counts >= int(min_seqs_per_sample)).nonzero()[0]

    # Indexing with an index array already returns a new array, so no
    # explicit copy of the table is needed first.
    res_otu_table = otu_table[:, big_enough_samples]

    # Build a real list; map() would be a one-shot iterator on Python 3.
    res_sample_ids = [sample_ids[i] for i in big_enough_samples]
    return format_otu_table(res_sample_ids, otu_ids, res_otu_table, lineages)
def _write_rarefaction(self, fname, sub_sample_ids, sub_otu_ids,
                       sub_otu_table, otu_lineages):
    """Write one rarefied OTU table to the file named fname.

    Nothing is written when sub_otu_table has a dimension of size 0.
    fname is also used as the comment passed to format_otu_table.
    """
    if min(numpy.shape(sub_otu_table)) == 0:  # no data to write
        return
    # Format before opening so a formatting error does not leave an empty
    # file behind; the with-block closes the handle even if write fails.
    table_str = format_otu_table(sub_sample_ids, sub_otu_ids,
                                 sub_otu_table, otu_lineages, comment=fname)
    with open(fname, 'w') as f:
        f.write(table_str)
def _filter_table_samples(otu_table_lines, min_seqs_per_sample):
    """removes samples from OTU_table that have less than
    min_seqs_per_sample

    Returns the table re-formatted by format_otu_table, restricted to the
    samples whose total is at least int(min_seqs_per_sample).
    """
    sample_ids, otu_ids, otu_table, lineages = parse_otu_table(otu_table_lines)

    # NOTE(review): assumes `sum` folds the table's rows into per-sample
    # totals (rows are OTUs, columns are samples) - TODO confirm.
    counts = sum(otu_table)
    keep_columns = (counts >= int(min_seqs_per_sample)).nonzero()[0]

    # Selecting columns with an index array produces a fresh array, which
    # makes a preliminary .copy() of the whole table redundant.
    res_otu_table = otu_table[:, keep_columns]

    # A list comprehension gives a concrete list on Python 2 and 3 alike,
    # where map() would be lazy on Python 3.
    res_sample_ids = [sample_ids[column] for column in keep_columns]
    return format_otu_table(res_sample_ids, otu_ids, res_otu_table, lineages)
def _write_rarefaction(self, depth, rep, sub_sample_ids, sub_otu_ids,
                       sub_otu_table, otu_lineages):
    """Write one rarefied OTU table into self.output_dir.

    depth and rep can be numbers or strings; they form the file name
    'rarefaction_<depth>_<rep>.txt', which is also used as the table
    comment. Nothing is written when sub_otu_table has a dimension of
    size 0.
    """
    if min(numpy.shape(sub_otu_table)) == 0:  # no data to write
        return
    fname = 'rarefaction_' + str(depth) + '_' + str(rep) + '.txt'
    # Format first so a formatting error does not leave an empty file;
    # the with-block guarantees the handle is closed if write() raises.
    table_str = format_otu_table(sub_sample_ids, sub_otu_ids,
                                 sub_otu_table, otu_lineages, comment=fname)
    with open(os.path.join(self.output_dir, fname), 'w') as f:
        f.write(table_str)
def setUp(self):
    """Create the 19-sample / 9-taxon fixture used by the tests."""
    sample_names = ['sam1', 'sam2', 'sam3', 'sam4', 'sam5', 'sam6',
                    'sam7', 'sam8', 'sam9', 'sam_middle', 'sam11', 'sam12',
                    'sam13', 'sam14', 'sam15', 'sam16', 'sam17', 'sam18',
                    'sam19']
    taxon_names = ['tax1', 'tax2', 'tax3', 'tax4', 'endbigtaxon',
                   'tax6', 'tax7', 'tax8', 'tax9']
    # Variant in which only the first taxon name carries an underscore.
    taxon_names_w_underscore = ['ta_x1', 'tax2', 'tax3', 'tax4',
                                'endbigtaxon', 'tax6', 'tax7', 'tax8',
                                'tax9']

    # Rows are samples, columns are taxa.
    self.l19_data = numpy.array([
        [7, 1, 0, 0, 0, 0, 0, 0, 0],
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])
    self.l19_sample_names = sample_names
    self.l19_taxon_names = taxon_names
    self.l19_taxon_names_w_underscore = taxon_names_w_underscore

    # The formatter is given the transposed matrix (one row per taxon).
    self.l19_str = format_otu_table(
        sample_names, taxon_names, self.l19_data.T)
    self.l19_str_w_underscore = format_otu_table(
        sample_names, taxon_names_w_underscore, self.l19_data.T)

    self.l19_tree_str = '((((tax7:0.1,tax3:0.2):.98,tax8:.3, tax4:.3):.4, ((tax1:0.3, tax6:.09):0.43,tax2:0.4):0.5):.2, (tax9:0.3, endbigtaxon:.08));'
    self.l19_tree = parse_newick(self.l19_tree_str, PhyloNode)
    self.files_to_remove = []
def test_format_otu_table(self):
    """format_otu_table should return biom-formatted string"""
    counts = array([[1, 2, 3], [4, 5, 2718281828459045]])
    sample_ids = ["a", "b", "c"]
    otu_ids = [1, 2]

    biom_str = format_otu_table(sample_ids, otu_ids, counts)

    # Round-trip through the biom parser: the output is only checked for
    # being parseable and for carrying the expected ids (as strings).
    table = parse_biom_table(biom_str.split("\n"))
    self.assertEqual(table.ObservationIds, ("1", "2"))
    self.assertEqual(table.SampleIds, ("a", "b", "c"))
def test_format_otu_table(self):
    """format_otu_table should return biom-formatted string"""
    sample_names = ['a', 'b', 'c']
    otu_names = [1, 2]
    matrix = array([[1, 2, 3],
                    [4, 5, 2718281828459045]])

    formatted_lines = format_otu_table(sample_names, otu_names,
                                       matrix).split('\n')

    # confirm that parsing the result gives a valid biom table with the
    # expected observation and sample ids
    parsed = parse_biom_table(formatted_lines)
    self.assertEqual(parsed.ObservationIds, ('1', '2'))
    self.assertEqual(parsed.SampleIds, ('a', 'b', 'c'))
def test_format_otu_table(self):
    """format_otu_table should return biom-formatted string"""
    data = array([[1, 2, 3], [4, 5, 2718281828459045]])
    expected_sample_ids = ('a', 'b', 'c')
    expected_otu_ids = ('1', '2')

    result = format_otu_table(['a', 'b', 'c'], [1, 2], data)

    # The string must parse as a biom table whose ids match the inputs;
    # the integer OTU ids are expected back as strings.
    biom_table = parse_biom_table(result.split('\n'))
    self.assertEqual(biom_table.ObservationIds, expected_otu_ids)
    self.assertEqual(biom_table.SampleIds, expected_sample_ids)
def merge_n_otu_tables(otu_table_fs):
    """ Merge n otu tables

        otu_table_fs: sequence of two or more OTU tables, each in the
         form accepted by merge_otu_tables

        Tables are merged left to right: the running result is
        re-formatted into lines and merged with the next table.

        Returns (sample_names, otu_names, data, taxonomy) of the final
        merge. Raises ValueError if fewer than two tables are given.
    """
    if len(otu_table_fs) < 2:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Two or more OTU tables must be provided.")
    otu_table_f0 = otu_table_fs[0]
    for otu_table_f in otu_table_fs[1:]:
        sample_names, otu_names, data, taxonomy = \
            merge_otu_tables(otu_table_f0, otu_table_f)
        # Turn the merged table back into lines so it can be the
        # left-hand input of the next merge.
        otu_table_f0 = format_otu_table(sample_names=sample_names,
                                        otu_names=otu_names,
                                        data=data,
                                        taxonomy=taxonomy).split('\n')
    return sample_names, otu_names, data, taxonomy
def main():
    """Simulate an OTU table from an input table and tree, and write it.

    Reads opts.otu_table and opts.tree_file, calls sim_otu_table with
    opts.num and opts.dissim, and writes the formatted result to
    opts.output_file.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # with-blocks close every handle, including on errors.
    with open(opts.otu_table, 'U') as otu_table_fh:
        sample_ids, otu_ids, otu_mtx, otu_metadata = \
            parse_otu_table(otu_table_fh)
    with open(opts.tree_file, 'U') as tree_fh:
        tree = DndParser(tree_fh)

    res_sam_names, res_otus, res_otu_mtx, res_otu_metadata = \
        sim_otu_table(sample_ids, otu_ids, otu_mtx, otu_metadata, tree,
                      opts.num, opts.dissim)

    # Open the output only once there is something to write, so a failed
    # run does not leave a truncated or never-flushed file behind.
    table_str = format_otu_table(res_sam_names, res_otus, res_otu_mtx,
                                 res_otu_metadata)
    with open(opts.output_file, 'w') as out_fh:
        out_fh.write(table_str)
def main():
    """Convert the file at opts.input_path (read with parse_trflp) into
    an OTU table written to opts.output_path.

    Raises IOError if the input path is not a file or the output path
    already exists.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # Call form of raise: valid on both Python 2 and Python 3.
    if not isfile(opts.input_path):
        raise IOError(
            "Input path (%s) not valid. Does it exist?" % opts.input_path)

    if isfile(opts.output_path):
        raise IOError(
            "Output path (%s) already exists. Please " % opts.output_path +
            "remove or specify a different path")

    with open(opts.input_path, 'U') as input_f:
        samples, otus, data = parse_trflp(input_f)

    table_str = format_otu_table(samples, otus, data,
                                 comment='Created with %s' % __file__)
    # The with-block closes the output even if the write fails.
    with open(opts.output_path, 'w') as output_f:
        output_f.write(table_str)
def summarize_by_cat(map_lines, otu_sample_lines, category, dir_path, norm):
    """creates the category otu table

    The table is written to '<category>_otu_table_norm.txt' inside
    dir_path when norm is true, otherwise to '<category>_otu_table.txt'.
    """
    cat_by_sample, sample_by_cat, num_meta, meta_dict, label_lists_dict, \
        num_samples_by_cat = get_sample_cat_info(map_lines, category)

    lines, otus, taxonomy = get_counts_by_cat(
        otu_sample_lines, num_meta, meta_dict, label_lists_dict[category],
        category, num_samples_by_cat, norm)

    lines = format_otu_table(label_lists_dict[category], otus, array(lines),
                             taxonomy=taxonomy,
                             comment='Category OTU Counts-%s' % category)

    if norm:
        file_name = os.path.join(dir_path, '%s_otu_table_norm.txt' % category)
    else:
        file_name = os.path.join(dir_path, '%s_otu_table.txt' % category)

    # The with-block closes the handle even if the write raises.
    with open(file_name, 'w') as f:
        f.write(lines)
def filter_otu_table_to_n_samples(otu_table_lines, n):
    """ randomly select n samples from the otu table

        otu_table_lines: lines of an OTU table, as accepted by
         parse_otu_table
        n: number of samples to keep; must be at least 1

        Returns the filtered table as a list of lines. If n exceeds the
        number of samples, all samples are kept.
        Raises ValueError if n is smaller than 1.
    """
    if n < 1:
        # The check accepts n == 1, so the message says "at least 1".
        raise ValueError(
            "number of randomly selected sample ids must be at least 1")
    sample_ids, otu_ids, otu_table_data, taxa = \
        parse_otu_table(otu_table_lines)

    # Shuffle a copy so the parsed sample order is left untouched.
    samples_to_keep = list(sample_ids)
    shuffle(samples_to_keep)
    samples_to_keep = samples_to_keep[:n]

    # Re-create the table lines from the parsed data, then keep only the
    # chosen samples (negate=True turns the discard list into a keep list).
    otu_table_lines = format_otu_table(
        sample_ids, otu_ids, otu_table_data, taxa).split('\n')
    result = filter_samples_from_otu_table(otu_table_lines,
                                           samples_to_keep,
                                           negate=True)
    return result
def split_otu_table_on_taxonomy(otu_table_lines, level):
    """ Split OTU table by taxonomic level, yielding formatted OTU tables

        otu_table_lines: lines of an OTU table, as accepted by
         parse_otu_table
        level: number of leading taxonomy entries used to group OTUs;
         must be at least 1

        Yields (taxon_at_level, formatted_otu_table) pairs, where
        taxon_at_level is the ';'-joined taxonomy prefix.
        Raises ValueError if level is smaller than 1. Because this is a
        generator, the error surfaces when iteration starts.
    """
    if level < 1:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Taxonomic level must be greater than zero")
    sample_ids, otu_ids, otu_table_data, taxa = \
        parse_otu_table(otu_table_lines)

    # taxon prefix -> [otu ids, count rows, full taxonomy entries]
    taxon_data = {}
    for otu_id, counts, taxon in zip(otu_ids, otu_table_data, taxa):
        taxon_at_level = ';'.join(taxon[:level])
        current_taxon_table = taxon_data.setdefault(taxon_at_level,
                                                    [[], [], []])
        current_taxon_table[0].append(otu_id)
        current_taxon_table[1].append(counts)
        current_taxon_table[2].append(taxon)

    for taxon_at_level, taxon_datum in taxon_data.items():
        yield taxon_at_level, format_otu_table(sample_ids,
                                               taxon_datum[0],
                                               array(taxon_datum[1]),
                                               taxon_datum[2])
def filter_table(params, filtered_table_path, otu_file):
    """ Filters table according to OTU counts, occurrence, and taxonomy

    params: Dictionary containing minimum sequence count (min_otu_count) per
     OTU, minimum number of samples that OTU needs to occur in
     (min_otu_samples), targeted taxonomy to retain (included_taxa), and
     taxonomy to exclude (excluded_taxa).
    filtered_table_path: Open file object to write filtered table to.
    otu_file: Open file object of input OTU file.
    """
    min_otu_count = params['min_otu_count']
    min_otu_samples = params['min_otu_samples']
    included_taxa = params['included_taxa']
    excluded_taxa = params['excluded_taxa']

    otu_data = parse_otu_table(otu_file)
    sample_ids = otu_data[0]
    otus = otu_data[1]
    otu_counts = otu_data[2]

    # The taxonomy slot may be absent or empty. Bind taxa_lines in both
    # cases so the format_otu_table call below never hits an unbound name.
    try:
        taxa_lines = otu_data[3]
    except IndexError:
        taxa_lines = None
    taxa_present = taxa_lines is not None and len(taxa_lines) > 0

    # OTU ids that fail a filter; a set keeps the later membership test
    # cheap.
    flagged_otus = set()
    for index, otu_count in enumerate(otu_counts):
        # Abundance filters: total count and number of samples observed in.
        if otu_count.sum() < min_otu_count or \
           (otu_count > 0).sum() < min_otu_samples:
            flagged_otus.add(otus[index])
            continue
        if not taxa_present:
            continue

        taxa = set(taxa_lines[index])
        hits_excluded = taxa.intersection(excluded_taxa)
        hits_included = taxa.intersection(included_taxa)
        # An OTU survives when it matches no excluded taxon and either
        # matches an included taxon or no inclusion list was given.
        if hits_excluded or (included_taxa and not hits_included):
            flagged_otus.add(otus[index])

    raw_otu_table = format_otu_table(otu_data[0] if False else sample_ids,
                                     otus, otu_counts,
                                     taxonomy=taxa_lines,
                                     skip_empty=True).split('\n')

    # Keep comment lines and every row whose OTU id was not flagged.
    kept_lines = []
    for line in raw_otu_table:
        if line.startswith("#"):
            kept_lines.append(line + '\n')
            continue
        curr_otu_id = line.split('\t')[0].strip()
        if curr_otu_id not in flagged_otus:
            kept_lines.append(line + '\n')

    filtered_table_path.write(''.join(kept_lines))
try: table = zeros((len(all_otus), len(all_libs)), int) except MemoryError, e: stderr.write('memory error, check format of input otu file\n') stderr.write('are there really %s otus and %s samples?\n' % (len(all_otus), len(all_libs))) stderr.write('traceback follows:\n') raise(e) for o in all_otus: row_idx = all_otus.index(o) row = table[row_idx] seqids = otu_to_seqid[o] for s in seqids: lib = s.rsplit(delim, 1)[0] row[all_libs.index(lib)] += 1 if otu_to_taxonomy: taxonomy = [otu_to_taxonomy.get(o, 'None') for o in all_otus] else: taxonomy=None return format_otu_table(all_libs, all_otus, table, taxonomy, legacy=legacy) def remove_otus(otu_to_seqid,otus_to_exclude): """Remove otus_to_exclude from otu map """ otus_to_exclude_lookup = [e.split()[0] for e in otus_to_exclude] for otu_id in otu_to_seqid.keys(): if otu_id in otus_to_exclude_lookup: del otu_to_seqid[otu_id] return otu_to_seqid