def ids_biom(tmpdir_factory, list_of_kos):
    """Write a small two-sample BIOM JSON table to disk and return its path.

    The table's observations are ``list_of_kos`` followed by ``'K99999'``;
    its samples are ``'Sample1'`` and ``'Sample2'``.

    Args:
        tmpdir_factory: Object whose ``mktemp(name)`` returns a directory
            handle supporting ``.join(filename)`` (presumably pytest's
            ``tmpdir_factory`` fixture -- confirm against the caller).
        list_of_kos: List of observation ids.  It is concatenated with a
            list, so it must itself be a list.  NOTE(review): the hard-coded
            matrix has four columns, so this presumably holds three ids.

    Returns:
        The path of the written ``ko_list.biom`` file, as a ``str``.
    """
    fn = tmpdir_factory.mktemp("data").join("ko_list.biom")
    observations = list_of_kos + ['K99999']
    samples = ('Sample1', 'Sample2')
    # Written as one row per sample; transposed below so the rows line up
    # with ``observations`` and the columns with ``samples``.
    data = np.array([[1, 1, 0, 0], [1, 0, 1, 0]])
    table = Table(data.transpose(), observations, samples)
    # Close the handle before returning so the JSON is fully flushed by the
    # time a caller opens the path.
    with open(str(fn), 'w') as handle:
        table.to_json('testing', handle)
    return str(fn)
def main(table_loc, otu_list, collapsed_name, output_file, classic=False):
    """Collapse a BIOM table into two observations: listed vs. not listed.

    Observation ids are read from ``otu_list`` and intersected with the ids
    present in the table.  Per-sample sums are then computed separately over
    the listed observations and over all remaining observations, and written
    out as a two-row table.

    Args:
        table_loc: Path of the input table, passed to ``load_table``.
        otu_list: Path of a text file of whitespace-separated observation ids.
        collapsed_name: Id given to the row of listed observations; the other
            row is named ``"not_" + collapsed_name``.
        output_file: Path of the file to write.
        classic: If true, write the output of ``to_tsv()``; otherwise write
            BIOM JSON.
    """
    table = load_table(table_loc)

    with open(otu_list) as handle:
        listed = set(handle.read().split())
    # Ids absent from the table are dropped before filtering.
    otus = listed & set(table.ids(axis="observation"))

    table1 = table.filter(otus, axis="observation", inplace=False)
    table2 = table.filter(otus, axis="observation", invert=True,
                          inplace=False)
    sums1 = table1.sum(axis='sample')
    sums2 = table2.sum(axis='sample')

    # NOTE(review): "otu baptable" looks like a typo for "OTU table"; it is
    # left unchanged here because the value is passed through to biom.
    new_table = Table(numpy.array([sums1, sums2]),
                      [collapsed_name, "not_" + collapsed_name],
                      table.ids(axis="sample"),
                      type="otu baptable")

    # A context manager guarantees the output is flushed and closed.
    with open(output_file, 'w') as out:
        if classic:
            # Tab-delimited ("classic") table.
            out.write(new_table.to_tsv())
        else:
            # BIOM JSON table.
            new_table.to_json("predict_reactions.py", out)
def main():
    """Sum the rows of a BIOM table into OTUs according to an OTU map.

    Command-line options:
        -i/--input_file   path of the input table (read with ``load_table``)
        -m/--otu_map      path of the OTU map
        -o/--output_file  path of the BIOM JSON table to write

    Each non-blank line of the OTU map is split on whitespace: the first
    field is the OTU id and the remaining fields are observation ids of the
    input table whose rows are added together to form that OTU's row.
    """
    parser = argparse.ArgumentParser()
    # All three paths are needed; requiring them gives a usage error instead
    # of a TypeError from open(None).
    parser.add_argument("-i", "--input_file", required=True,
                        help="location of biom table")
    parser.add_argument("-m", "--otu_map", required=True,
                        help="location of OTU map")
    parser.add_argument("-o", "--output_file", required=True,
                        help="location of output biom table")
    args = parser.parse_args()

    # Default text mode already applies universal newlines on Python 3; the
    # old 'U' flag is rejected by Python 3.11+.
    otu_map = {}
    with open(args.otu_map) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                # Skip blank lines rather than failing on fields[0].
                continue
            otu_map[fields[0]] = fields[1:]

    table = load_table(args.input_file)
    new_table = np.zeros((len(otu_map), table.shape[1]))
    for i, members in enumerate(otu_map.values()):
        for seq in members:
            new_table[i] += table.data(seq, axis="observation")

    # Pass a real list: on Python 3 ``dict.keys()`` is a view, not a sequence.
    new_table = Table(new_table, list(otu_map), table.ids())
    with open(args.output_file, 'w') as out:
        new_table.to_json("dada2_to_otu_table", out)
def _biom_version_tuple(version):
    """Return up to three leading numeric components of ``version`` as ints."""
    import re

    parts = []
    for piece in version.split('.')[:3]:
        match = re.match(r'\d+', piece)
        if match is None:
            break
        parts.append(int(match.group()))
    return tuple(parts)


def generaete_biom_file(res_df, o, tg_rank, sampleid):
    """Write the rows of ``res_df`` at rank ``tg_rank`` as a BIOM JSON table.

    Args:
        res_df: DataFrame with at least the columns ``LEVEL``, ``ABUNDANCE``
            and ``TAXID``.
        o: Open, writable file object that receives the JSON.
        tg_rank: Value of ``LEVEL`` selecting the rows to export.
        sampleid: Id of the single sample (column) in the table.

    Returns:
        ``True`` once the table has been written.

    Exits the process via ``sys.exit`` if the installed biom library is older
    than 2.1.7.
    """
    import numpy as np
    import biom
    from biom.table import Table

    # Compare numerically: as plain strings, '2.1.10' sorts before '2.1.7'
    # and a newer release would be rejected.
    if _biom_version_tuple(biom.__version__) < (2, 1, 7):
        sys.exit("[ERROR] Biom library requires v2.1.7 or above.\n")

    target_idx = (res_df['LEVEL'] == tg_rank)
    # Copy the selection so adding a column does not write into a slice of
    # the caller's frame.
    target_df = res_df.loc[target_idx, ['ABUNDANCE', 'TAXID']].copy()
    # The lineage string returned for each taxid is split on '|' into a list.
    target_df['LINEAGE'] = target_df['TAXID'].apply(
        lambda x: gt.taxid2lineage(x, True, True)).str.split('|')

    sample_ids = [sampleid]
    # One row per selected taxon, one column for the single sample.
    data = np.array(target_df['ABUNDANCE']).reshape(len(target_df), 1)
    observ_ids = target_df['TAXID']
    observ_metadata = [{'taxonomy': x} for x in target_df['LINEAGE'].tolist()]

    biom_table = Table(data, observ_ids, sample_ids, observ_metadata,
                       table_id='GOTTCHA2')
    biom_table.to_json('GOTTCHA2', direct_io=o)

    return True
mode = 'less greedy' print('Released are the {0} capitalists.'.format(mode)) if not greedy: print("\tI'll spit out some profit to miximize mine.\n") else: print('\tAll profit is mine!\n') alnNormalized = amplicon.initAlignment(alnString) wtaProfile = amplicon.winnerTakeAll(alnNormalized, progress=True, greedy=greedy, weight=weight) print('Winner take all profile found! {0} references survived.'.format( len(wtaProfile))) profile = list(wtaProfile.items()) profile.sort(key=lambda x: x[1], reverse=True) print('Tab-delimited profile wrote to {0}.'.format(tabFile)) with open(tabFile, 'w') as f: f.write('{0}\t{1}\n'.format('Reference', sampleName)) for item in profile: if int(item[1]) >= 1: f.write('{0}\t{1}\n'.format(item[0], int(item[1]))) # Output as biom format biomTable = Table(np.array([[i[1]] for i in profile]), [i[0] for i in profile], [sampleName], table_id='single_sample_biom') print('Biom JSON format profile wrote to {0}.'.format(biomFile)) with open(biomFile, 'w') as f: biomTable.to_json('Generated_by_metaSeq', f)
# Collect the metadata for each observation, in the same order as observ_ids.
for taxon in observ_ids:
    observation_metadata.append(Taxonomy[taxon])

# NOTE(review): table_id is still the placeholder 'Example Table'.
table = Table(data, observ_ids, sample_id, observation_metadata,
              sample_metadata, table_id='Example Table')
# Single-argument print with parentheses behaves the same on Python 2 and 3.
print(table)

# Context managers close the files even if serialisation raises.
with open(args.prefix + ".biom", "w") as out:
    table.to_json('pplacer converted by jplace_to_biom.py v.' + VERSION,
                  direct_io=out)

with open(args.prefix + ".tsv", "w") as out:
    # header_value was misspelled 'taxomomy', which ended up as the header
    # of the metadata column in the TSV.
    out.write(table.to_tsv(header_key='taxonomy', header_value='taxonomy'))

print("\n##### DONE! #####\n")
# print "index: %i" %index ind_taxonomy.append('%s%s' %(syn[levels[index]], taxon[0]['ScientificName'])) # print ind_taxonomy Taxonomy[taxon[0]['ScientificName']]['taxonomy'] = ind_taxonomy # print "Taxonomy: %s" %Taxonomy for taxon in observ_ids: # print taxon # print Taxonomy[taxon] observation_metadata.append(Taxonomy[taxon]) #print "observation metadata:\n%s" %observation_metadata #print len(observation_metadata) table = Table(data, observ_ids, sample_id, observation_metadata, sample_metadata, table_id='Example Table') print table out=open(args.prefix+".biom","w") table.to_json('pplacer converted by jplace_to_biom.py v.'+VERSION, direct_io=out) out.close() out=open(args.prefix+".tsv","w") out.write(table.to_tsv(header_key='taxonomy', header_value='taxomomy')) #to_json('generaged by test', direct_io=out) out.close() print "\n##### DONE! #####\n"