def combine_asr_tables(output_files, verbose=False):
    """Combine all tables coming from asr output.

    Cuts the 2nd column out of each file and joins them together into a
    single table. Assumes all output files have the same row identifiers
    and that these are in the same order.

    output_files: list of paths to tab-delimited asr output tables
    verbose: if True, print a progress line per file

    Returns a list of row lists, header row first.
    """
    # Going to store an array of arrays here
    combined_table = []

    # Load the first column (containing row ids). Which file we use
    # doesn't matter since they should all have identical first columns.
    table = LoadTable(filename=output_files[0], header=True, sep='\t')
    row_ids = table.getRawData(columns=[table.Header[0]])
    combined_table.append([table.Header[0]])
    for row_id in row_ids:
        combined_table.append([row_id])

    # Now add the rest of the files to the table
    for i, output_file in enumerate(output_files):
        if verbose:
            # i + 1 so the progress counter is human-readable
            # ("1 of N" .. "N of N" rather than starting at 0)
            print("Combining file {0} of {1}: {2}".format(
                i + 1, len(output_files), output_file))

        # pull out the second column (first column with actual predictions)
        table = LoadTable(filename=output_file, header=True, sep='\t')
        predictions = table.getRawData(columns=[table.Header[1]])

        # Add the header for our column to the list of headers
        combined_table[0].append(table.Header[1])

        # Add the rest of the values in the column; row j of the combined
        # table corresponds to prediction j-1 (row 0 is the header).
        for j, prediction in enumerate(predictions, 1):
            combined_table[j].append(prediction)

    return combined_table
def combine_asr_tables(output_files,verbose=False): """ Combine all tables coming from asr output. Cuts 2nd column out and joins them together into single table. Assumes all output files have same row identifiers and that these are in the same order. """ #Going to store an array of arrays here combined_table=[] #load in the first column (containing row ids). File doesn't matter since they should all have identical first columns. table=LoadTable(filename=output_files[0],header=True,sep='\t') row_ids = table.getRawData(columns=[table.Header[0]]) combined_table.append([table.Header[0]]) for row_id in row_ids: combined_table.append([row_id]) #Now add the rest of the files to the table for i,output_file in enumerate(output_files): if verbose: print "Combining file {0} of {1}: {2}".format(i,len(output_files),output_file) #pull out the second column (first column with actual preditions) table=LoadTable(filename=output_file,header=True,sep='\t') predictions = table.getRawData(columns=[table.Header[1]]) #Add the header for our column to the list of headers combined_table[0].append(table.Header[1]) #Add rest of values in the column j=1 for prediction in predictions: combined_table[j].append(prediction) j+=1 return combined_table
def test_export_table(self):
    """correctly generates table file"""
    source = dict(counts=[[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]],
                  ranks=[0, 1, 2, 3, 4],
                  labels=['a', 'b', 'c', 'd', 'e'])
    collection = RegionCollection(**source)
    # what the round-trip through disk should reproduce
    expected = collection.toTable().getRawData()
    collection.writeToFile('testdata', as_table=True)
    loaded = LoadTable('testdata', sep='\t')
    self.assertEqual(loaded.getRawData(), expected)
    remove_files(['testdata'], error_on_missing=False)
def wagner_for_picrust(tree_path, trait_table_path, gain=None,
                       max_paralogs=None, HALT_EXEC=False):
    """Run the Count application controller on a tree and trait table.

    tree_path: path to a newick tree file
    trait_table_path: path to a tab-delimited trait table
    gain: optional value for Count's -gain parameter
    max_paralogs: optional value for Count's -max_paralogs parameter
    HALT_EXEC: passed through to the Count app controller for debugging

    Returns the ancestral state reconstruction as a Cogent Table with
    one row per node ('nodes' column).
    """
    # initialize Count app controller
    count = Count(HALT_EXEC=HALT_EXEC)

    # set the optional parameters
    if gain:
        count.Parameters['-gain'].on(gain)
    if max_paralogs:
        count.Parameters['-max_paralogs'].on(max_paralogs)

    # Have to manipulate the trait table some: transpose it and strip
    # ids surrounded in quotes.
    table = LoadTable(filename=trait_table_path, header=True, sep='\t')

    # get the first column (containing row ids)
    genome_ids = table.getRawData(table.Header[0])
    # remove single quotes from the ids if they exist
    # (avoid the name `id`, which shadows the builtin)
    genome_ids = [str(genome_id).strip('\'') for genome_id in genome_ids]

    # transpose the matrix and relabel columns with the genome ids
    table = table.transposed(new_column_name=table.Header[0])
    table = table.withNewHeader(table.Header[1:], genome_ids)

    # write the modified table to a tmp file; guarantee cleanup even if
    # Count fails, so a crashed run doesn't leave tmp files behind
    tmp_table_path = get_tmp_filename()
    table.writeToFile(tmp_table_path, sep='\t')
    try:
        # Run Count here
        result = count(data=(tree_path, tmp_table_path))
    finally:
        remove(tmp_table_path)

    # count the tips so tip rows can be dropped from Count's output;
    # close the tree file handle instead of leaking it
    tree_file = open(tree_path)
    try:
        tree = DndParser(tree_file)
    finally:
        tree_file.close()

    # parse the results into a Cogent Table
    asr_table = parse_wagner_parsimony_output(
        result["StdOut"].readlines(), remove_num_tips=len(tree.tips()))

    # transpose the table so nodes become rows
    return asr_table.transposed(new_column_name='nodes')
def wagner_for_picrust(tree_path, trait_table_path, gain=None, max_paralogs=None, HALT_EXEC=False):
    '''Runs count application controller given path of tree and trait
    table and returns a Table'''
    # build the Count app controller and switch on any optional flags
    app = Count(HALT_EXEC=HALT_EXEC)
    if gain:
        app.Parameters['-gain'].on(gain)
    if max_paralogs:
        app.Parameters['-max_paralogs'].on(max_paralogs)

    # Count needs the trait table transposed, with any single-quoted
    # ids cleaned up first.
    trait_table = LoadTable(filename=trait_table_path, header=True, sep='\t')

    # first column holds the row (genome) ids
    raw_ids = trait_table.getRawData(trait_table.Header[0])
    cleaned_ids = [str(raw_id).strip('\'') for raw_id in raw_ids]

    # transpose, then swap the old headers for the cleaned genome ids
    trait_table = trait_table.transposed(new_column_name=trait_table.Header[0])
    trait_table = trait_table.withNewHeader(trait_table.Header[1:], cleaned_ids)

    # stage the reshaped table in a tmp file for Count, run it, clean up
    temp_path = get_tmp_filename()
    trait_table.writeToFile(temp_path, sep='\t')
    result = app(data=(tree_path, temp_path))
    remove(temp_path)

    # tip count is needed so parsing can drop the tip rows
    tree = DndParser(open(tree_path))
    parsed = parse_wagner_parsimony_output(result["StdOut"].readlines(),
                                           remove_num_tips=len(tree.tips()))

    # flip so that nodes become rows
    return parsed.transposed(new_column_name='nodes')
def _load(self, filename): """loads attributes from a gzipped, .npy data structure or a tab delimited cogent table""" try: infile = gzip.GzipFile(filename, 'r') data = numpy.load(infile) infile.close() # remember numpy.load() returns and array object # numpy.load().tolist() returns a dict ... wtf ! data = data.tolist() for name in data: value = data[name] self.__dict__[name] = value if (name == 'ranks' or name == 'counts') and value is not None: self.__dict__[name] = value.astype(float) if name == 'labels' and value is not None: self.__dict__[name] = value.astype(str) except Exception as e: print "Trying to load from table" data = LoadTable(filename, sep='\t') # convert table to collection here. ls = [] rs = [] cs = [] for row in data.getRawData(): l =numpy.unicode(row[0]) r = numpy.float(row[1]) c = numpy.array(row[2:len(row)], dtype=numpy.float32) ls.append(l) rs.append(r) cs.append(c) self.labels = numpy.array(ls) self.ranks = numpy.array(rs) self.counts = numpy.array(cs) self.N = self.counts.shape[0]