def convert_orthologs_for_upload(infile, outfile):
    '''
    Write the orthologs read from infile to outfile in the format expected by
    the Ortholog Benchmarking website: each line has one ortholog, represented
    as a pair of sequence ids separated by a tab.
    http://linneus54.inf.ethz.ch:8080/cgi-bin/gateway.pl

    infile: path handed to orthutil.orthDatasFromFileGen, which is iterated
        as (params, orthologs) pairs; each ortholog is unpacked as
        (qdb, sdb, distance).
    outfile: path of the file to write.  It is opened in 'w' mode, so any
        existing content is replaced.

    Only the two ids are written; params and distance are ignored.
    '''
    with open(outfile, 'w') as fh:
        for _params, orthologs in orthutil.orthDatasFromFileGen(infile):
            for qdb, sdb, _distance in orthologs:
                # Pass the two ids explicitly rather than via
                # format(**locals()), so the output does not depend on the
                # names of local variables.
                fh.write('{0}\t{1}\n'.format(qdb, sdb))
def load_orth_datas(ds): ''' load orthDatas serially. takes a long time. use dones to resume job if it dies. ''' release = roundup.dataset.getDatasetId(ds) print 'getting ids' genomeToId = roundup_db.getGenomeToId(release) divToId = roundup_db.getDivergenceToId(release) evalueToId = roundup_db.getEvalueToId(release) geneToId = roundup_db.getSequenceToId(release) print 'loading orthDatas' for path in roundup.dataset.getOrthologsFiles(ds): if get_dones(ds).done(path): print 'already loaded:', path else: print 'loading', path orthDatasGen = orthutil.orthDatasFromFileGen(path) roundup_db.loadReleaseResults(release, genomeToId, divToId, evalueToId, geneToId, orthDatasGen) get_dones(ds).mark(path) print 'done loading all orthDatas'