#!/usr/bin/env python
# Annotate stored trees with a citation link to TreeBASE.
#
# For every tree URI returned by Treestore.list_trees() that contains the
# marker 'TB2_', a TreeBASE URI is built from the tree id and attached to the
# tree as a `bibo:cites` statement via Treestore.annotate().
#
# Usage: an optional first command-line argument is accepted and stored in
# `taxonomy`; nothing below reads it.
import sys
import os
from treestore import Treestore

# Only a missing argument is an expected failure here, so catch exactly that
# instead of swallowing every exception with a bare `except:`.
try:
    taxonomy = sys.argv[1]
except IndexError:
    taxonomy = None

t = Treestore()

# Template for the TreeBASE URI; %s receives the tree id with '_' turned
# into ':'.
treebase_uri = 'http://purl.org/phylo/treebase/phylows/tree/%s'

# The earlier version also listed the local 'trees' directory and defined a
# `base_uri` template. Neither value was used, and the directory listing made
# the script fail when no 'trees' directory was present, so both were dropped.

tree_list = set(t.list_trees())
for tree_uri in tree_list:
    # Skip every tree whose URI does not carry the 'TB2_' marker.
    if 'TB2_' not in tree_uri:
        continue

    tree_id = t.id_from_uri(tree_uri)
    tb_uri = treebase_uri % (tree_id.replace('_', ':'))

    # Single pre-formatted argument: prints "<tree_id> <tb_uri>" under both
    # the Python 2 print statement and the Python 3 print function.
    print('%s %s' % (tree_id, tb_uri))

    t.annotate(tree_uri, annotations='?tree bibo:cites <%s> .' % tb_uri)
# Timing script for Treestore operations on test trees of increasing size.
# NOTE(review): the visible portion ends partway through the per-size loop;
# only the "add" measurement is shown here.
from treestore import Treestore
import Bio.Phylo as bp
import time
import datetime
from cStringIO import StringIO
import sys
import cPickle as pkl

t = Treestore()

# Each size n selects the input file 'tests/bird<n zero-padded to 4>.new' and
# the tree name 'test<n zero-padded to 4>'.
sizes = [10, 50, 100, 200, 500, 1000, 2000, 5000]

# Format a duration in seconds: round to 3 decimals, render it as a
# datetime.timedelta string, then drop the last three characters of that
# string.
ti = lambda x: str(datetime.timedelta(seconds=round(x, 3)))[:-3]

# Per-size timing results keyed by n. Only add_times is filled in within the
# visible portion of the script.
add_times = {}
retrieve_times = {}
write_times = {}
parse_times = {}

# Header row for the tab-separated report.
print 'size\tadd\tretrieve\twrite\tparse'
for n in sizes:
    s = str(n).zfill(4)
    # Trailing comma: Python 2 print without a newline, so the row is built
    # up piece by piece; flush so progress is visible immediately.
    print s,
    sys.stdout.flush()

    # Time adding the newick file for this size to the store.
    start_time = time.time()
    t.add_trees('tests/bird%s.new' % s, 'newick', 'test%s' % s)
    add_times[n] = time.time() - start_time
    print '\t', ti(add_times[n]),
    sys.stdout.flush()

    # Restart the clock for the next measurement (not visible in this chunk).
    start_time = time.time()
def uri_from_tree_id(tree_id):
    """Return the URI that ``Treestore.uri_from_id`` produces for ``tree_id``.

    The lookup is delegated unchanged to ``Treestore.uri_from_id`` with
    ``base_uri`` set to ``settings.TREE_URI``; whatever that call returns
    (or raises) is passed straight through.
    """
    make_uri = Treestore.uri_from_id
    return make_uri(tree_id, base_uri=settings.TREE_URI)
#!/usr/bin/env python import sys import os from treestore import Treestore try: taxonomy = sys.argv[1] except: taxonomy = None t = Treestore() tree_files = [x for x in os.listdir('trees') if x.endswith('.nex')] base_uri = 'http://www.phylocommons.org/trees/%s' tree_list = set(t.list_trees()) if taxonomy: sys.stdout.write('Loading taxonomy...') sys.stdout.flush() taxonomy = t.get_trees('%s_taxonomy' % taxonomy)[0] taxonomy.index_labels() print 'done.' errors = set() for tree_file in tree_files: tree_id = 'TB2_' + tree_file[:-len('.nex')] if Treestore.uri_from_id(tree_id) in tree_list: continue print '**', tree_id tree_path = os.path.join('trees', tree_file) with open(tree_path) as input_file: r = input_file.read() if '<!DOCTYPE html' in r: continue try:
def tree_id_from_uri(uri):
    """Return the tree id that ``Treestore.id_from_uri`` extracts from ``uri``.

    The lookup is delegated unchanged to ``Treestore.id_from_uri`` with
    ``base_uri`` set to ``settings.TREE_URI``; whatever that call returns
    (or raises) is passed straight through.
    """
    extract_id = Treestore.id_from_uri
    return extract_id(uri, base_uri=settings.TREE_URI)