ott_ncbi = "../ott_ncbi" #TODO config file Entrez.email = "*****@*****.**" phy = Phylesystem() n = phy.return_study(study_id)[0] api_wrapper.study.get(study_id, tree=tree_id) ##This is a weird way to get the ingroup node, but I need the OTT ids anyhow. m = extract_tree(n, tree_id, PhyloSchema('newick', output_nexml2json='1.2.1', content="tree", tip_label="ot:ottId"), subtree_id="ingroup") otu_dict = gen_otu_dict(n) ottids = [] for oid, o in otu_dict.items(): try: ottid = o[u'^ot:ottId'] if ("{}:".format(ottid) in m) or ("{})".format(ottid) in m) or ("{},".format(ottid) in m): ottids.append(ottid) else: print(o) except: pass mrca_node = tree_of_life.mrca(ott_ids=ottids, wrap_response=True) newick = extract_tree(
def add_study(study_id):
    """Stage one phylesystem study, its curators and its trees in ``DBSession``.

    The study nexson is fetched through ``create_phylesystem_obj()``.  A
    ``Study`` row is created, linked to existing or newly created ``Curator``
    rows, and one ``Tree`` row is added per tree yielded by ``iter_trees``.
    Each tree is linked to every ``Taxonomy`` row on the lineage of its
    mapped tips.  Finally the study JSON (with the per-tree data removed)
    and the tree count are stored on the ``Study`` row.

    This function only adds objects to ``DBSession``; it does not commit.

    :param study_id: identifier of the study to load from phylesystem.
    :raises HTTPNotFound: if the study cannot be retrieved from phylesystem.
    """
    _LOG.debug('adding study {s}'.format(s=study_id))

    # get latest version of nexson
    # location of repo (test vs dev) dependent on peyotl config
    phy = create_phylesystem_obj()
    try:
        studyobj = phy.get_study(study_id)['data']
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not reported as a 404.
        _LOG.debug('did not find study {s} in phylesystem'.format(s=study_id))
        raise HTTPNotFound("Study {s} not found in phylesystem".format(s=study_id))
    nexml = get_nexml_el(studyobj)
    proposedTrees = nexml.get('^ot:candidateTreeForSynthesis')
    if proposedTrees is None:
        proposedTrees = []

    # create a new Study object
    new_study = Study(id=study_id)
    DBSession.add(new_study)

    # update with treebase id, if exists
    datadeposit = nexml.get('^ot:dataDeposit')
    if datadeposit:
        # .get(): a deposit record without '@href' is skipped, not a KeyError
        url = datadeposit.get('@href')
        if url:
            matchobj = re.match(u'.+TB2:(.+)$', url)
            if matchobj:
                new_study.treebase_id = matchobj.group(1)

    # get curator(s), noting that ot:curators might be a
    # string or a list (or be absent altogether)
    c = nexml.get('^ot:curatorName')
    if c is None:
        curator_list = []
    elif isinstance(c, basestring):
        curator_list = [c]
    else:
        curator_list = c
    for curator in curator_list:
        test_c = DBSession.query(Curator).filter(Curator.name == curator).first()
        if test_c:
            _LOG.debug("curator {c} already exists".format(c=curator))
            new_study.curators.append(test_c)
        else:
            _LOG.debug("curator {c} does not yet exist".format(c=curator))
            new_study.curators.append(Curator(name=curator))

    # mapped otus in this study
    otu_dict = gen_otu_dict(studyobj)
    # iterate over the OTUs in the study, collecting the mapped
    # ones (oid to ott_id mapping held at the study level)
    mapped_otus = {}
    for oid, o in otu_dict.items():
        ottID = o.get('^ot:ottId')
        if ottID is not None:
            mapped_otus[oid] = ottID

    # iterate over trees and insert tree data
    ntrees = 0
    for trees_group_id, tree_id, tree in iter_trees(studyobj):
        _LOG.debug(' tree : {t}'.format(t=tree_id))
        ntrees += 1
        proposedForSynth = tree_id in proposedTrees
        new_tree = Tree(
            tree_id=tree_id,
            study_id=study_id,
            proposed=proposedForSynth,
            data=json.dumps(tree)
        )

        # get otus
        ottIDs = set()  # ott ids (tips plus their lineages) for this tree
        ntips = 0
        for node_id, node in iter_node(tree):
            oid = node.get('@otu')
            # no @otu property on internal nodes
            if oid is None:
                continue
            ntips += 1
            ottID = mapped_otus.get(oid)
            if ottID is None:
                # tip whose OTU is not mapped to an OTT id
                continue
            # check that this exists in the taxonomy
            # (it might not, if the ID has been deprecated)
            taxon = DBSession.query(Taxonomy).filter(
                Taxonomy.id == ottID
            ).first()
            if taxon:
                lineage = get_lineage(ottID)
                _LOG.debug(' lineage of {m} = {l}'.format(m=ottID, l=lineage))
                ottIDs.update(lineage)
        new_tree.ntips = ntips

        for t in ottIDs:
            taxon = DBSession.query(Taxonomy).filter(
                Taxonomy.id == t
            ).first()
            new_tree.otus.append(taxon)

        # add the tree
        DBSession.add(new_tree)

    # now that we have added the tree info, update the study record
    # with the json data (minus the tree info); pop() tolerates a
    # nexml element that carries no 'treesById' key
    nexml.pop('treesById', None)
    new_study.data = json.dumps(nexml)
    new_study.ntrees = ntrees
# Counters accumulated while walking every study object exposed by ``phy``.
reported_study_count = study_count = 0
OTU_count = unmapped_OTU_count = unique_OTU_count = 0
nominated_study_count = 0
nominated_study_OTU_count = 0
nominated_study_unique_OTU_count = 0
nominated_study_unmapped_OTU_count = 0
run_time = 0
ott_id_set, nominated_ott_id_set = set(), set()

for study_id, n in phy.iter_study_objs():
    reported_study_count += 1
    otu_dict = gen_otu_dict(n)
    if not otu_dict:
        # a study without OTUs is counted as reported, nothing more
        continue
    nex_obj = get_nexml_el(n)
    study_count += 1
    OTU_count += len(otu_dict)

    # a study is nominated unless it is explicitly flagged (any value
    # other than None / False) as not intended for synthesis
    not_intended_for_synth = nex_obj.get('^ot:notIntendedForSynthesis')
    intended_for_synth = (
        not_intended_for_synth is None
        or not_intended_for_synth is False
    )
    if intended_for_synth:
        nominated_study_count += 1
        nominated_study_OTU_count += len(otu_dict)

    for oid, o in otu_dict.items():
        ott_id = o.get('^ot:ottId')
        if ott_id is None:
            unmapped_OTU_count += 1
def addStudy(session, study_id):
    """Load one study from the local phylesystem into the database and commit.

    A ``Study`` row is created (with its ``^ot:studyYear``) and linked to
    existing or newly created ``Curator`` rows.  One ``Tree`` row is added
    per tree yielded by ``iter_trees``, each linked to the ``Taxonomy`` rows
    of its mapped tips.  Every tree also receives the TreeBASE id parsed from
    the study's ``^ot:dataDeposit`` URL, when one is present.  The study JSON
    (with the per-tree data removed) is stored on the ``Study`` row before
    the session is committed.

    :param session: database session used for queries, adds and the commit.
    :param study_id: identifier of the study to load.
    """
    # get latest version of nexson
    print("adding study {s}".format(s=study_id))
    phy = PhylesystemAPI(get_from="local")
    studyobj = phy.get_study(study_id)["data"]
    nexml = get_nexml_el(studyobj)
    year = nexml.get("^ot:studyYear")
    proposedTrees = nexml.get("^ot:candidateTreeForSynthesis")
    if proposedTrees is None:
        proposedTrees = []

    # create a new Study object
    new_study = Study(id=study_id, year=year)
    session.add(new_study)

    # get curator(s), noting that ot:curators might be a
    # string or a list (or be absent altogether)
    c = nexml.get("^ot:curatorName")
    # single-argument print(...) gives the same output as the Python 2
    # statement form ``print " ot:curatorName: ", c``
    print(" ot:curatorName:  %s" % (c,))
    if c is None:
        curator_list = []
    elif isinstance(c, basestring):
        curator_list = [c]
    else:
        curator_list = c
    for curator in curator_list:
        test_c = session.query(Curator).filter(Curator.name == curator).first()
        if test_c:
            print("curator {c} already exists".format(c=curator))
            new_study.curators.append(test_c)
        else:
            print("curator {c} does no exist".format(c=curator))
            new_study.curators.append(Curator(name=curator))

    # mapped otus in this study
    otu_dict = gen_otu_dict(studyobj)
    # iterate over the OTUs in the study, collecting the mapped
    # ones (oid to ott_id mapping held at the study level)
    mapped_otus = {}
    for oid, o in otu_dict.items():
        ottID = o.get("^ot:ottId")
        if ottID is not None:
            mapped_otus[oid] = ottID

    # treebase id, if exists: it is a study-level value, so it is parsed
    # once here instead of once per tree
    tb_id = None
    datadeposit = nexml.get("^ot:dataDeposit")
    if datadeposit:
        # .get() plus the truthiness check: a record without a usable
        # '@href' is skipped rather than raising KeyError / TypeError
        url = datadeposit.get("@href")
        if url:
            matchobj = re.match(u".+TB2:(.+)$", url)
            if matchobj:
                tb_id = matchobj.group(1)

    # iterate over trees and insert tree data
    for trees_group_id, tree_id, tree in iter_trees(studyobj):
        print(" tree : %s" % (tree_id,))
        proposedForSynth = tree_id in proposedTrees
        new_tree = Tree(
            tree_id=tree_id,
            study_id=study_id,
            proposed=proposedForSynth,
            data=json.dumps(tree),
        )

        # get otus
        ottIDs = set()  # ott ids for this tree
        ntips = 0
        for node_id, node in iter_node(tree):
            oid = node.get("@otu")
            # no @otu property on internal nodes
            if oid is None:
                continue
            ntips += 1
            ottID = mapped_otus.get(oid)
            if ottID is None:
                # tip whose OTU is not mapped to an OTT id
                continue
            # check that this exists in the taxonomy
            # (it might not, if the ID has been deprecated)
            taxon = session.query(Taxonomy).filter(Taxonomy.id == ottID).first()
            if taxon:
                new_tree.otus.append(taxon)
                ottIDs.add(ottID)
        new_tree.ntips = ntips
        # need to write function for recursive query of Taxonomy table
        # ottIDs = parent_closure(ottIDs,taxonomy)

        if tb_id is not None:
            new_tree.treebase_id = tb_id
        session.add(new_tree)

    # now that we have added the tree info, update the study record
    # with the json data (minus the tree info); pop() tolerates a
    # nexml element that carries no "treesById" key
    nexml.pop("treesById", None)
    new_study.data = json.dumps(nexml)
    session.commit()