示例#1
0
# Script fragment: collect the OTT ids of the OTUs that appear in the ingroup
# subtree of one study tree, then ask the tree-of-life service for their MRCA.
# `study_id`, `tree_id` and the API objects are expected to be defined by the
# surrounding script.
ott_ncbi = "../ott_ncbi"  #TODO config file
Entrez.email = "*****@*****.**"

phy = Phylesystem()
n = phy.return_study(study_id)[0]
# NOTE(review): the result of this call is discarded; confirm whether it is
# needed for a side effect or is left over from an earlier version.
api_wrapper.study.get(study_id, tree=tree_id)

##This is a weird way to get the ingroup node, but I need the OTT ids anyhow.
# `m` is the ingroup subtree serialised as newick with OTT ids as tip labels.
m = extract_tree(n,
                 tree_id,
                 PhyloSchema('newick',
                             output_nexml2json='1.2.1',
                             content="tree",
                             tip_label="ot:ottId"),
                 subtree_id="ingroup")
otu_dict = gen_otu_dict(n)
ottids = []
for oid, o in otu_dict.items():
    # Only the key lookup is guarded: an OTU without an OTT id is simply
    # skipped, while any other error (e.g. `m` not being a string) surfaces
    # instead of being silently swallowed.
    try:
        ottid = o[u'^ot:ottId']
    except KeyError:
        continue
    # A tip label in the newick string is followed by ':', ')' or ','.
    # NOTE(review): this is a plain substring test, so "23:" also matches
    # inside "123:"; confirm ids cannot collide by suffix in practice.
    if any("{}{}".format(ottid, delim) in m for delim in ":),"):
        ottids.append(ottid)
    else:
        # mapped OTU that is not part of the ingroup subtree
        print(o)

mrca_node = tree_of_life.mrca(ott_ids=ottids, wrap_response=True)

newick = extract_tree(
def add_study(study_id):
    """Load one phylesystem study, its curators and its trees into DBSession.

    Fetches the study's NexSON, creates a ``Study`` row, links ``Curator``
    rows (re-using rows that already exist with the same name) and creates
    one ``Tree`` row per tree in the study.  Every tree is linked to the
    ``Taxonomy`` rows for the ids returned by ``get_lineage`` for each mapped
    tip whose OTT id is still present in the taxonomy table.  Objects are only
    added to ``DBSession``; this function does not issue a commit.

    :param study_id: id of the study to fetch from phylesystem
    :raises HTTPNotFound: if the study cannot be retrieved from phylesystem
    """
    _LOG.debug('adding study {s}'.format(s=study_id))

    # get latest version of nexson
    # location of repo (test vs dev) dependent on peyotl config
    phy = create_phylesystem_obj()
    try:
        studyobj = phy.get_study(study_id)['data']
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into a 404.
        _LOG.debug('did not find study {s} in phylesystem'.format(s=study_id))
        raise HTTPNotFound("Study {s} not found in phylesystem".format(s=study_id))
    nexml = get_nexml_el(studyobj)
    proposedTrees = nexml.get('^ot:candidateTreeForSynthesis')
    if proposedTrees is None:
        proposedTrees = []

    # create a new Study object
    new_study = Study(id=study_id)
    DBSession.add(new_study)

    # update with treebase id, if exists
    datadeposit = nexml.get('^ot:dataDeposit')
    if datadeposit:
        url = datadeposit['@href']
        if url:
            # the TreeBASE id is whatever follows "TB2:" at the end of the URL
            matchobj = re.match(u'.+TB2:(.+)$', url)
            if matchobj:
                new_study.treebase_id = matchobj.group(1)

    # get curator(s), noting that ot:curatorName might be absent,
    # a single string or a list
    c = nexml.get('^ot:curatorName')
    if c is None:
        # no curator recorded: nothing to link (iterating None would raise)
        curator_list = []
    elif isinstance(c, basestring):
        curator_list = [c]
    else:
        curator_list = c
    for curator in curator_list:
        test_c = DBSession.query(Curator).filter(Curator.name == curator).first()
        if test_c:
            _LOG.debug("curator {c} already exists".format(c=curator))
            new_study.curators.append(test_c)
        else:
            _LOG.debug("curator {c} does not yet exist".format(c=curator))
            new_study.curators.append(Curator(name=curator))

    # mapped otus in this study
    otu_dict = gen_otu_dict(studyobj)
    # iterate over the OTUs in the study, collecting the mapped
    # ones (oid to ott_id mapping held at the study level)
    mapped_otus = {}
    for oid, o in otu_dict.items():
        ottID = o.get('^ot:ottId')
        if ottID is not None:
            mapped_otus[oid] = ottID

    # iterate over trees and insert tree data
    ntrees = 0
    for trees_group_id, tree_id, tree in iter_trees(studyobj):
        _LOG.debug(' tree : {t}'.format(t=tree_id))
        ntrees += 1
        proposedForSynth = False
        if tree_id in proposedTrees:
            proposedForSynth = True

        treejson = json.dumps(tree)
        new_tree = Tree(
            tree_id=tree_id,
            study_id=study_id,
            proposed=proposedForSynth,
            data=treejson
            )

        # get otus
        ottIDs = set()     # ott ids for this tree (tips plus their lineages)
        ntips = 0
        for node_id, node in iter_node(tree):
            oid = node.get('@otu')
            # no @otu property on internal nodes
            if oid is None:
                continue
            ntips += 1
            if oid not in mapped_otus:
                continue
            ottID = mapped_otus[oid]
            # check that this exists in the taxonomy
            # (it might not, if the ID has been deprecated)
            taxon = DBSession.query(Taxonomy).filter(
                Taxonomy.id == ottID
                ).first()
            if taxon:
                lineage = get_lineage(ottID)
                _LOG.debug(' lineage of {m} = {l}'.format(m=ottID, l=lineage))
                ottIDs.update(lineage)
        new_tree.ntips = ntips
        # link the tree to every distinct taxon collected above
        for t in ottIDs:
            taxon = DBSession.query(Taxonomy).filter(
                Taxonomy.id == t
                ).first()
            new_tree.otus.append(taxon)

        # add the tree
        DBSession.add(new_tree)

    # now that we have added the tree info, update the study record
    # with the json data (minus the tree info)
    del nexml['treesById']
    studyjson = json.dumps(nexml)
    new_study.data = studyjson
    new_study.ntrees = ntrees
# Running totals over every study yielded by phy.iter_study_objs().
# nominated_study_count / nominated_study_OTU_count only grow for studies
# whose ^ot:notIntendedForSynthesis flag is absent or False.
reported_study_count = study_count = 0
OTU_count = unmapped_OTU_count = unique_OTU_count = 0
nominated_study_count = nominated_study_OTU_count = 0
nominated_study_unique_OTU_count = nominated_study_unmapped_OTU_count = 0
run_time = 0

# two independent, initially empty sets
ott_id_set, nominated_ott_id_set = set(), set()
for study_id, n in phy.iter_study_objs():
    reported_study_count += 1
    otu_dict = gen_otu_dict(n)
    if not otu_dict:
        # a study without OTUs is counted as reported and nothing else
        continue
    nex_obj = get_nexml_el(n)
    study_count += 1
    not_intended_for_synth = nex_obj.get('^ot:notIntendedForSynthesis')
    # identity tests on purpose: only None and the literal False qualify
    intended_for_synth = (
        not_intended_for_synth is None or not_intended_for_synth is False
    )
    OTU_count += len(otu_dict)
    if intended_for_synth:
        nominated_study_count += 1
        nominated_study_OTU_count += len(otu_dict)

    for oid, o in otu_dict.items():
        ott_id = o.get('^ot:ottId')
        if ott_id is None:
            unmapped_OTU_count += 1
示例#4
0
def addStudy(session, study_id):
    """Load one study from the local phylesystem into the database and commit.

    Creates a ``Study`` row (with its year), links ``Curator`` rows (re-using
    rows that already exist with the same name) and creates one ``Tree`` row
    per tree in the study.  Each tree is linked to the ``Taxonomy`` row of
    every mapped tip whose OTT id is present in the taxonomy table; a taxon
    shared by several tips is linked once.  If the study's data-deposit URL
    ends in ``TB2:<id>``, that id is stored on every tree as ``treebase_id``.

    :param session: database session used for the queries, adds and the
        final commit
    :param study_id: id of the study to fetch from phylesystem
    """
    # get latest version of nexson
    print("adding study {s}".format(s=study_id))
    phy = PhylesystemAPI(get_from="local")
    studyobj = phy.get_study(study_id)["data"]
    nexml = get_nexml_el(studyobj)
    year = nexml.get("^ot:studyYear")
    proposedTrees = nexml.get("^ot:candidateTreeForSynthesis")
    if proposedTrees is None:
        proposedTrees = []

    # create a new Study object
    new_study = Study(id=study_id, year=year)
    session.add(new_study)

    # get curator(s), noting that ot:curatorName might be absent,
    # a single string or a list
    c = nexml.get("^ot:curatorName")
    # single-argument print() calls give the same output as the Python 2
    # print statement and also parse on Python 3
    print(" ot:curatorName:  %s" % (c,))
    if c is None:
        # no curator recorded: nothing to link (iterating None would raise)
        curator_list = []
    elif isinstance(c, basestring):
        curator_list = [c]
    else:
        curator_list = c
    for curator in curator_list:
        test_c = session.query(Curator).filter(Curator.name == curator).first()
        if test_c:
            print("curator {c} already exists".format(c=curator))
            new_study.curators.append(test_c)
        else:
            print("curator {c} does no exist".format(c=curator))
            new_study.curators.append(Curator(name=curator))

    # mapped otus in this study
    otu_dict = gen_otu_dict(studyobj)
    # iterate over the OTUs in the study, collecting the mapped
    # ones (oid to ott_id mapping held at the study level)
    mapped_otus = {}
    for oid, o in otu_dict.items():
        ottID = o.get("^ot:ottId")
        if ottID is not None:
            mapped_otus[oid] = ottID

    # The treebase id comes from study-level metadata, so it is identical for
    # every tree: parse it once instead of once per tree.
    tb_id = None
    datadeposit = nexml.get("^ot:dataDeposit")
    if datadeposit:
        url = datadeposit.get("@href")
        if url:
            # the TreeBASE id is whatever follows "TB2:" at the end of the URL
            matchobj = re.match(u".+TB2:(.+)$", url)
            if matchobj:
                tb_id = matchobj.group(1)

    # iterate over trees and insert tree data
    for _trees_group_id, tree_id, tree in iter_trees(studyobj):
        print(" tree : %s" % (tree_id,))
        proposedForSynth = False
        if tree_id in proposedTrees:
            proposedForSynth = True

        treejson = json.dumps(tree)
        new_tree = Tree(tree_id=tree_id, study_id=study_id, proposed=proposedForSynth, data=treejson)

        # get otus
        ottIDs = set()  # ott ids already linked to this tree
        ntips = 0
        for _node_id, node in iter_node(tree):
            oid = node.get("@otu")
            # no @otu property on internal nodes
            if oid is None:
                continue
            ntips += 1
            ottID = mapped_otus.get(oid)
            # skip unmapped tips and taxa this tree is already linked to, so
            # several tips mapped to one taxon do not attach it repeatedly
            if ottID is None or ottID in ottIDs:
                continue
            # check that this exists in the taxonomy
            # (it might not, if the ID has been deprecated)
            taxon = session.query(Taxonomy).filter(Taxonomy.id == ottID).first()
            if taxon:
                new_tree.otus.append(taxon)
                ottIDs.add(ottID)
        new_tree.ntips = ntips
        # TODO: also link ancestor taxa; needs a function for a recursive
        # query of the Taxonomy table.

        # update with treebase id, if exists
        if tb_id is not None:
            new_tree.treebase_id = tb_id
        session.add(new_tree)

    # now that we have added the tree info, update the study record
    # with the json data (minus the tree info)
    del nexml["treesById"]
    studyjson = json.dumps(nexml)
    new_study.data = studyjson
    session.commit()