示例#1
0
def main(df, log_dir="./logs", fast_run=False):
    """Create missing variant items, then annotate every variant row in ``df``.

    The frame is first reduced by ``filter_df_clinical_missense``. A first pass
    creates an item for each ``gDNA`` value that is not yet in the HGVS -> QID
    mapping; a second pass calls ``create_variant_annotation`` for each row
    whose variant and association are both known.

    :param df: DataFrame with (at least) the columns read below: ``gDNA``,
        ``individual_mutation``, ``Association``, ``Drug_qid``, ``prim_tt_qid``,
        ``Source`` and ``Evidence level``
    :param log_dir: accepted for interface compatibility; not referenced in
        this function body
    :param fast_run: forwarded to ``create_missense_variant_item``
    :return: None
    """
    df = filter_df_clinical_missense(df)

    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)

    # make sure we have all the variant items we need
    hgvs_qid = id_mapper(PROPS['HGVS nomenclature'])
    for _, row in tqdm(df.iterrows(), total=len(df)):
        if row.gDNA in hgvs_qid:
            # an item for this variant is already known; nothing to create
            continue
        # NOTE: the original had a stray ``continue`` directly under
        # ``if row.gDNA not in hgvs_qid:``, which made everything below
        # unreachable, so missing variants were never created.
        label = "{} ({})".format(row.gDNA, row['individual_mutation'])
        print("creating {}".format(label))
        try:
            item = create_missense_variant_item(row.gDNA,
                                                label,
                                                login,
                                                fast_run=fast_run)
        except Exception as e:
            # best effort: report the failure and move on to the next variant
            print(e)
            wdi_core.WDItemEngine.log(
                "ERROR",
                wdi_helpers.format_msg(row.gDNA, "gDNA", None, str(e),
                                       type(e)))
            continue
        hgvs_qid[row.gDNA] = item.wd_item_id

    for _, row in tqdm(df.iterrows(), total=len(df)):
        if row.gDNA not in hgvs_qid:
            # creation failed (or was skipped) in the first pass
            wdi_core.WDItemEngine.log(
                "WARNING",
                wdi_helpers.format_msg(
                    row.gDNA, "gDNA", None,
                    "variant not found: {}".format(row.gDNA),
                    "variant not found"))
            continue
        if row.Association not in association_map:
            wdi_core.WDItemEngine.log(
                "WARNING",
                wdi_helpers.format_msg(
                    row.gDNA, "gDNA", None,
                    "Association not found: {}".format(row.Association),
                    "association not found"))
            continue
        qid = hgvs_qid[row.gDNA]
        association = association_map[row.Association]
        drug_qid = row.Drug_qid
        prim_tt_qid = row.prim_tt_qid
        source = row.Source
        evidence_level = row['Evidence level']

        create_variant_annotation(qid, association, drug_qid,
                                  prim_tt_qid, source, evidence_level,
                                  login)
 def create(self, write=True):
     """
     Build this node's statements and hand the resulting item to ``try_write``.

     A deprecated node is only logged (and printed) as "delete me". Otherwise
     the xref and main statements are generated and appended to ``self.s``, an
     item engine is built from them, and the English label, description and
     aliases are filled in before writing.

     :param write: forwarded to ``wdi_helpers.try_write``
     :return: the item engine object; None if the node is deprecated or if any
         exception was raised (the exception is printed and logged as ERROR)
     """
     if self.deprecated:
         deprecation_msg = wdi_helpers.format_msg(self.doid, 'P699', None, "delete me", msg_type="delete me")
         wdi_core.WDItemEngine.log("WARNING", deprecation_msg)
         print(deprecation_msg)
         return None
     try:
         self.create_xref_statements()
         self.s.extend(self.s_xref)
         self.create_main_statements()
         self.s.extend(self.s_main)

         # properties whose existing values are kept and appended to
         append_names = ('subclass of', 'instance of', 'has cause', 'location',
                         'OMIM ID', 'Orphanet ID', 'MeSH ID', 'ICD-10-CM',
                         'ICD-10', 'ICD-9-CM', 'ICD-9', 'NCI Thesaurus ID',
                         'UMLS CUI')
         wd_item = wdi_core.WDItemEngine(
             data=self.s,
             append_value=[PROPS[name] for name in append_names],
             fast_run=self.do_graph.fast_run,
             fast_run_base_filter={'P699': ''},
             fast_run_use_refs=True,
             global_ref_mode='CUSTOM',
             ref_handler=update_retrieved_if_new,
         )
         wd_item.fast_run_container.debug = False

         # only set the label when the item has no English label yet
         if wd_item.get_label(lang="en") == "":
             wd_item.set_label(self.lbl, lang="en")

         current_descr = wd_item.get_description(lang='en')
         if current_descr == self.definition and self.definition and len(self.definition) < 250:
             # the raw definition is in place: swap it for the cleaned one
             wd_item.set_description(utils.clean_description(self.definition))
         else:
             lowered = current_descr.lower()
             if lowered in {"", "human disease", "disease"} and self.definition and len(self.definition) < 250:
                 # empty or placeholder description: use the cleaned definition
                 wd_item.set_description(utils.clean_description(self.definition))
             elif lowered == "":
                 # no usable definition: fall back to a generic description
                 wd_item.set_description(description="human disease", lang='en')

         if self.synonyms is not None:
             wd_item.set_aliases(aliases=self.synonyms, lang='en', append=True)
         if self.wikilink is not None:
             # a lot of these are not right... don't do this
             # wd_item.set_sitelink(site="enwiki", title=self.wikilink)
             pass

         wdi_helpers.try_write(wd_item, record_id=self.doid, record_prop='P699',
                               login=self.do_graph.login, write=write)
         return wd_item
     except Exception as err:
         exc_info = sys.exc_info()
         print(self.doid)
         traceback.print_exception(*exc_info)
         wdi_core.WDItemEngine.log(
             "ERROR",
             wdi_helpers.format_msg(self.doid, 'P699', None, str(err), msg_type=type(err)))
示例#3
0
def filter_df_clinical_missense(df):
    """Return the rows of ``df`` that can be turned into variant annotations.

    Rows are kept only when their ``Evidence level`` is one of the clinical
    guideline values below. After that, rows are dropped (each one logged as a
    WARNING) when ``gDNA`` is null, ``Drug_qid`` is null, ``Drug_qid`` contains
    a ";" or ``prim_tt_qid`` is null. Rows removed by the evidence-level filter
    are not logged.

    :param df: DataFrame with the columns ``Evidence level``, ``gDNA``,
        ``Drug_qid``, ``prim_tt_qid``, ``Alteration``, ``Drug`` and
        ``Primary Tumor type``
    :return: the filtered DataFrame; the input frame is not modified
    """

    def _log_dropped(dropped, describe, msg_type):
        # one WARNING per dropped row, keyed on the row's alteration
        for _, dropped_row in dropped.iterrows():
            wdi_core.WDItemEngine.log(
                "WARNING",
                wdi_helpers.format_msg(dropped_row.Alteration, "alteration",
                                       None, describe(dropped_row), msg_type))

    # Keep those with clinical evidence only
    clinical_levels = {
        'FDA guidelines',
        'European LeukemiaNet guidelines',
        'NCCN guidelines',
        'CPIC guidelines',
        'NCCN/CAP guidelines',
    }
    df = df[df['Evidence level'].isin(clinical_levels)]

    # MUT only, with a HGVS ID
    without_hgvs = df[df.gDNA.isnull()]
    df = df[df.gDNA.notnull()]
    _log_dropped(without_hgvs, lambda r: '', "no HGVS ID")

    # get rid of those where we don't know the drug
    without_drug = df[df.Drug_qid.isnull()]
    df = df[df.Drug_qid.notnull()]
    _log_dropped(without_drug,
                 lambda r: "unknown drug: {}".format(r.Drug),
                 "unknown drug")

    # get rid of the multiple drug ("or") items
    separator_count = df.Drug_qid.str.count(";")
    multi_drug = df[separator_count != 0]
    df = df[separator_count == 0]
    _log_dropped(multi_drug,
                 lambda r: "unknown drug: {}".format(r.Drug),
                 "unknown drug")

    # get rid of those where we don't know the disease
    without_disease = df[df.prim_tt_qid.isnull()]
    df = df[df.prim_tt_qid.notnull()]
    _log_dropped(without_disease,
                 lambda r: "unknown disease: {}".format(r['Primary Tumor type']),
                 "unknown disease")

    return df
示例#4
0
    def make_gene_encodes(self, write=True):
        """
        Attach an "encodes" statement, pointing at this protein, to the gene item.

        The reference is built before the guarded section, so a failure there
        propagates to the caller. Any exception raised while building or writing
        the gene item is printed and logged as an ERROR; it is not re-raised.

        :param write: forwarded to ``wdi_helpers.try_write``
        :return: None
        """
        uniprot_source = self.record['uniprot']['@source']
        uniprot_ref = make_ref_source(uniprot_source,
                                      PROPS['UniProt ID'],
                                      self.external_ids['UniProt ID'],
                                      login=self.login)

        try:
            encodes_statement = wdi_core.WDItemID(self.protein_wdid,
                                                  PROPS['encodes'],
                                                  references=[uniprot_ref])
            gene_item = wdi_core.WDItemEngine(wd_item_id=self.gene_wdid,
                                              domain='genes',
                                              data=[encodes_statement],
                                              append_value=[PROPS['encodes']])
            wdi_helpers.try_write(gene_item,
                                  self.external_ids['UniProt ID'],
                                  PROPS['UniProt ID'],
                                  self.login,
                                  write=write)
        except Exception as err:
            traceback.print_exception(*sys.exc_info())
            error_msg = wdi_helpers.format_msg(self.external_ids['UniProt ID'],
                                               PROPS['UniProt ID'],
                                               None,
                                               str(err),
                                               msg_type=type(err))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
示例#5
0
    def create_relationships(self, login, write=True):
        """
        Write this term's subclass-of / has-part / part-of statements.

        Returns early (without writing) when ``do_wdid_lookup`` raises KeyError,
        which is logged as an ERROR, or when the term has no relationship
        statements at all.

        :param login: login object passed to ``wdi_helpers.try_write``
        :param write: forwarded to ``wdi_helpers.try_write``
        :return: None
        """
        try:
            # endpoint may not get updated in time?
            self.do_wdid_lookup()
        except KeyError as err:
            wdi_core.WDItemEngine.log("ERROR", format_msg(self.id, INTERPRO, None, str(err), type(err)))
            return

        ref = self.reference
        statements = [wdi_core.WDExternalID(value=self.id, prop_nr=INTERPRO, references=[ref])]
        if self.parent:
            # subclass of
            statements.append(wdi_core.WDItemID(value=self.parent_wdid, prop_nr='P279', references=[ref]))
        if self.contains:
            # has part
            statements.extend(
                wdi_core.WDItemID(value=part, prop_nr='P527', references=[ref])
                for part in self.contains_wdid)
        if self.found_in:
            # part of
            statements.extend(
                wdi_core.WDItemID(value=whole, prop_nr='P361', references=[ref])
                for whole in self.found_in_wdid)

        if len(statements) == 1:
            # only the identifier statement is present: nothing to write
            return

        wd_item = wdi_core.WDItemEngine(
            wd_item_id=self.wdid,
            domain='interpro',
            data=statements,
            append_value=['P279', 'P527', 'P361'],
            fast_run=True,
            fast_run_base_filter=IPRTerm.fast_run_base_filter,
        )
        wdi_helpers.try_write(wd_item, self.id, INTERPRO, login,
                              edit_summary="create/update subclass/has part/part of",
                              write=write)
    def run_one(self, record, gene_wdid, write):
        """
        Create or update the protein item for one record and link it to its gene.

        :param record: source record passed to ``Protein``
        :param gene_wdid: identifier of the gene item, passed to ``Protein``
        :param write: forwarded to the protein create/update/encodes calls
        :return: None

        NOTE(review): ``fast_run`` is neither a parameter nor a local here; it is
        resolved from an outer scope - confirm it is defined at module level.
        """
        protein = Protein(record, self.organism_info, gene_wdid, self.login)
        try:
            protein.parse_external_ids()
            uniprot = protein.external_ids['UniProt ID']
        except Exception as err:
            # without a UniProt ID there is nothing to do for this record
            wdi_core.WDItemEngine.log(
                "ERROR",
                wdi_helpers.format_msg(gene_wdid,
                                       None,
                                       None,
                                       str(err),
                                       msg_type=type(err)))
            return

        if uniprot not in self.uniprot_qid:
            wditem = protein.create_item(fast_run=fast_run, write=write)
        else:
            # some proteins are encoded by multiple genes. don't try to create it again
            existing_qid = self.uniprot_qid[uniprot]
            wditem = protein.update_item(existing_qid, fast_run=fast_run, write=write)

        if wditem is not None:
            # remember the item so later records for this protein update it
            self.uniprot_qid[uniprot] = wditem.wd_item_id
            protein.make_gene_encodes(write=write)
        if protein.status is not True:
            self.failed.append(protein.uniprot)
示例#7
0
 def create_depend(self, login=None, write=True):
     """
     Write the main statements onto this node's already-existing item.

     :param login: login object passed to ``wdi_helpers.try_write``
     :param write: forwarded to ``wdi_helpers.try_write``
     :return: the item engine object; None when the node is deprecated, when it
         has no ``wd_item_id`` yet, or when an exception was raised (the
         exception is printed and logged as ERROR)
     """
     if self.deprecated:
         return None
     if not self.wd_item_id:
         # The original formatted ``node.id_purl``; ``node`` is not defined in
         # this method, so this path raised NameError instead of returning None.
         # Report this node instead, falling back to the colon-form ID that the
         # rest of this method uses if there is no ``id_purl`` attribute.
         print("must create item first: {}".format(getattr(self, 'id_purl', self.id_colon)))
         return None
     try:
         s = self.create_main_statements()
         wd_item = wdi_core.WDItemEngine(
             wd_item_id=self.wd_item_id,
             data=s,
             domain=self.domain,
             append_value=[PROPS['subclass of'], PROPS['instance of']],
             fast_run=self.fast_run,
             fast_run_base_filter={self.primary_ext_prop_qid: ''})
         wdi_helpers.try_write(wd_item,
                               record_id=self.id_colon,
                               record_prop=self.primary_ext_prop_qid,
                               login=login,
                               write=write)
         return wd_item
     except Exception as e:
         exc_info = sys.exc_info()
         traceback.print_exception(*exc_info)
         msg = wdi_helpers.format_msg(self.id_colon,
                                      self.primary_ext_prop_qid,
                                      None,
                                      str(e),
                                      msg_type=type(e))
         wdi_core.WDItemEngine.log("ERROR", msg)
         return None
示例#8
0
    def make_statement_from_edge(self, edge):
        """
        Build a statement for ``edge``, with custom handling for "regulates" edges.

        Edges whose predicate is RO_0002212 or RO_0002213 become a statement on
        the property mapped from RO_0002211, carrying a "subject has role"
        qualifier. Every other edge is delegated to the parent class.

        :param edge: mapping with the keys ``sub``, ``pred`` and ``obj``
        :return: a statement object, or None (after logging a WARNING) when the
            object QID, qualifier QID or predicate PID is missing
        """
        helper = self.helper
        special_preds = {
            'http://purl.obolibrary.org/obo/RO_0002212',
            'http://purl.obolibrary.org/obo/RO_0002213',
        }
        if edge['pred'] not in special_preds:
            return super(GOGraph, self).make_statement_from_edge(edge)

        subj_node = self.uri_node_map[edge['sub']]
        obj_qid = self.get_object_qid(edge['obj'])
        # the specific predicate is expressed through the qualifier value
        qual_qid = self.uri_node_map[self.regulates[edge['pred']]].qid
        pred_pid = self.PRED_PID_MAP['http://purl.obolibrary.org/obo/RO_0002211']

        if obj_qid and qual_qid and pred_pid:
            role_qualifier = wdi_core.WDItemID(
                qual_qid,
                helper.get_pid(PROPS['subject has role']),
                is_qualifier=True)
            return wdi_core.WDItemID(
                obj_qid,
                pred_pid,
                qualifiers=[role_qualifier],
                references=[subj_node.create_ref_statement()])

        m = wdi_helpers.format_msg(edge['sub'], None, None,
                                   "failed on edge: {}".format(edge))
        print(m)
        wdi_core.WDItemEngine.log("WARNING", m)
        return None
    def create_main_statements(self):
        """
        Populate ``self.s_main`` with this node's main statements.

        Adds one statement per relationship whose predicate and object are both
        known to the graph, an "exact match" to ``self.id``, an "instance of"
        Q12136 (skipped for DOID:4), and an "exact match" to the identifiers.org
        URL. The reference is created first if it does not exist yet.

        :return: None; the result is stored on ``self.s_main``
        """
        if not self.reference:
            self.create_reference()

        self.s_main = []
        statements = self.s_main  # alias: appends go straight to the attribute
        graph = self.do_graph

        for relationship in self.relationships:
            predicate = relationship[0]
            if predicate not in graph.edge_prop:
                # unknown relationship types are skipped silently
                continue
            target = relationship[1]
            if target not in graph.purl_wdid:
                s = "unknown obj: {}".format(target)
                msg = wdi_helpers.format_msg(self.doid, 'P699', None, s, msg_type="unknown obj")
                wdi_core.WDItemEngine.log("WARNING", msg)
                continue
            statements.append(
                wdi_core.WDItemID(graph.purl_wdid[target], graph.edge_prop[predicate],
                                  references=[self.reference]))

        # add http://purl.obolibrary.org/obo/, exact match
        statements.append(wdi_core.WDString(self.id, PROPS['exact match'], references=[self.reference]))

        if self.doid != "DOID:4":
            # instance of disease
            statements.append(wdi_core.WDItemID('Q12136', PROPS['instance of'], references=[self.reference]))

        # the identifiers.org match carries its own reference
        miriam_ref = [
            wdi_core.WDItemID(value="Q16335166", prop_nr='P248', is_reference=True),
            wdi_core.WDUrl("http://www.ebi.ac.uk/miriam/main/collections/MIR:00000233", 'P854',
                           is_reference=True),
        ]
        identifiers_url = "http://identifiers.org/doid/{}".format(self.doid)
        statements.append(wdi_core.WDString(identifiers_url, PROPS['exact match'], references=[miriam_ref]))
示例#10
0
    def create_item(self, login=None, fast_run=True, write=True):
        """
        Build the item for this term and optionally write it.

        :param login: login object; if falsy, no write is attempted
        :param fast_run: forwarded to ``wdi_core.WDItemEngine``
        :param write: forwarded to ``wdi_helpers.try_write``
        :return: the item engine object, or None if constructing it raised
            ``JSONDecodeError`` (logged as ERROR)
        """
        ref = self.reference
        id_statement = wdi_core.WDExternalID(value=self.id, prop_nr=INTERPRO, references=[ref])
        type_statement = wdi_core.WDItemID(value=self.type_wdid, prop_nr=INSTANCE_OF, references=[ref])

        try:
            wd_item = wdi_core.WDItemEngine(
                item_name=self.name,
                domain='interpro',
                data=[id_statement, type_statement],
                append_value=["P279", "P31"],
                fast_run=fast_run,
                fast_run_base_filter=IPRTerm.fast_run_base_filter,
            )
        except JSONDecodeError as err:
            error_msg = wdi_helpers.format_msg(self.id, INTERPRO, None, str(err), msg_type=type(err))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
            return None

        wd_item.set_label(self.name, lang='en')
        # only fill in descriptions for languages that have none yet
        for lang, description in self.lang_descr.items():
            if wd_item.get_description(lang=lang) == "":
                wd_item.set_description(description, lang=lang)
        wd_item.set_aliases([self.short_name, self.id])

        if login:
            wdi_helpers.try_write(wd_item, self.id, INTERPRO, login, write=write)
        return wd_item
示例#11
0
    def update_item(self, qid, fast_run=True, write=True):
        """
        Rebuild this protein's statements and write them to the existing item.

        On success ``self.protein_wdid`` is set to the item's ID.

        :param qid: ID of the existing protein item
        :param fast_run: forwarded to ``wdi_core.WDItemEngine``
        :param write: forwarded to ``wdi_helpers.try_write``
        :return: the item engine object, or None if any exception was raised
            (the exception is printed and logged as ERROR)
        """
        print("updating protein: {}".format(qid))
        try:
            self.parse_external_ids()
            self.statements = self.create_statements()

            appendable = [PROPS['instance of'], PROPS['encoded by'],
                          PROPS['Ensembl Protein ID'], PROPS['RefSeq Protein ID']]
            base_filter = {PROPS['UniProt ID']: '',
                           PROPS['found in taxon']: self.organism_info['wdid']}
            wd_item_protein = wdi_core.WDItemEngine(
                wd_item_id=qid,
                data=self.statements,
                append_value=appendable,
                fast_run=fast_run,
                fast_run_base_filter=base_filter,
                fast_run_use_refs=True,
                ref_handler=update_retrieved_if_new,
                global_ref_mode="CUSTOM",
                core_props=core_props,
            )
            wdi_helpers.try_write(wd_item_protein, self.external_ids['UniProt ID'],
                                  PROPS['UniProt ID'], self.login, write=write)
            self.protein_wdid = wd_item_protein.wd_item_id
            return wd_item_protein
        except Exception as err:
            traceback.print_exception(*sys.exc_info())
            # note: failures are keyed on the Entrez Gene ID, not the UniProt ID
            error_msg = wdi_helpers.format_msg(self.external_ids['Entrez Gene ID'],
                                               PROPS['Entrez Gene ID'], None,
                                               str(err), msg_type=type(err))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
            return None
示例#12
0
    def make_gene_encodes(self, write=True):
        """
        Attach an "encodes" statement, pointing at this protein, to the gene item.

        The reference is built before the guarded section, so a failure there
        propagates to the caller. Any exception raised while building or writing
        the gene item is printed and logged as an ERROR; it is not re-raised.

        :param write: forwarded to ``wdi_helpers.try_write``
        :return: None

        NOTE(review): ``fast_run`` is neither a parameter nor a local here; it is
        resolved from an outer scope - confirm it is defined at module level.
        """
        uniprot_source = self.record['uniprot']['@source']
        uniprot_ref = make_ref_source(uniprot_source, PROPS['UniProt ID'],
                                      self.external_ids['UniProt ID'],
                                      login=self.login)

        try:
            encodes_statement = wdi_core.WDItemID(self.protein_wdid, PROPS['encodes'],
                                                  references=[uniprot_ref])
            gene_item = wdi_core.WDItemEngine(
                wd_item_id=self.gene_wdid,
                data=[encodes_statement],
                append_value=[PROPS['encodes']],
                fast_run=fast_run,
                fast_run_base_filter={PROPS['Entrez Gene ID']: '',
                                      PROPS['found in taxon']: self.organism_info['wdid']},
                global_ref_mode="CUSTOM",
                ref_handler=update_retrieved_if_new,
                core_props=core_props,
            )
            wdi_helpers.try_write(gene_item, self.external_ids['UniProt ID'],
                                  PROPS['UniProt ID'], self.login, write=write)
        except Exception as err:
            traceback.print_exception(*sys.exc_info())
            error_msg = wdi_helpers.format_msg(self.external_ids['UniProt ID'],
                                               PROPS['UniProt ID'], None,
                                               str(err), msg_type=type(err))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
示例#13
0
def run_one(taxid, genbank_id):
    """Add a GenBank Assembly accession statement to the item for ``taxid``.

    :param taxid: taxonomy ID; converted to ``str`` before the lookup in
        ``tax_qid_map``
    :param genbank_id: accession value to write
    :return: None. When ``taxid`` is not in ``tax_qid_map`` a WARNING is logged
        and nothing is written.
    """
    # get the QID
    taxid = str(taxid)
    if taxid not in tax_qid_map:
        not_found = "organism with taxid {} not found or skipped".format(taxid)
        msg = wdi_helpers.format_msg(
            genbank_id, PROPS['GenBank Assembly accession'], "", not_found)
        wdi_core.WDItemEngine.log("WARNING", msg)
        return None

    qid = tax_qid_map[taxid]
    reference = create_reference(genbank_id)
    accession_pid = PROPS['GenBank Assembly accession']
    genbank_statement = wdi_core.WDExternalID(genbank_id,
                                              accession_pid,
                                              references=[reference])

    # create the item object, specifying the qid
    engine_kwargs = dict(
        data=[genbank_statement],
        wd_item_id=qid,
        fast_run=True,
        fast_run_base_filter={PROPS['GenBank Assembly accession']: ''},
        global_ref_mode='CUSTOM',
        fast_run_use_refs=True,
        ref_handler=update_retrieved_if_new,
    )
    item = wdi_core.WDItemEngine(**engine_kwargs)

    wdi_helpers.try_write(item,
                          record_id=genbank_id,
                          record_prop=PROPS['GenBank Assembly accession'],
                          login=login,
                          edit_summary="update GenBank Assembly accession")
示例#14
0
    def get_object_qid(self, edge_obj):
        """
        Resolve the object URI of an edge to a QID.

        The object does not have to be a node of this graph (for example, an
        edge in one ontology may point at a class of another). Nodes of this
        graph are answered from ``self.uri_node_map``; anything else is parsed
        into a (property, value) pair and looked up through an ID mapping that
        is loaded once per property and cached in ``self.pid_id_mapper``.

        :param edge_obj: URI of the edge's object
        :return: the QID; None (after printing and logging a WARNING) when the
            URI cannot be parsed or when zero or several QIDs match
        """
        # first. check if this URI exists in our graph
        if edge_obj in self.uri_node_map:
            return self.uri_node_map[edge_obj].qid

        # if not, check if the prefix exists in wikidata
        try:
            obj_pid, obj_value = cu.parse_curie(cu.uri_to_curie(edge_obj))
        except Exception:
            self_msg = wdi_helpers.format_msg(
                None, None, None, "edge object not found: {}".format(edge_obj))
            print(self_msg)
            wdi_core.WDItemEngine.log("WARNING", self_msg)
            return None

        obj_pid = self.helper.get_pid(obj_pid)
        # if this property exists, get all of the values for this property
        if obj_pid not in self.pid_id_mapper:
            print("loading: {}".format(obj_pid))
            loaded = wdi_helpers.id_mapper(obj_pid,
                                           return_as_set=True,
                                           prefer_exact_match=True,
                                           endpoint=self.sparql_endpoint_url)
            # cache an empty mapping too, so the property is not reloaded
            self.pid_id_mapper[obj_pid] = loaded if loaded else dict()

        # look up by the value
        value_to_qids = self.pid_id_mapper[obj_pid]
        if obj_value not in value_to_qids:
            warning = wdi_helpers.format_msg(
                None, None, None, "no qids found for: {}".format(edge_obj))
            print(warning)
            wdi_core.WDItemEngine.log("WARNING", warning)
            return None

        obj_qids = value_to_qids[obj_value]
        if len(obj_qids) != 1:
            warning = wdi_helpers.format_msg(
                None, None, None,
                "multiple qids ({}) found for: {}".format(obj_qids, edge_obj))
            print(warning)
            wdi_core.WDItemEngine.log("WARNING", warning)
            return None
        return list(obj_qids)[0]
 def parse_nodes(self, nodes):
     """
     Wrap each raw node in a ``DONode`` and keep the usable ones in ``self.nodes``.

     A node is kept when its ID contains the DOID PURL prefix, it is not
     deprecated and its type is "CLASS". A node that raises while being parsed
     or checked is logged as an ERROR and skipped.

     :param nodes: iterable of raw node mappings (each has an ``id`` key)
     :return: None
     """
     doid_prefix = "http://purl.obolibrary.org/obo/DOID_"
     for raw_node in nodes:
         try:
             parsed = DONode(raw_node, self)
             is_doid_class = (doid_prefix in parsed.id
                              and not parsed.deprecated
                              and parsed.type == "CLASS")
             if is_doid_class:
                 self.nodes[parsed.id] = parsed
         except Exception as err:
             error_msg = wdi_helpers.format_msg(raw_node['id'], 'P699', None, str(err), msg_type=type(err))
             wdi_core.WDItemEngine.log("ERROR", error_msg)
示例#16
0
def run_shex_manifest():
    """Validate the items selected by each manifest case against its ShEx schema.

    The manifest URL is read from the ``SHEX_MANIFEST`` environment variable.
    For every case whose ``data`` starts with "Endpoint:", the schema is
    fetched, the case's SPARQL query is executed, and each returned item is
    evaluated. Conforming items are logged as INFO, non-conforming ones as
    ERROR together with the evaluator's reason.

    The optional ``debug`` environment variable switches on evaluator debugging
    when it equals "True"; any other value, or no value, means off.

    :return: None
    :raises KeyError: if ``SHEX_MANIFEST`` is not set
    """
    manifest_url = os.environ["SHEX_MANIFEST"]
    print(manifest_url)
    manifest = jsonasobj.loads(requests.get(manifest_url).text)

    # Read the flag once, up front. The original parsed it for every row and
    # left ``debug`` unbound (UnboundLocalError) for any value other than
    # "True"/"False", and raised KeyError when the variable was not set.
    debug = os.environ.get("debug", "False") == "True"

    for case in manifest:
        if not case.data.startswith("Endpoint:"):
            continue
        sparql_endpoint = case.data.replace("Endpoint: ", "")
        schema = requests.get(case.schemaURL).text
        shex = ShExC(schema).schema
        evaluator = ShExEvaluator(schema=shex, debug=True)
        # strip the query-map wrapper to leave the bare SPARQL query
        sparql_query = case.queryMap.replace("SPARQL '''",
                                             "").replace("'''@START", "")

        df = wdi_core.WDItemEngine.execute_sparql_query(sparql_query)
        for row in df["results"]["bindings"]:
            wdid = row["item"]["value"]
            slurpeddata = SlurpyGraph(sparql_endpoint)
            try:
                results = evaluator.evaluate(rdf=slurpeddata,
                                             focus=wdid,
                                             debug=debug)
                for result in results:
                    if result.result:
                        print(str(result.focus) + ": INFO")
                        msg = wdi_helpers.format_msg(
                            wdid, wdid, None, 'CONFORMS', '')

                        wdi_core.WDItemEngine.log("INFO", msg)
                    else:
                        msg = wdi_helpers.format_msg(
                            wdid, wdid, None, '', result.reason)
                        wdi_core.WDItemEngine.log("ERROR", msg)

            except RuntimeError:
                # best effort: report the item and carry on with the next one
                print(
                    "Continue after 1 minute, no validation happened on" +
                    wdid)
                continue
示例#17
0
    def create_edges(self, login, write=True):
        """
        Write the edge-derived statements for every node that has a QID.

        For each node, statements are built from the edges whose subject is that
        node, plus an "instance of" (P31) statement for each of its root nodes
        other than itself. Nodes without a QID are logged as a WARNING and
        skipped; nodes that end up with no statements are skipped silently.

        :param login: login object passed to ``wdi_helpers.try_write``
        :param write: forwarded to ``wdi_helpers.try_write``
        :return: None
        """
        # skip edges where the subject is not one of our nodes
        known_subjects = {n.id_uri for n in self.nodes}
        n_skipped = len([e for e in self.edges if e['sub'] not in known_subjects])
        print("skipping {} edges where the subject is a node that is being skipped".format(n_skipped))

        for node in tqdm(self.nodes, desc="creating edges"):
            if not node.qid:
                m = wdi_helpers.format_msg(node.id_curie, node.id_pid, None, "QID not found, skipping edges")
                print(m)
                wdi_core.WDItemEngine.log("WARNING", m)
                continue

            this_uri = node.id_uri
            node_edges = [edge for edge in self.edges if edge['sub'] == this_uri]
            # keep only statements that were created and carry a value
            candidates = (self.make_statement_from_edge(edge) for edge in node_edges)
            ss = [stmt for stmt in candidates if stmt and stmt.get_value()]

            # set instance of using the root node
            for root_node in self.root_node[node.id_uri]:
                # don't add instance of self!
                if root_node not in self.uri_node_map or root_node == node.id_uri:
                    continue
                ref = node.create_ref_statement()
                value_qid = self.uri_node_map[root_node].qid
                if value_qid:
                    ss.append(wdi_core.WDItemID(value_qid, self.helper.get_pid('P31'), references=[ref]))

            if not ss:
                # there are no statements for this node
                continue

            item = wdi_core.WDItemEngine(
                wd_item_id=node.qid,
                data=ss,
                append_value=self.APPEND_PROPS,
                fast_run=self.FAST_RUN,
                fast_run_base_filter={node.id_pid: ''},
                fast_run_use_refs=True,
                global_ref_mode='CUSTOM',
                ref_handler=self.ref_handler,
                sparql_endpoint_url=self.sparql_endpoint_url,
                mediawiki_api_url=self.mediawiki_api_url,
                core_props=self.CORE_IDS,
            )
            # the record is identified by the (property, value) pair of its URI
            this_pid, this_value = cu.parse_curie(cu.uri_to_curie(this_uri))
            this_pid = self.helper.get_pid(this_pid)
            wdi_helpers.try_write(item, record_id=this_value, record_prop=this_pid,
                                  login=login, write=write)
示例#18
0
 def create_xref_statement(self, xref):
     """
     Turn a cross-reference CURIE into an external-ID statement.

     The property resolved for the xref is also recorded in ``self.pids``.

     :param xref: cross-reference of the form ``prefix:identifier``
     :return: the statement, or None (after logging a WARNING) when the prefix
         is not in ``cu.curie_map``
     """
     ref = self.create_ref_statement()
     prefix = xref.split(":")[0]
     if prefix in cu.curie_map:
         pid, ext_id = cu.parse_curie(xref)
         pid = self.helper.get_pid(pid)
         self.pids.add(pid)
         return wdi_core.WDExternalID(ext_id, pid, references=[ref])

     # log this curie prefix not being found
     warning = wdi_helpers.format_msg(self.id_curie, self.id_pid, self.qid,
                                      "curie prefix not found: {}".format(prefix))
     wdi_core.WDItemEngine.log("WARNING", warning)
     return None
示例#19
0
    def create(self, login, write=True, allow_new=True):
        """
        Create this node's item, or retrieve the existing one, and write it.

        Per the original notes this covers the primary external ID, the xrefs,
        instance of (if set), and the label, description and aliases - but not
        other properties (i.e. subclass), since those may need items that do not
        exist yet. On success ``self.qid`` is set from the written item.

        :param login: login object passed to ``wdi_helpers.try_write``
        :param write: forwarded to ``wdi_helpers.try_write``
        :param allow_new: when False, return without writing if the engine
            would create a new item
        :return: None. Failures inside the guarded section (including a
            retrieved item that already carries different primary external IDs)
            are printed and logged as ERROR.
        """
        self._pre_create()
        assert self.id_curie
        statements = self.create_statements()

        primary_ext_id_pid, primary_ext_id = cu.parse_curie(self.id_curie)
        primary_ext_id_pid = self.helper.get_pid(primary_ext_id_pid)
        assert primary_ext_id_pid in self.graph.APPEND_PROPS

        try:
            self.item = wdi_core.WDItemEngine(
                data=statements,
                append_value=self.graph.APPEND_PROPS,
                fast_run=self.graph.FAST_RUN,
                fast_run_base_filter={primary_ext_id_pid: ''},
                fast_run_use_refs=True,
                global_ref_mode='CUSTOM',
                ref_handler=self.ref_handler,
                mediawiki_api_url=self.mediawiki_api_url,
                sparql_endpoint_url=self.sparql_endpoint_url,
                core_props=self.graph.CORE_IDS,
                core_prop_match_thresh=.9,
            )
            # assert the retrieved item doesn't already have a primary_ext_id id
            if self.item.wd_item_id:
                query = "select ?primary_ext_id where {{ wd:{} wdt:{} ?primary_ext_id }}".format(
                    self.item.wd_item_id, primary_ext_id_pid)
                bindings = wdi_core.WDItemEngine.execute_sparql_query(query)['results']['bindings']
                existing_ids = [binding['primary_ext_id']['value'] for binding in bindings]
                # an item with no such ID yet is fine; one with only other IDs is a conflict
                if existing_ids and self.id_curie not in existing_ids:
                    raise Exception(
                        "conflicting primary_ext_id IDs: {} on {}".format(self.id_curie, self.item.wd_item_id))
            if self.item.create_new_item and not allow_new:
                return None
        except Exception as err:
            traceback.print_exc()
            error_msg = wdi_helpers.format_msg(primary_ext_id, primary_ext_id_pid, None, str(err),
                                               msg_type=type(err))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
            return

        self.set_label(self.item)
        self.set_descr(self.item)
        self.set_aliases(self.item)
        # todo: I want to avoid this from happening: https://www.wikidata.org/w/index.php?title=Q4553565&diff=676750840&oldid=647941942

        wdi_helpers.try_write(self.item, record_id=primary_ext_id, record_prop=primary_ext_id_pid,
                              login=login, write=write)

        self.qid = self.item.wd_item_id
示例#20
0
def run_shex_manifest():
    """Check Wikidata items against the ShEx schemas named in the Reactome manifest.

    The manifest JSON is downloaded from the Genewiki-ShEx repository. Only
    cases whose ``data`` field starts with ``"Endpoint:"`` are processed: the
    schema at ``case.schemaURL`` is fetched, the SPARQL query embedded in
    ``case.queryMap`` is executed, and every returned ``item`` is evaluated
    against the schema.

    One log record is written per evaluation result: ``INFO`` with the message
    ``CONFORMS`` when the result is truthy, ``ERROR`` (with an empty message)
    otherwise. A ``RuntimeError`` raised while evaluating an item is reported
    on stdout and that item is skipped.
    """
    manifest = jsonasobj.loads(
        requests.get(
            "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/pathways/reactome/manifest.json"
        ).text)
    for case in manifest:
        if not case.data.startswith("Endpoint:"):
            continue
        sparql_endpoint = case.data.replace("Endpoint: ", "")
        schema = requests.get(case.schemaURL).text
        shex = ShExC(schema).schema
        evaluator = ShExEvaluator(schema=shex, debug=True)
        # Strip the manifest's wrapper so only the raw SPARQL text remains.
        sparql_query = case.queryMap.replace("SPARQL '''",
                                             "").replace("'''@START", "")

        df = wdi_core.WDItemEngine.execute_sparql_query(sparql_query)
        for row in df["results"]["bindings"]:
            wdid = row["item"]["value"]
            slurpeddata = SlurpyGraph(sparql_endpoint)
            try:
                results = evaluator.evaluate(rdf=slurpeddata,
                                             focus=wdid,
                                             debug=False)
                for result in results:
                    if result.result:
                        print(str(result.focus) + ": INFO")
                        msg = wdi_helpers.format_msg(
                            wdid, wdid, None, 'CONFORMS', '')
                        wdi_core.WDItemEngine.log("INFO", msg)
                    else:
                        # NOTE(review): the message text is empty here; the
                        # evaluation result may carry a failure reason worth
                        # logging -- confirm against the evaluator's API.
                        msg = wdi_helpers.format_msg(
                            wdid, wdid, None, '', '')
                        # Log the message that was just built (the original
                        # passed an undefined name and raised NameError).
                        wdi_core.WDItemEngine.log("ERROR", msg)
            except RuntimeError:
                print(
                    "Continue after 1 minute, no validation happened on " +
                    wdid)
示例#21
0
    def create_item(self, fast_run=True, write=True):
        """Assemble the protein item for this record and try to write it.

        Runs the parse/create steps that populate the external ids,
        statements, label, description and aliases, builds a ``WDItemEngine``
        in the ``proteins`` domain, and passes it to ``wdi_helpers.try_write``
        keyed on the UniProt ID. The result of ``try_write`` is stored in
        ``self.status`` and the item's id in ``self.protein_wdid``.

        :param fast_run: forwarded to ``WDItemEngine`` as ``fast_run``
        :param write: forwarded to ``wdi_helpers.try_write`` as ``write``
        :return: the ``WDItemEngine`` instance, or ``None`` when any step
            raised (the error is printed, logged as ``ERROR`` and stored in
            ``self.status``)
        """
        try:
            self.parse_external_ids()
            self.statements = self.create_statements()
            self.create_label()
            self.create_description()
            self.create_aliases()

            # PROPS['Ensembl Protein ID'] and PROPS['RefSeq Protein ID'] are
            # deliberately not part of append_value.
            protein_item = wdi_core.WDItemEngine(
                item_name=self.label,
                domain='proteins',
                data=self.statements,
                append_value=[PROPS['instance of'], PROPS['encoded by']],
                fast_run=fast_run,
                fast_run_base_filter={
                    PROPS['UniProt ID']: '',
                    PROPS['found in taxon']: self.organism_info['wdid'],
                },
                fast_run_use_refs=True,
                ref_handler=update_retrieved_if_new,
                global_ref_mode="CUSTOM",
                core_props=core_props,
            )
            protein_item.set_label(self.label)
            protein_item.set_description(self.description, lang='en')

            # Merge existing and new aliases, dropping the alias "protein".
            merged_aliases = set(protein_item.get_aliases()) | set(self.aliases)
            merged_aliases.discard("protein")
            protein_item.set_aliases(merged_aliases, append=False)

            self.status = wdi_helpers.try_write(
                protein_item,
                self.external_ids['UniProt ID'],
                PROPS['UniProt ID'],
                self.login,
                write=write,
            )
            self.protein_wdid = protein_item.wd_item_id
            return protein_item
        except Exception as e:
            traceback.print_exc()
            # The failure is recorded against the Entrez Gene ID.
            error_msg = wdi_helpers.format_msg(
                self.external_ids['Entrez Gene ID'],
                PROPS['Entrez Gene ID'],
                None,
                str(e),
                msg_type=type(e),
            )
            wdi_core.WDItemEngine.log("ERROR", error_msg)
            self.status = error_msg
            return None
示例#22
0
def main(retrieved, fast_run, write):
    """Load the current WikiPathways RDF archives and run every selected pathway.

    Steps:
      1. Log in and fetch the directory listing at the WikiPathways RDF URL.
      2. Collect the link targets found in the listing's ``<a>`` tags.
      3. Download each archive whose URL contains ``"rdf-wp"`` and parse every
         member as turtle into one in-memory graph.
      4. Query the graph for the identifiers of pathways matching the
         organism / curation filter in the SPARQL text below.
      5. Call ``run_one`` for each identifier; an exception for one pathway is
         printed and logged as ``ERROR`` and does not stop the others.

    :param retrieved: passed through unchanged to ``run_one``
    :param fast_run: passed through unchanged to ``run_one``
    :param write: passed through unchanged to ``run_one``
    """
    login = wdi_login.WDLogin(WDUSER, WDPASS)
    graph = Graph()
    base_url = 'http://data.wikipathways.org/current/rdf'
    listing_html = requests.get(base_url).text

    archive_urls = []
    for anchor in BeautifulSoup(listing_html, "lxml", parse_only=SoupStrainer('a')):
        # The link target is the text between the first pair of double quotes.
        pieces = str(anchor).split("\"")
        if len(pieces) <= 1:
            continue
        href = pieces[1].replace("./", "/")
        if len(href) > 1 and href not in archive_urls and href != "./":
            archive_urls.append(base_url + href)

    for archive_url in set(archive_urls):
        # get the most accurate file
        if "rdf-wp" not in archive_url:
            continue
        print(archive_url)
        response = requests.get(archive_url)
        with closing(response), zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            for member in archive.infolist():
                member_bytes = archive.read(member)
                graph.parse(data=member_bytes.decode(), format="turtle")
        print("size: " + str(len(graph)))

    wp_query = """prefix dcterm: <http://purl.org/dc/terms/>
            prefix wp: <http://vocabularies.wikipathways.org/wp#>
            SELECT DISTINCT ?wpid WHERE {
              ?s rdf:type <http://vocabularies.wikipathways.org/wp#Pathway> ;
                 dcterm:identifier ?wpid ;
                 ?p <http://vocabularies.wikipathways.org/wp#Curation:AnalysisCollection> ;
                 wp:organism <http://purl.obolibrary.org/obo/NCBITaxon_9606> .
              }"""

    wpids = []
    for row in graph.query(wp_query):
        print("%s" % row)
        wpids.append(str(row[0]))

    for pathway_id in wpids:
        try:
            run_one(pathway_id, retrieved, fast_run, write, login, graph)
        except Exception as e:
            traceback.print_exc()
            failure = wdi_helpers.format_msg(pathway_id, PROPS['Wikipathways ID'],
                                             None, str(e), type(e))
            wdi_core.WDItemEngine.log("ERROR", failure)
示例#23
0
def validate_docs(docs, doc_type, external_id_prop):
    """Lazily yield the validated form of each doc, skipping the ones that fail.

    ``doc_type`` selects the validator: ``validate_doc_microbial`` for
    ``"microbial"``, ``validate_doc_eukaryotic`` otherwise. A doc whose
    validator raises ``ValueError`` is printed, logged as ``WARNING`` under
    its ``_id`` and ``external_id_prop``, and not yielded.

    Because this is a generator, the ``doc_type`` assertion only fires once
    iteration starts.
    """
    assert doc_type in {'eukaryotic', 'microbial'}
    validator = validate_doc_microbial if doc_type == "microbial" else validate_doc_eukaryotic
    for raw_doc in docs:
        try:
            checked = validator(raw_doc)
        except ValueError as e:
            print(e)
            warning = wdi_helpers.format_msg(raw_doc['_id'], external_id_prop, None, str(e), type(e))
            wdi_core.WDItemEngine.log("WARNING", warning)
        else:
            yield checked
示例#24
0
 def run(self, records, total=None, fast_run=True, write=True):
     """Create a protein item for every record whose gene is already mapped.

     For each record the Entrez gene id is read from
     ``record['entrezgene']['@value']``. If that id is missing from
     ``self.gene_wdid_mapping`` a ``WARNING`` is logged and the record is
     skipped; otherwise a ``Protein`` is built, its item created, and
     ``make_gene_encodes`` is called on it.

     :param records: iterable of record dicts
     :param total: passed to ``tqdm`` as the progress-bar total
     :param fast_run: forwarded to ``Protein.create_item``
     :param write: forwarded to ``create_item`` and ``make_gene_encodes``
     """
     for record in tqdm(records, mininterval=2, total=total):
         entrez_id = str(record['entrezgene']['@value'])
         if entrez_id in self.gene_wdid_mapping:
             gene_qid = self.gene_wdid_mapping[entrez_id]
             protein = Protein(record, self.organism_info, gene_qid,
                               self.login)
             protein.create_item(fast_run=fast_run, write=write)
             protein.make_gene_encodes(write=write)
         else:
             warning = format_msg(entrez_id, "P351", None,
                                  "Gene item not found during protein creation",
                                  None)
             wdi_core.WDItemEngine.log("WARNING", warning)
示例#25
0
def validate_docs(docs, doc_type, external_id_prop):
    """Lazily yield the validated form of each doc, skipping the ones that fail.

    ``"microbial"`` docs go through ``validate_doc_microbial``; any other
    accepted ``doc_type`` goes through ``validate_doc`` with ``doc_type``
    bound as a keyword argument. A doc whose validator raises
    ``AssertionError`` has its traceback printed, is logged as ``WARNING``
    under its ``_id`` and ``external_id_prop``, and is not yielded.

    Because this is a generator, the ``doc_type`` assertion only fires once
    iteration starts.
    """
    assert doc_type in {'gene', 'protein', 'microbial'}
    validator = (validate_doc_microbial if doc_type == "microbial"
                 else functools.partial(validate_doc, doc_type=doc_type))
    for raw_doc in docs:
        try:
            checked = validator(raw_doc)
        except AssertionError as e:
            traceback.print_exc()
            warning = wdi_helpers.format_msg(raw_doc['_id'], external_id_prop, None, str(e), type(e))
            wdi_core.WDItemEngine.log("WARNING", warning)
        else:
            yield checked
示例#26
0
 def run(self, records, total=None, fast_run=True, write=True):
     """Create a gene item for every record that passes ``self.filter``.

     Each record is wrapped in ``self.GENE_CLASS`` and its ``create_item`` is
     called. If that call raises, the traceback is printed, an ``ERROR`` is
     logged against the gene's Entrez Gene ID, and the message becomes the
     gene's ``status``. Any gene whose ``status`` is not exactly ``True``
     afterwards has its ``entrez`` appended to ``self.failed``.

     :param records: iterable of records, filtered through ``self.filter``
     :param total: passed to ``tqdm`` as the progress-bar total
     :param fast_run: forwarded to ``create_item``
     :param write: forwarded to ``create_item``
     """
     for record in tqdm(self.filter(records), mininterval=2, total=total):
         gene = self.GENE_CLASS(record, self.organism_info, self.chr_num_wdid, self.login)
         try:
             gene.create_item(fast_run=fast_run, write=write)
         except Exception as e:
             traceback.print_exc()
             failure = wdi_helpers.format_msg(
                 gene.external_ids['Entrez Gene ID'],
                 PROPS['Entrez Gene ID'],
                 None,
                 str(e),
                 msg_type=type(e),
             )
             wdi_core.WDItemEngine.log("ERROR", failure)
             gene.status = failure
         if gene.status is not True:
             self.failed.append(gene.entrez)
示例#27
0
    def create_item(self, login=None, write=True):
        """Build the Wikidata item for this node and try to write it.

        Deprecated nodes are skipped. Otherwise the xref statements and the
        "nodepend" main statements are combined into one ``WDItemEngine``;
        the English label, description, aliases and enwiki sitelink are
        filled in as described inline, and the item is passed to
        ``wdi_helpers.try_write``.

        :param login: forwarded to ``wdi_helpers.try_write``
        :param write: forwarded to ``wdi_helpers.try_write``
        :return: the ``WDItemEngine`` instance on success; ``None`` when the
            node is deprecated or when any step raised (the error is printed
            and logged as ``ERROR``)
        """
        if self.deprecated:
            return None
        try:
            statements = list(self.create_xref_statements())
            statements.extend(self.create_main_statements_nodepend())

            item = wdi_core.WDItemEngine(
                item_name=self.lbl,
                data=statements,
                domain=self.domain,
                append_value=[PROPS['subclass of'], PROPS['instance of']],
                fast_run=self.fast_run,
                fast_run_base_filter={self.primary_ext_prop_qid: ''},
            )

            # Only set the English label when the item has none.
            if item.get_label(lang="en") == "":
                item.set_label(self.lbl, lang="en")

            # Use the definition when the current description is empty or the
            # default label and the definition is under 250 characters; fall
            # back to the default label only for an empty description.
            descr_lower = item.get_description(lang='en').lower()
            replaceable = descr_lower in {"", self.default_label}
            if replaceable and self.definition and len(self.definition) < 250:
                item.set_description(description=self.definition, lang='en')
            elif not descr_lower:
                item.set_description(description=self.default_label, lang='en')

            if self.synonyms is not None:
                item.set_aliases(aliases=self.synonyms, lang='en', append=True)
            if self.wikilink is not None:
                item.set_sitelink(site="enwiki", title=self.wikilink)

            wdi_helpers.try_write(
                item,
                record_id=self.id_colon,
                record_prop=self.primary_ext_prop_qid,
                login=login,
                write=write,
            )
            self.wd_item_id = item.wd_item_id
            return item
        except Exception as e:
            traceback.print_exc()
            failure = wdi_helpers.format_msg(
                self.id_colon,
                self.primary_ext_prop_qid,
                None,
                str(e),
                msg_type=type(e),
            )
            wdi_core.WDItemEngine.log("ERROR", failure)
示例#28
0
 def write_item(self, wd_item):
     """Write one prepared item, logging instead of raising on failure.

     Does nothing when ``self.write`` is falsy. Otherwise the item is passed
     to ``wdi_helpers.try_write`` with the edit summary
     ``'edit genetic association'``; any exception is printed and logged as
     ``ERROR``.

     :param wd_item: mapping with the keys ``'item'``, ``'record_id'`` and
         ``'record_prop'``
     """
     if not self.write:
         return
     try:
         wdi_helpers.try_write(
             wd_item['item'],
             record_id=wd_item['record_id'],
             record_prop=wd_item['record_prop'],
             edit_summary='edit genetic association',
             login=self.login,
             write=self.write,
         )
     except Exception as e:
         print(e)
         failure = wdi_helpers.format_msg(
             wd_item['record_id'],
             wd_item['record_prop'],
             wd_item['item'].wd_item_id,
             str(e),
             type(e),
         )
         wdi_core.WDItemEngine.log("ERROR", failure)
示例#29
0
    def get_or_create_article(self, pmid):
        """Return the item id for a PubMed id, consulting the local cache first.

        A hit in ``self.pmid_wdid_map`` is returned directly. On a miss a
        ``wdi_helpers.PubmedItem`` is built; when ``self.write`` is truthy its
        ``get_or_create`` result is used, otherwise the dummy id ``'Q1'`` is
        used and an ``INFO`` record is logged. The id is cached before being
        returned.

        :param pmid: PubMed identifier used as the cache key
        :return: the cached or newly obtained item id
        """
        # check if exists in wikidata
        if pmid in self.pmid_wdid_map:
            return self.pmid_wdid_map[pmid]

        article = wdi_helpers.PubmedItem(pmid)
        if self.write:
            wdid = article.get_or_create(self.login)
        else:
            wdid = 'Q1'  # Dummy ID
            info = wdi_helpers.format_msg(pmid, PROPS['PubMed ID'], wdid,
                                          "CREATE")
            wdi_core.WDItemEngine.log("INFO", info)
        self.pmid_wdid_map[pmid] = wdid
        return wdid
示例#30
0
 def create(self, write=True):
     """Build the disease item for this node and try to write it.

     Deprecated nodes are skipped. Otherwise the xref and main statements are
     generated and appended to ``self.s``, a ``WDItemEngine`` in the
     ``diseases`` domain is created from them, the English label,
     description, aliases and enwiki sitelink are filled in as described
     inline, and the item is passed to ``wdi_helpers.try_write`` under
     property ``P699``.

     :param write: forwarded to ``wdi_helpers.try_write``
     :return: the ``WDItemEngine`` instance on success; ``None`` when the node
         is deprecated or when any step raised (the error is printed and
         logged as ``ERROR``)
     """
     if self.deprecated:
         return None
     try:
         self.create_xref_statements()
         self.s.extend(self.s_xref)
         self.create_main_statements()
         self.s.extend(self.s_main)

         item = wdi_core.WDItemEngine(
             item_name=self.lbl,
             data=self.s,
             domain="diseases",
             append_value=[PROPS['subclass of'], PROPS['instance of']],
             fast_run=self.do_graph.fast_run,
             fast_run_base_filter={'P699': ''},
         )

         # Only set the English label when the item has none.
         if item.get_label(lang="en") == "":
             item.set_label(self.lbl, lang="en")

         # Use the definition when the current description is empty or one of
         # the generic placeholders and the definition is under 250
         # characters; fall back to "human disease" only for an empty one.
         descr_lower = item.get_description(lang='en').lower()
         replaceable = descr_lower in {"", "human disease", "disease"}
         if replaceable and self.definition and len(self.definition) < 250:
             item.set_description(description=self.definition, lang='en')
         elif not descr_lower:
             item.set_description(description="human disease", lang='en')

         if self.synonyms is not None:
             item.set_aliases(aliases=self.synonyms, lang='en', append=True)
         if self.wikilink is not None:
             item.set_sitelink(site="enwiki", title=self.wikilink)

         wdi_helpers.try_write(
             item,
             record_id=self.id,
             record_prop='P699',
             login=self.do_graph.login,
             write=write,
         )
         return item
     except Exception as e:
         traceback.print_exc()
         failure = wdi_helpers.format_msg(
             self.doid,
             'P699',
             None,
             str(e),
             msg_type=type(e),
         )
         wdi_core.WDItemEngine.log("ERROR", failure)