Пример #1
0
def run_one(wd_expressed_in_statements: dict,
            login,
            append_data: bool = APPEND_DATA):
    """Write the "expressed in" statements of every gene, one write per gene item.

    :param wd_expressed_in_statements: dictionary keyed by Wikidata gene id; each value is the
     list of statements handed to the item engine for that gene.
    :param login: the Wikidata login object passed to ``wdi_helpers.try_write``.
    :param append_data: if true, the item is opened with ``search_only=True`` and the statements
     are merged in through ``item.update``; otherwise the engine is built directly from the
     statements with fast run enabled.
    """
    for position, (gene_qid, gene_statements) in enumerate(
            wd_expressed_in_statements.items()):
        if not append_data:
            # Build the engine straight from the statements, restricted to "expressed in".
            item = wdi_core.WDItemEngine(
                data=gene_statements,
                wd_item_id=gene_qid,
                fast_run=True,
                fast_run_base_filter={PROPS['expressed in']: ''})
        else:
            # Load the existing item first, then merge the statements into it.
            item = wdi_core.WDItemEngine(
                wd_item_id=gene_qid,
                search_only=True,
                global_ref_mode=APPEND_REFERENCE_MODE)
            item.update(gene_statements, [PROPS['expressed in']])

        # The record id carries the running position so each write is distinguishable.
        wdi_helpers.try_write(
            item,
            record_id=gene_qid + "-" + str(position),
            record_prop=PROPS['expressed in'],
            login=login,
            edit_summary="Update gene expression based on the Bgee database")
Пример #2
0
 def write_item(self, wd_item):
     """Write one prepared item through ``wdi_helpers.try_write``.

     :param wd_item: mapping with the keys ``'item'``, ``'record_id'`` and ``'record_prop'``.
     """
     engine = wd_item['item']
     record_id = wd_item['record_id']
     record_prop = wd_item['record_prop']
     wdi_helpers.try_write(
         engine,
         record_id=record_id,
         record_prop=record_prop,
         edit_summary='edit genetic association',
         login=self.login,
         write=self.write,
     )
Пример #3
0
def do_item(entrezgene, orthologs, reference, entrez_homo, entrez_taxon,
            taxon_wdid, entrez_wdid, login, write):
    """Write the ortholog statements for one gene item.

    :param entrezgene: Entrez gene id of the item being edited (converted to ``str``).
    :param orthologs: iterable of Entrez gene ids; the gene itself is skipped.
    :param reference: callable that builds the reference from ``entrez_homo[entrezgene]``.
    :param entrez_homo: mapping indexed by the Entrez gene id string.
    :param entrez_taxon: mapping from Entrez gene id string to a taxon id.
    :param taxon_wdid: mapping from taxon id to the taxon's Wikidata id.
    :param entrez_wdid: mapping from Entrez gene id string to the gene's Wikidata id.
    :param login: login object passed to ``wdi_helpers.try_write``.
    :param write: passed to ``wdi_helpers.try_write``.
    :raises ValueError: when an ortholog has no entry in ``entrez_taxon``.
    """
    gene_id = str(entrezgene)
    shared_ref = reference(entrez_homo[gene_id])

    statements = []
    for other_id in map(str, orthologs):
        if other_id == gene_id:
            # a gene is not its own ortholog
            continue
        if other_id not in entrez_taxon:
            raise ValueError("missing taxid for: " + other_id)
        taxon_qualifier = wdi_core.WDItemID(
            taxon_wdid[entrez_taxon[other_id]],
            PROPS['found in taxon'],
            is_qualifier=True)
        ortholog_statement = wdi_core.WDItemID(
            entrez_wdid[other_id],
            PROPS['ortholog'],
            references=[shared_ref],
            qualifiers=[taxon_qualifier])
        statements.append(ortholog_statement)

    target_qid = entrez_wdid[gene_id]
    base_filter = {
        PROPS['Entrez Gene ID']: '',
        PROPS['found in taxon']: taxon_wdid[entrez_taxon[gene_id]],
    }
    item = wdi_core.WDItemEngine(
        wd_item_id=target_qid,
        data=statements,
        fast_run=fast_run,
        fast_run_base_filter=base_filter,
        core_props=core_props)
    wdi_helpers.try_write(
        item,
        gene_id,
        PROPS['Entrez Gene ID'],
        edit_summary="edit orthologs",
        login=login,
        write=write)
def do_pharm_prod(drug_qid, brand_rxnorm, emea, url, brand_name):
    """Write the statements of a pharmaceutical product item and return its item id.

    :param drug_qid: Wikidata id used as the value of the ``P3781`` statement.
    :param brand_rxnorm: value for ``P3345``; skipped when ``pd.isnull`` reports it as missing.
    :param emea: value for ``P3637``, also used as the record id of the write.
    :param url: passed with ``emea`` to ``create_ref_statement``.
    :param brand_name: item name and label of the product item.
    :return: ``wd_item_id`` of the item engine after the write attempt.
    """
    ref = create_ref_statement(emea, url)
    statements = [
        # has active substance
        wdi_core.WDItemID(drug_qid, 'P3781', references=[ref]),
        # instance of: pharmaceutical product
        wdi_core.WDItemID('Q28885102', 'P31', references=[ref]),
        # instance of: chemical mixture
        wdi_core.WDItemID('Q169336', 'P31', references=[ref]),
        # emea
        wdi_core.WDExternalID(emea, 'P3637', references=[ref]),
    ]

    has_rxnorm = not pd.isnull(brand_rxnorm)
    if has_rxnorm:
        rxnorm_id = str(int(brand_rxnorm))
        statements.append(wdi_core.WDExternalID(rxnorm_id, "P3345"))

    item = wdi_core.WDItemEngine(
        item_name=brand_name,
        data=statements,
        domain="drugs",
        append_value=['P3781'])
    item.set_label(brand_name)
    # only fill in a description when the item has none
    if item.get_description() == '':
        item.set_description("pharmaceutical product")

    wdi_helpers.try_write(
        item,
        emea,
        'P3637',
        login,
        edit_summary="add 'active ingredient'")

    return item.wd_item_id
Пример #5
0
 def create_depend(self, login=None, write=True):
     """Write this node's main statements onto its already existing item.

     :param login: login object forwarded to ``wdi_helpers.try_write``.
     :param write: forwarded to ``wdi_helpers.try_write``.
     :return: the ``WDItemEngine`` instance after the write attempt, or ``None`` when the node
      is deprecated, has no ``wd_item_id`` yet, or an exception was raised (the exception is
      printed and logged as an error).
     """
     if self.deprecated:
         return None
     if not self.wd_item_id:
         # The message used to format ``node.id_purl``, but no ``node`` is bound in this
         # method; report this instance's own identifier instead.
         print("must create item first: {}".format(self.id_purl))
         return None
     try:
         s = self.create_main_statements()
         wd_item = wdi_core.WDItemEngine(
             wd_item_id=self.wd_item_id,
             data=s,
             domain=self.domain,
             append_value=[PROPS['subclass of'], PROPS['instance of']],
             fast_run=self.fast_run,
             fast_run_base_filter={self.primary_ext_prop_qid: ''})
         wdi_helpers.try_write(wd_item,
                               record_id=self.id_colon,
                               record_prop=self.primary_ext_prop_qid,
                               login=login,
                               write=write)
         return wd_item
     except Exception as e:
         # Best effort: print the traceback, log the failure and report "no item".
         traceback.print_exc()
         msg = wdi_helpers.format_msg(self.id_colon,
                                      self.primary_ext_prop_qid,
                                      None,
                                      str(e),
                                      msg_type=type(e))
         wdi_core.WDItemEngine.log("ERROR", msg)
         return None
Пример #6
0
    def get_or_create(self, login):
        """Return the item id for this publication, writing a new item when none is found.

        Validates the record, builds the reference and all statements, then instantiates the
        item engine from those statements.

        :param login: login object passed to ``try_write``.
        :return: tuple ``(wd_item_id, warnings, success)``. ``success`` is ``True`` when the
         engine already resolved to an item id, otherwise it is whatever ``try_write`` returned.
        """
        self.validate()
        self.make_reference()
        self.make_statements()
        self.make_ext_id_statements()
        self.make_author_statements()

        item = wdi_core.WDItemEngine(
            data=self.statements,
            append_value=[PROPS['DOI'], PROPS['PMCID'], PROPS['PubMed ID']],
        )

        existing_qid = item.wd_item_id
        if existing_qid:
            # the engine matched an existing item: nothing to write
            return existing_qid, self.warnings, True

        self.set_label(item)
        self.set_description(item)

        # choose the record id / property used for logging according to the source
        source = self.source
        if source == 'arxiv':
            record_id, record_prop = self.ids['arxiv'], PROPS["arxiv id"]
        elif source == 'biorxiv':
            record_id, record_prop = self.ids['biorxiv'], PROPS["biorxiv id"]
        else:
            record_id, record_prop = self.ids['doi'], PROPS["DOI"]
        success = try_write(item, record_id, record_prop, login)

        return item.wd_item_id, self.warnings, success
Пример #7
0
def run_one(row):
    """Write the organism item described by one input row.

    :param row: mapping with the keys ``'#Organism/Name'``, ``'TaxID'`` and
     ``'Assembly Accession'``.
    """
    label = row['#Organism/Name']
    taxid = str(row['TaxID'])
    genbank_id = row['Assembly Accession']

    # each statement gets its own reference object built from the assembly accession
    assembly_statement = wdi_core.WDExternalID(
        genbank_id,
        uri_map[PROPS['GenBank Assembly accession']],
        references=[create_reference(genbank_id)])
    taxon_statement = wdi_core.WDExternalID(
        taxid,
        uri_map[PROPS['NCBI Taxonomy ID']],
        references=[create_reference(genbank_id)])

    item = localItemEngine(
        data=[assembly_statement, taxon_statement],
        item_name=label,
        domain="organism",
        fast_run=True,
        fast_run_base_filter={uri_map[PROPS['NCBI Taxonomy ID']]: ''})
    item.set_label(label)
    item.set_description("bug")

    wdi_helpers.try_write(
        item,
        login=login,
        record_id=genbank_id,
        record_prop=uri_map[PROPS['GenBank Assembly accession']])
Пример #8
0
def run_one(taxid, genbank_id):
    """Write the GenBank assembly accession onto the organism item of a taxon.

    :param taxid: taxonomy id; converted to ``str`` and looked up in ``tax_qid_map``.
    :param genbank_id: accession value, also used as the record id of the write.
    :return: ``None``; when the taxon is not in ``tax_qid_map`` a warning is logged and nothing
     is written.
    """
    accession_prop = PROPS['GenBank Assembly accession']
    taxid = str(taxid)

    if taxid not in tax_qid_map:
        warning = wdi_helpers.format_msg(
            genbank_id, accession_prop, "",
            "organism with taxid {} not found or skipped".format(taxid))
        wdi_core.WDItemEngine.log("WARNING", warning)
        return None

    qid = tax_qid_map[taxid]
    accession_statement = wdi_core.WDExternalID(
        genbank_id,
        accession_prop,
        references=[create_reference(genbank_id)])

    # the item is addressed by its qid, so no new item is searched for or created by name
    item = wdi_core.WDItemEngine(
        data=[accession_statement],
        wd_item_id=qid,
        fast_run=True,
        fast_run_base_filter={accession_prop: ''},
        global_ref_mode='CUSTOM',
        fast_run_use_refs=True,
        ref_handler=update_retrieved_if_new)

    wdi_helpers.try_write(
        item,
        record_id=genbank_id,
        record_prop=accession_prop,
        login=login,
        edit_summary="update GenBank Assembly accession")
Пример #9
0
    def update_item(self, qid, fast_run=True, write=True):
        """Rebuild the protein statements and write them onto the item ``qid``.

        :param qid: Wikidata id of the protein item.
        :param fast_run: passed to the item engine.
        :param write: passed to ``wdi_helpers.try_write``.
        :return: the item engine instance, or ``None`` when any step raised (the exception is
         printed and logged as an error).
        """
        print("updating protein: {}".format(qid))
        try:
            self.parse_external_ids()
            self.statements = self.create_statements()

            appendable_props = [
                PROPS['instance of'],
                PROPS['encoded by'],
                PROPS['Ensembl Protein ID'],
                PROPS['RefSeq Protein ID'],
            ]
            base_filter = {
                PROPS['UniProt ID']: '',
                PROPS['found in taxon']: self.organism_info['wdid'],
            }
            wd_item_protein = wdi_core.WDItemEngine(
                wd_item_id=qid,
                data=self.statements,
                append_value=appendable_props,
                fast_run=fast_run,
                fast_run_base_filter=base_filter,
                fast_run_use_refs=True,
                ref_handler=update_retrieved_if_new,
                global_ref_mode="CUSTOM",
                core_props=core_props)
            wdi_helpers.try_write(
                wd_item_protein,
                self.external_ids['UniProt ID'],
                PROPS['UniProt ID'],
                self.login,
                write=write)
            self.protein_wdid = wd_item_protein.wd_item_id
            return wd_item_protein
        except Exception as e:
            traceback.print_exc()
            # the failure is logged against the Entrez Gene ID of the record
            error_msg = wdi_helpers.format_msg(
                self.external_ids['Entrez Gene ID'],
                PROPS['Entrez Gene ID'],
                None,
                str(e),
                msg_type=type(e))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
            return None
Пример #10
0
    def make_gene_encodes(self, write=True):
        """
        Add an "encodes" statement, pointing at the protein item, to the gene item.

        The reference is built before the guarded section, so an error while building it
        propagates to the caller; errors raised afterwards are printed and logged.

        :param write: passed to ``wdi_helpers.try_write``.
        :return: None
        """
        uniprot_id = self.external_ids['UniProt ID']
        uniprot_ref = make_ref_source(self.record['uniprot']['@source'],
                                      PROPS['UniProt ID'],
                                      uniprot_id,
                                      login=self.login)

        try:
            encodes = wdi_core.WDItemID(self.protein_wdid, PROPS['encodes'], references=[uniprot_ref])
            base_filter = {
                PROPS['Entrez Gene ID']: '',
                PROPS['found in taxon']: self.organism_info['wdid'],
            }
            wd_item_gene = wdi_core.WDItemEngine(
                wd_item_id=self.gene_wdid,
                data=[encodes],
                append_value=[PROPS['encodes']],
                fast_run=fast_run,
                fast_run_base_filter=base_filter,
                global_ref_mode="CUSTOM",
                ref_handler=update_retrieved_if_new,
                core_props=core_props)
            wdi_helpers.try_write(
                wd_item_gene,
                self.external_ids['UniProt ID'],
                PROPS['UniProt ID'],
                self.login,
                write=write)
        except Exception as e:
            traceback.print_exc()
            error_msg = wdi_helpers.format_msg(
                self.external_ids['UniProt ID'],
                PROPS['UniProt ID'],
                None,
                str(e),
                msg_type=type(e))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
Пример #11
0
def remove_deprecated_statements(qid, frc, release_wdid, props, login):
    """Remove statements of ``qid`` that are referenced to a release other than the current one.

    :param qid: Wikidata id of the item to clean up.
    :param frc: fast run container used to reconstruct the item's statements.
    :param release_wdid: Wikidata id of the current release; statements citing it are kept.
    :param props: properties that are registered with ``frc`` before reconstructing.
    :param login: login object passed to ``wdi_helpers.try_write``.
    """
    # numeric ids of every known release except the current one
    release_qids = set(INTERPRO_RELEASES.values()) | {'Q3047275'}
    old_releases = {int(release_qid.replace("Q", "")) for release_qid in release_qids}
    old_releases.discard(int(release_wdid.replace("Q", "")))

    # make sure we have these props in frc
    for prop in props:
        placeholder = wdi_core.WDString("fake value", prop)
        frc.write_required([placeholder])
    orig_statements = frc.reconstruct_statements(qid)

    def cites_old_release(statement):
        # true when any reference carries a 'P248' value that is one of the old releases
        for ref in statement.get_references():
            for snak in ref:
                if snak.get_prop_nr() == 'P248' and snak.get_value() in old_releases:
                    return True
        return False

    s_dep = []
    for statement in orig_statements:
        if cites_old_release(statement):
            # the 'remove' attribute marks the statement for removal
            statement.remove = ''
            s_dep.append(statement)

    if not s_dep:
        return

    print("-----")
    print(qid)
    print(orig_statements)
    print(s_dep)
    print([(x.get_prop_nr(), x.value) for x in s_dep])
    print([(x.get_references()[0]) for x in s_dep])
    wd_item = wdi_core.WDItemEngine(
        wd_item_id=qid,
        domain='none',
        data=s_dep,
        fast_run=False)
    wdi_helpers.try_write(
        wd_item,
        '',
        '',
        login,
        edit_summary="remove deprecated statements")
Пример #12
0
    def make_gene_encodes(self, write=True):
        """
        Add an "encodes" statement, pointing at the protein item, to the gene item.

        The reference is built before the guarded section, so an error while building it
        propagates to the caller; errors raised afterwards are printed and logged.

        :param write: passed to ``wdi_helpers.try_write``.
        :return: None
        """
        uniprot_id = self.external_ids['UniProt ID']
        uniprot_ref = make_ref_source(
            self.record['uniprot']['@source'],
            PROPS['UniProt ID'],
            uniprot_id,
            login=self.login)

        try:
            encodes = wdi_core.WDItemID(
                self.protein_wdid,
                PROPS['encodes'],
                references=[uniprot_ref])
            wd_item_gene = wdi_core.WDItemEngine(
                wd_item_id=self.gene_wdid,
                domain='genes',
                data=[encodes],
                append_value=[PROPS['encodes']])
            wdi_helpers.try_write(
                wd_item_gene,
                self.external_ids['UniProt ID'],
                PROPS['UniProt ID'],
                self.login,
                write=write)
        except Exception as e:
            traceback.print_exc()
            error_msg = wdi_helpers.format_msg(
                self.external_ids['UniProt ID'],
                PROPS['UniProt ID'],
                None,
                str(e),
                msg_type=type(e))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
Пример #13
0
    def run_one_disease(self, disease_qid, records):
        """Write one symptoms statement per record onto the disease item.

        :param disease_qid: Wikidata id of the disease item; it must already exist.
        :param records: iterable of mappings with the keys ``'Percent affected'``,
         ``'Pubmed id'``, ``'phenotype_qid'`` and ``'disease'``.
        """

        def to_statement(record):
            # the last two characters of 'Percent affected' are dropped before parsing the number
            incidence = float(record['Percent affected'][:-2])
            pmid = record['Pubmed id']
            phenotype_qid = record['phenotype_qid']
            omim_id = record['disease']

            reference = self.create_reference(omim_id, pmid=pmid, login=self.login)
            qualifiers = self.create_qualifier(incidence)
            return wdi_core.WDItemID(
                phenotype_qid,
                PROPS['symptoms'],
                references=[reference],
                qualifiers=qualifiers)

        statements = [to_statement(record) for record in records]

        item = self.item_engine(wd_item_id=disease_qid, data=statements)
        assert not item.create_new_item

        try_write(
            item,
            record_id=disease_qid,
            record_prop=PROPS['symptoms'],
            edit_summary="Add phenotype from mitodb",
            login=self.login,
            write=self.write)
Пример #14
0
    def create_item(self, login=None, fast_run=True, write=True):
        """Build the item engine for this term and, when a login is given, attempt the write.

        :param login: login object; when falsy no write is attempted.
        :param fast_run: passed to the item engine.
        :param write: passed to ``wdi_helpers.try_write``.
        :return: the item engine instance, or ``None`` when constructing it raised
         ``JSONDecodeError`` (which is logged as an error).
        """
        id_statement = wdi_core.WDExternalID(value=self.id, prop_nr=INTERPRO, references=[self.reference])
        type_statement = wdi_core.WDItemID(value=self.type_wdid, prop_nr=INSTANCE_OF,
                                           references=[self.reference])

        try:
            wd_item = wdi_core.WDItemEngine(
                item_name=self.name,
                domain='interpro',
                data=[id_statement, type_statement],
                append_value=["P279", "P31"],
                fast_run=fast_run,
                fast_run_base_filter=IPRTerm.fast_run_base_filter)
        except JSONDecodeError as e:
            error_msg = wdi_helpers.format_msg(self.id, INTERPRO, None, str(e), msg_type=type(e))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
            return None

        wd_item.set_label(self.name, lang='en')
        for lang, description in self.lang_descr.items():
            # leave descriptions that are already filled in untouched
            if wd_item.get_description(lang=lang) != "":
                continue
            wd_item.set_description(description, lang=lang)
        wd_item.set_aliases([self.short_name, self.id])

        if not login:
            return wd_item

        wdi_helpers.try_write(wd_item, self.id, INTERPRO, login, write=write)
        return wd_item
Пример #15
0
    def create_relationships(self, login, write=True):
        """Write the subclass of / has part / part of statements of this term.

        Nothing is written when the id lookup raises ``KeyError`` (logged as an error) or when
        the term has no relationship statements at all.

        :param login: login object passed to ``wdi_helpers.try_write``.
        :param write: passed to ``wdi_helpers.try_write``.
        """
        try:
            # endpoint may not get updated in time?
            self.do_wdid_lookup()
        except KeyError as e:
            wdi_core.WDItemEngine.log("ERROR", format_msg(self.id, INTERPRO, None, str(e), type(e)))
            return

        # the external id statement is always present; relationships are added after it
        statements = [wdi_core.WDExternalID(value=self.id, prop_nr=INTERPRO, references=[self.reference])]
        if self.parent:
            # subclass of
            statements.append(
                wdi_core.WDItemID(value=self.parent_wdid, prop_nr='P279', references=[self.reference]))
        if self.contains:
            # has part
            statements.extend(
                wdi_core.WDItemID(value=part_qid, prop_nr='P527', references=[self.reference])
                for part_qid in self.contains_wdid)
        if self.found_in:
            # part of
            statements.extend(
                wdi_core.WDItemID(value=whole_qid, prop_nr='P361', references=[self.reference])
                for whole_qid in self.found_in_wdid)

        if len(statements) <= 1:
            # only the external id statement: no relationships to write
            return

        wd_item = wdi_core.WDItemEngine(
            wd_item_id=self.wdid,
            domain='interpro',
            data=statements,
            append_value=['P279', 'P527', 'P361'],
            fast_run=True,
            fast_run_base_filter=IPRTerm.fast_run_base_filter)

        wdi_helpers.try_write(
            wd_item,
            self.id,
            INTERPRO,
            login,
            edit_summary="create/update subclass/has part/part of",
            write=write)
Пример #16
0
    def remove_deprecated_statements(self, releases, frc, login):
        """
        Clean up statements of this item whose references are stated in an old release.

        A statement whose single reference is old is removed, unless it is on the primary
        external ID property, in which case its rank is set to deprecated. A statement with
        several references only loses its old references.

        :param releases: a set of qid for releases which, when used as 'stated in' on a reference,
        the statement should be removed
        :param frc: object whose ``reconstruct_statements(qid)`` returns the item's statements
        :param login: login object passed to ``wdi_helpers.try_write``
        :return: None
        """

        def cites_old_release(reference):
            stated_in_pid = self.helper.get_pid('P248')
            for snak in reference:
                if snak.get_prop_nr() == stated_in_pid and "Q" + str(snak.get_value()) in releases:
                    return True
            return False

        qid = self.qid
        curie_prefix, primary_ext_id = cu.parse_curie(self.id_curie)
        primary_ext_id_pid = self.helper.get_pid(curie_prefix)

        to_update = []
        to_remove = []
        for statement in frc.reconstruct_statements(qid):
            refs = statement.get_references()
            if len(refs) == 1:
                if not cites_old_release(refs[0]):
                    continue
                # this is the only ref on this statement and its from an old release
                if statement.get_prop_nr() == primary_ext_id_pid:
                    # if its on the primary ID for this item, deprecate instead of removing it
                    statement.set_rank('deprecated')
                    to_update.append(statement)
                else:
                    statement.remove = ''
                    to_remove.append(statement)
            elif len(refs) > 1 and any(cites_old_release(ref) for ref in refs):
                # there is another reference on this statement, and a old reference
                # we should just remove the old reference and keep the statement
                statement.set_references([ref for ref in refs if not cites_old_release(ref)])
                to_update.append(statement)

        if not (to_update or to_remove):
            return

        print("-----")
        print(qid)
        print([(x.get_prop_nr(), x.value) for x in to_update])
        print([(x.get_prop_nr(), x.value) for x in to_remove])

        # The two groups are written separately (per the original author's note): removing
        # statements with append_value set does not remove them, and removing a ref off a
        # statement without append_value set removes all other statements.
        engine_kwargs = dict(
            wd_item_id=qid,
            domain='none',
            fast_run=False,
            mediawiki_api_url=self.mediawiki_api_url,
            sparql_endpoint_url=self.sparql_endpoint_url)
        if to_update:
            wd_item = wdi_core.WDItemEngine(data=to_update,
                                            append_value=self.graph.APPEND_PROPS,
                                            **engine_kwargs)
            wdi_helpers.try_write(wd_item, '', '', login, edit_summary="remove deprecated statements")
        if to_remove:
            wd_item = wdi_core.WDItemEngine(data=to_remove, **engine_kwargs)
            wdi_helpers.try_write(wd_item, '', '', login, edit_summary="remove deprecated statements")
Пример #17
0
def do_compound(brand_qid, drug_qid, brand_name):
    """Add the ``P3780`` statement to the drug item and drop the brand name from its aliases.

    :param brand_qid: Wikidata id used as the value of the ``P3780`` statement.
    :param drug_qid: Wikidata id of the drug item being edited.
    :param brand_name: alias to remove from the drug item (compared case-insensitively).
    """
    ref = create_ref_statement(emea, url)
    statement = wdi_core.WDItemID(brand_qid, 'P3780', references=[ref])
    item = wdi_core.WDItemEngine(wd_item_id=drug_qid, data=[statement], append_value=['P3780'])

    # keep every alias except the brand name, then replace the alias list wholesale
    kept_aliases = []
    for alias in item.get_aliases():
        if brand_name.lower() != alias.lower():
            kept_aliases.append(alias)
    item.set_aliases(kept_aliases, append=False)

    wdi_helpers.try_write(item, '', '', login, edit_summary="add 'active ingredient in'")
Пример #18
0
    def run_one_indication(self, indication_qid, drugs_qid):
        """Write one 'drug used for treatment' statement per drug onto the indication item.

        :param indication_qid: Wikidata id of the indication item; it must already exist.
        :param drugs_qid: iterable of drug Wikidata ids.
        """
        statements = [
            wdi_core.WDItemID(drug_qid, PROPS['drug used for treatment'], references=self.refs)
            for drug_qid in drugs_qid
        ]

        item = self.item_engine(wd_item_id=indication_qid, data=statements)
        assert not item.create_new_item

        try_write(
            item,
            record_id=indication_qid,
            record_prop=PROPS['drug used for treatment'],
            edit_summary="Add drug used for treatment from faers",
            login=self.login,
            write=self.write)
Пример #19
0
    def run_one_drug(self, drug_qid, indications_qid):
        """Write one 'medical condition treated' statement per indication onto the drug item.

        :param drug_qid: Wikidata id of the drug item; it must already exist.
        :param indications_qid: iterable of indication Wikidata ids.
        """
        statements = [
            wdi_core.WDItemID(indication_qid, PROPS['medical condition treated'], references=self.refs)
            for indication_qid in indications_qid
        ]

        item = self.item_engine(wd_item_id=drug_qid, data=statements)
        assert not item.create_new_item

        try_write(
            item,
            record_id=drug_qid,
            record_prop=PROPS['medical condition treated'],
            edit_summary="Add medical condition treated from faers",
            login=self.login,
            write=self.write)
Пример #20
0
 def create(self, write=True):
     """Build the item for this term from its xref and main statements and attempt the write.

     :param write: passed to ``wdi_helpers.try_write``.
     :return: the item engine instance; ``None`` when the term is deprecated (a warning is
      logged) or when an exception was raised (printed and logged as an error).
     """
     if self.deprecated:
         msg = wdi_helpers.format_msg(self.doid, 'P699', None, "delete me", msg_type="delete me")
         wdi_core.WDItemEngine.log("WARNING", msg)
         print(msg)
         return None
     try:
         self.create_xref_statements()
         self.s.extend(self.s_xref)
         self.create_main_statements()
         self.s.extend(self.s_main)

         appendable_props = [
             PROPS['subclass of'], PROPS['instance of'],
             PROPS['has cause'], PROPS['location'],
             PROPS['OMIM ID'], PROPS['Orphanet ID'],
             PROPS['MeSH ID'], PROPS['ICD-10-CM'],
             PROPS['ICD-10'], PROPS['ICD-9-CM'],
             PROPS['ICD-9'], PROPS['NCI Thesaurus ID'],
             PROPS['UMLS CUI'],
         ]
         wd_item = wdi_core.WDItemEngine(
             data=self.s,
             append_value=appendable_props,
             fast_run=self.do_graph.fast_run,
             fast_run_base_filter={'P699': ''},
             fast_run_use_refs=True,
             global_ref_mode='CUSTOM',
             ref_handler=update_retrieved_if_new)
         wd_item.fast_run_container.debug = False

         # only set the label when the item has no English label yet
         if wd_item.get_label(lang="en") == "":
             wd_item.set_label(self.lbl, lang="en")

         current_descr = wd_item.get_description(lang='en')
         definition = self.definition
         definition_usable = definition and len(definition) < 250
         if definition_usable and (
                 current_descr == definition
                 or current_descr.lower() in {"", "human disease", "disease"}):
             # the description is the raw definition or a generic placeholder:
             # replace it with the cleaned definition
             wd_item.set_description(utils.clean_description(definition))
         elif current_descr.lower() == "":
             wd_item.set_description(description="human disease", lang='en')

         if self.synonyms is not None:
             wd_item.set_aliases(aliases=self.synonyms, lang='en', append=True)
         if self.wikilink is not None:
             # a lot of these are not right... don't do this
             # wd_item.set_sitelink(site="enwiki", title=self.wikilink)
             pass

         wdi_helpers.try_write(
             wd_item,
             record_id=self.doid,
             record_prop='P699',
             login=self.do_graph.login,
             write=write)
         return wd_item
     except Exception as e:
         print(self.doid)
         traceback.print_exc()
         msg = wdi_helpers.format_msg(self.doid, 'P699', None, str(e), msg_type=type(e))
         wdi_core.WDItemEngine.log("ERROR", msg)
Пример #21
0
    def create_edges(self, login, write=True):
        """Write the edge-derived statements of every node that has a QID.

        For each node, the statements built from the edges whose ``'sub'`` is the node's URI are
        combined with instance-of statements for the node's root nodes, then written to the
        node's item. Nodes without a QID are logged and skipped; nodes with no statements are
        skipped silently.

        :param login: login object passed to ``wdi_helpers.try_write``.
        :param write: passed to ``wdi_helpers.try_write``.
        """
        all_uris = set(node.id_uri for node in self.nodes)

        # Index the edges by subject once, instead of rescanning every edge for every node.
        # Each subject's edges keep their original relative order.
        edges_by_subject = {}
        skipped_count = 0
        for edge in self.edges:
            subject = edge['sub']
            if subject not in all_uris:
                # skip edges where the subject is not one of our nodes
                skipped_count += 1
            edges_by_subject.setdefault(subject, []).append(edge)
        print("skipping {} edges where the subject is a node that is being skipped".format(skipped_count))

        for node in tqdm(self.nodes, desc="creating edges"):
            if not node.qid:
                m = wdi_helpers.format_msg(node.id_curie, node.id_pid, None, "QID not found, skipping edges")
                print(m)
                wdi_core.WDItemEngine.log("WARNING", m)
                continue
            this_uri = node.id_uri
            ss = []
            for edge in edges_by_subject.get(this_uri, []):
                s = self.make_statement_from_edge(edge)
                # keep only statements that were built and carry a value
                if s and s.get_value():
                    ss.append(s)

            # set instance of using the root node
            root_nodes = self.root_node[node.id_uri]
            for root_node in root_nodes:
                # don't add instance of self!
                if root_node in self.uri_node_map and root_node != node.id_uri:
                    ref = node.create_ref_statement()
                    value_qid = self.uri_node_map[root_node].qid
                    if value_qid:
                        ss.append(wdi_core.WDItemID(value_qid, self.helper.get_pid('P31'), references=[ref]))

            if not ss:
                # there are no statements for this node
                continue

            item = wdi_core.WDItemEngine(
                wd_item_id=node.qid, data=ss,
                append_value=self.APPEND_PROPS,
                fast_run=self.FAST_RUN,
                fast_run_base_filter={node.id_pid: ''},
                fast_run_use_refs=True,
                global_ref_mode='CUSTOM',
                ref_handler=self.ref_handler,
                sparql_endpoint_url=self.sparql_endpoint_url,
                mediawiki_api_url=self.mediawiki_api_url,
                core_props=self.CORE_IDS
            )
            # the record id / property used for logging come from the node's own curie
            this_pid, this_value = cu.parse_curie(cu.uri_to_curie(this_uri))
            this_pid = self.helper.get_pid(this_pid)
            wdi_helpers.try_write(item, record_id=this_value, record_prop=this_pid,
                                  login=login, write=write)
Пример #22
0
    def create(self, login, write=True, allow_new=True):
        """Create the item for this node, or retrieve the existing one, and record its QID.

        Covers the primary external ID, the xrefs, instance of (if set), label, description and
        aliases. Other properties (i.e. subclass) are not handled here, as they may require
        items that do not exist yet.

        :param login: login object passed to ``wdi_helpers.try_write``.
        :param write: passed to ``wdi_helpers.try_write``.
        :param allow_new: when false, return without writing if the engine would create a new item.
        :return: None. ``self.qid`` is only set when the write step is reached.
        """
        self._pre_create()
        assert self.id_curie
        statements = self.create_statements()

        curie_prefix, primary_ext_id = cu.parse_curie(self.id_curie)
        primary_ext_id_pid = self.helper.get_pid(curie_prefix)
        assert primary_ext_id_pid in self.graph.APPEND_PROPS

        try:
            self.item = wdi_core.WDItemEngine(
                data=statements,
                append_value=self.graph.APPEND_PROPS,
                fast_run=self.graph.FAST_RUN,
                fast_run_base_filter={primary_ext_id_pid: ''},
                fast_run_use_refs=True,
                global_ref_mode='CUSTOM',
                ref_handler=self.ref_handler,
                mediawiki_api_url=self.mediawiki_api_url,
                sparql_endpoint_url=self.sparql_endpoint_url,
                core_props=self.graph.CORE_IDS,
                core_prop_match_thresh=.9
            )
            # assert the retrieved item doesn't already have a primary_ext_id id
            retrieved_qid = self.item.wd_item_id
            if retrieved_qid:
                query = "select ?primary_ext_id where {{ wd:{} wdt:{} ?primary_ext_id }}".format(retrieved_qid,
                                                                                                 primary_ext_id_pid)
                bindings = wdi_core.WDItemEngine.execute_sparql_query(query)['results']['bindings']
                existing_ids = [binding['primary_ext_id']['value'] for binding in bindings]
                if existing_ids and self.id_curie not in existing_ids:
                    raise Exception(
                        "conflicting primary_ext_id IDs: {} on {}".format(self.id_curie, self.item.wd_item_id))
            if not allow_new and self.item.create_new_item:
                return None
        except Exception as e:
            traceback.print_exc()
            error_msg = wdi_helpers.format_msg(primary_ext_id, primary_ext_id_pid, None, str(e), msg_type=type(e))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
            return

        self.set_label(self.item)
        self.set_descr(self.item)
        self.set_aliases(self.item)
        # todo: I want to avoid this from happening: https://www.wikidata.org/w/index.php?title=Q4553565&diff=676750840&oldid=647941942

        wdi_helpers.try_write(self.item, record_id=primary_ext_id, record_prop=primary_ext_id_pid,
                              login=login, write=write)

        self.qid = self.item.wd_item_id
Пример #23
0
def remove_deprecated_statements(qid, frc, releases, last_updated, props,
                                 login):
    """
    Remove the statements of an item whose references show they are out of date.

    A statement is marked for removal when one of its references either has a
    P248 (stated in) value that is one of ``releases``, or contains a value
    that is a key of ``last_updated`` together with a P813 (retrieved) value
    lying more than DAYS before the date stored under that key.
    Each statement is submitted for removal at most once, however many of its
    references match.

    :param qid: qid of item
    :param frc: a fastrun container
    :param releases: list of releases to remove (a statement that has a reference that is stated in one of these
            releases will be removed)
    :param last_updated: looks like {'Q20641742': datetime.date(2017,5,6)}. a statement that has a reference that is
            stated in Q20641742 (entrez) and was retrieved more than DAYS before 2017-5-6 will be removed
    :param props: look at these props
    :param login: login object handed on to wdi_helpers.try_write
    :return: None
    """
    # The value written here is a placeholder; presumably the call only makes
    # the fast-run container load its data for each prop -- TODO confirm.
    for prop in props:
        frc.write_required([wdi_core.WDString("fake value", prop)])
    orig_statements = frc.reconstruct_statements(qid)
    # Drop the one-character prefix so releases are compared as integers.
    releases = {int(r[1:]) for r in releases}
    # NOTE(review): unlike releases, the keys of last_updated are compared with
    # get_value() exactly as passed in, so they must already have the type
    # get_value() returns -- confirm against the caller.

    s_dep = []
    for s in orig_statements:
        references = s.get_references()
        deprecated = any(
            x.get_prop_nr() == 'P248' and x.get_value() in releases
            for r in references for x in r)
        if not deprecated:
            for r in references:
                dbs = [
                    x.get_value() for x in r if x.get_value() in last_updated
                ]
                if not dbs:
                    continue
                db = dbs[0]
                if any(x.get_prop_nr() == 'P813' and last_updated[db] -
                       x.get_value() > DAYS for x in r):
                    deprecated = True
                    # One stale reference is enough. Stopping here keeps the
                    # statement from being queued once per matching reference.
                    break
        if deprecated:
            setattr(s, 'remove', '')
            s_dep.append(s)

    if s_dep:
        print("-----")
        print(qid)
        print(len(s_dep))
        print([(x.get_prop_nr(), x.value) for x in s_dep])
        print([(x.get_references()[0]) for x in s_dep])
        wd_item = wdi_core.WDItemEngine(wd_item_id=qid,
                                        domain='none',
                                        data=s_dep,
                                        fast_run=False)
        wdi_helpers.try_write(wd_item,
                              '',
                              '',
                              login,
                              edit_summary="remove deprecated statements")
Пример #24
0
    def create(self, login):
        """Create a "combination therapy" item for ``self.component_qids``.

        The label is built from the labels of the components, and the item
        gets one 'has part' statement per component plus an 'instance of'
        statement pointing at Q1304270 (combination therapy).

        On success the new qid is recorded in ``self.combo_qid`` and
        ``self.qid_combo`` and returned.

        :param login: login object handed on to try_write
        :return: qid of the created item
        :raises ValueError: if the write did not succeed
        """
        # get names of components
        labels = getConceptLabels(self.component_qids)

        name = " / ".join(labels.values()) + " combination therapy"
        description = "combination therapy"

        # has part
        s = [
            wdi_core.WDItemID(x, PROPS['has part'])
            for x in self.component_qids
        ]
        # instance of combination therapy
        s.append(wdi_core.WDItemID("Q1304270", PROPS['instance of']))

        item = wdi_core.WDItemEngine(item_name=name, data=s, domain="asdf")
        item.set_label(name)
        item.set_description(description)
        result = try_write(item,
                           record_id=";".join(self.component_qids),
                           record_prop='',
                           login=login)
        # try_write hands back the caught exception object on failure. Such an
        # object is truthy, so only the literal True counts as success.
        if result is not True:
            error = ValueError("unsuccessful item creation")
            if isinstance(result, BaseException):
                raise error from result
            raise error

        self.combo_qid[self.component_qids] = item.wd_item_id
        self.qid_combo[item.wd_item_id] = self.component_qids
        return item.wd_item_id
Пример #25
0
    def create_item(self, fast_run=True, write=True):
        """Build the gene item from this record and hand it to try_write.

        Populates ``self.statements``, ``self.fast_run_base_filter``,
        ``self.wd_item_gene`` and ``self.status`` along the way.

        :param fast_run: forwarded to WDItemEngine as ``fast_run``
        :param write: forwarded to wdi_helpers.try_write as ``write``
        """
        self.parse_external_ids()
        self.statements = self.create_statements()
        self.create_label()
        self.create_description()
        self.create_aliases()

        entrez_pid = PROPS['Entrez Gene ID']
        self.fast_run_base_filter = {
            entrez_pid: '',
            PROPS['found in taxon']: self.organism_info['wdid'],
        }

        engine_options = dict(
            item_name=self.label,
            domain='genes',
            data=self.statements,
            append_value=[PROPS['instance of']],
            fast_run=fast_run,
            fast_run_base_filter=self.fast_run_base_filter,
            fast_run_use_refs=True,
            ref_handler=update_retrieved_if_new,
            global_ref_mode="CUSTOM",
            core_props=core_props,
        )
        # The engine is stored on self before the label/description/alias
        # step runs, and that step's return value then replaces it.
        self.wd_item_gene = wdi_core.WDItemEngine(**engine_options)
        self.wd_item_gene = self.set_label_desc_aliases(self.wd_item_gene)

        # Keep whatever try_write reports so callers can inspect the outcome.
        self.status = wdi_helpers.try_write(
            self.wd_item_gene,
            record_id=self.external_ids['Entrez Gene ID'],
            record_prop=entrez_pid,
            login=self.login,
            write=write)
Пример #26
0
    def create_item(self, login=None, write=True):
        """Create or update the item for this node and write it.

        Deprecated nodes are skipped. Any exception raised while building or
        writing the item is printed and logged as an error instead of being
        propagated.

        :param login: login object handed on to wdi_helpers.try_write
        :param write: forwarded to wdi_helpers.try_write as ``write``
        :return: the WDItemEngine instance, or None if the node is deprecated
                 or an exception was caught
        """
        if self.deprecated:
            return None
        try:
            statements = [
                *self.create_xref_statements(),
                *self.create_main_statements_nodepend(),
            ]
            engine_options = dict(
                item_name=self.lbl,
                data=statements,
                domain=self.domain,
                append_value=[PROPS['subclass of'], PROPS['instance of']],
                fast_run=self.fast_run,
                fast_run_base_filter={self.primary_ext_prop_qid: ''},
            )
            wd_item = wdi_core.WDItemEngine(**engine_options)

            # Only fill in the English label when the item has none.
            if wd_item.get_label(lang="en") == "":
                wd_item.set_label(self.lbl, lang="en")

            # A description that is empty or equal to the default label may
            # be replaced by the definition, provided one exists and is
            # shorter than 250 characters. Otherwise an empty description
            # falls back to the default label.
            descr = wd_item.get_description(lang='en').lower()
            replaceable = descr in {"", self.default_label}
            if replaceable and self.definition and len(self.definition) < 250:
                wd_item.set_description(description=self.definition, lang='en')
            elif descr == "":
                wd_item.set_description(description=self.default_label,
                                        lang='en')

            if self.synonyms is not None:
                wd_item.set_aliases(aliases=self.synonyms,
                                    lang='en',
                                    append=True)
            if self.wikilink is not None:
                wd_item.set_sitelink(site="enwiki", title=self.wikilink)

            wdi_helpers.try_write(wd_item,
                                  record_id=self.id_colon,
                                  record_prop=self.primary_ext_prop_qid,
                                  login=login,
                                  write=write)
            self.wd_item_id = wd_item.wd_item_id
            return wd_item
        except Exception as e:
            traceback.print_exc()
            error_msg = wdi_helpers.format_msg(self.id_colon,
                                               self.primary_ext_prop_qid,
                                               None,
                                               str(e),
                                               msg_type=type(e))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
            return None
    def create_chrom(self, organism_name, organism_qid, chrom_name, genome_id, chr_type, login):
        """Return the qid of a chromosome item, creating the item if none is found.

        :param organism_name: used as the first part of the item label
        :param organism_qid: value of the P703 (found in taxon) statement
        :param chrom_name: used as the second part of the item label
        :param genome_id: refseq genome id, stored as P2249
        :type genome_id: str
        :param chr_type: chromosome type; its lower-cased form must be a key
                         of ``self.chr_type_map``
        :param login: login object, only needed when the item has to be created
        :return: ``wd_item_id`` of the item engine
        :raises ValueError: on an unknown chromosome type, or when a new item
                            would have to be written but ``login`` is None
        """
        label = '{} {}'.format(organism_name, chrom_name)
        # Uses chr_type exactly as passed in; it is lower-cased only further down.
        description = 'bacterial {}'.format(chr_type)
        print(genome_id)

        # One reference block shared by every statement:
        # stated in ncbi gene, refseq genome id, retrieved today.
        retrieved_stamp = datetime.now().strftime('+%Y-%m-%dT00:00:00Z')
        reference = [
            wdi_core.WDItemID(value='Q20641742', prop_nr='P248', is_reference=True),
            wdi_core.WDString(value=genome_id, prop_nr='P2249', is_reference=True),
            wdi_core.WDTime(retrieved_stamp, prop_nr='P813', is_reference=True),
        ]

        chr_type = chr_type.lower()
        if chr_type not in self.chr_type_map:
            raise ValueError("unknown chromosome type: {}".format(chr_type))

        statements = [
            # instance of chr_type
            wdi_core.WDItemID(value=self.chr_type_map[chr_type], prop_nr='P31', references=[reference]),
            # found in taxon
            wdi_core.WDItemID(value=organism_qid, prop_nr='P703', references=[reference]),
            # genome id
            wdi_core.WDString(value=genome_id, prop_nr='P2249', references=[reference]),
        ]

        wd_item = wdi_core.WDItemEngine(data=statements,
                                        append_value=['P31'], fast_run=True,
                                        fast_run_base_filter={'P703': organism_qid, 'P2249': ''},
                                        core_props=core_props)

        # Nothing is written when the engine already resolved to an item.
        if not wd_item.wd_item_id:
            if login is None:
                raise ValueError("Login is required to create item")
            wd_item.set_label(label)
            wd_item.set_description(description, lang='en')
            wdi_helpers.try_write(wd_item, genome_id, 'P2249', login)
        return wd_item.wd_item_id
Пример #28
0
    def create_item(self, fast_run=True, write=True):
        """Build the protein item from this record and hand it to try_write.

        On success ``self.protein_wdid`` is set to the item's ``wd_item_id``.
        Any exception raised along the way is printed and logged as an error
        instead of being propagated.

        :param fast_run: forwarded to WDItemEngine as ``fast_run``
        :param write: forwarded to wdi_helpers.try_write as ``write``
        """
        try:
            self.parse_external_ids()
            self.statements = self.create_statements()
            self.create_label()
            self.create_description()
            self.create_aliases()

            engine_options = dict(
                item_name=self.label,
                domain='proteins',
                data=self.statements,
                append_value=[
                    PROPS['instance of'],
                    PROPS['encoded by'],
                    PROPS['Ensembl Protein ID'],
                    PROPS['RefSeq Protein ID'],
                ],
                fast_run=fast_run,
                fast_run_base_filter={
                    PROPS['UniProt ID']: '',
                    PROPS['found in taxon']: self.organism_info['wdid'],
                },
            )
            protein_item = wdi_core.WDItemEngine(**engine_options)
            protein_item.set_label(self.label)
            protein_item.set_description(self.description, lang='en')

            # Merge existing and new aliases, minus the alias "protein", and
            # overwrite the alias list with the result.
            merged_aliases = set(protein_item.get_aliases()).union(self.aliases)
            merged_aliases.discard("protein")
            protein_item.set_aliases(merged_aliases, append=False)

            wdi_helpers.try_write(protein_item,
                                  record_id=self.external_ids['UniProt ID'],
                                  record_prop=PROPS['UniProt ID'],
                                  login=self.login,
                                  write=write)
            self.protein_wdid = protein_item.wd_item_id
        except Exception as e:
            traceback.print_exc()
            error_msg = wdi_helpers.format_msg(self.external_ids['UniProt ID'],
                                               PROPS['UniProt ID'],
                                               None,
                                               str(e),
                                               msg_type=type(e))
            wdi_core.WDItemEngine.log("ERROR", error_msg)
Пример #29
0
    def create_edges(self):
        """Write the outgoing edges of every subject in ``self.edges``.

        Rows are grouped by their ":START_ID" column. For each group the
        statements returned by ``self.create_subj_edges`` are written to the
        item that ``self.dbxref_qid`` maps the start id to. Groups that yield
        no statements are skipped.
        """
        grouped = self.edges.groupby(":START_ID")

        for _group_key, group_rows in tqdm(grouped, total=len(grouped)):
            start_id = group_rows.iloc[0][':START_ID']
            subject_qid = self.dbxref_qid.get(start_id)
            statements = self.create_subj_edges(group_rows)
            if statements:
                item = self.item_engine(wd_item_id=subject_qid,
                                        data=statements,
                                        domain="asdf")
                wdi_helpers.try_write(item,
                                      record_id=start_id,
                                      record_prop=self.dbxref_pid,
                                      login=self.login,
                                      write=self.write)
Пример #30
0
 def write_item(self, wd_item):
     """Write one prepared item, unless writing is switched off.

     :param wd_item: dict with the keys 'item', 'record_id' and 'record_prop'
     :return: None. An exception raised by the write is printed and logged
              as an error rather than propagated.
     """
     # Nothing to do when writing is disabled.
     if not self.write:
         return
     try:
         wdi_helpers.try_write(wd_item['item'],
                               record_id=wd_item['record_id'],
                               record_prop=wd_item['record_prop'],
                               edit_summary='edit genetic association',
                               login=self.login,
                               write=self.write)
     except Exception as e:
         print(e)
         error_msg = wdi_helpers.format_msg(wd_item['record_id'],
                                            wd_item['record_prop'],
                                            wd_item['item'].wd_item_id,
                                            str(e),
                                            type(e))
         wdi_core.WDItemEngine.log("ERROR", error_msg)