def run_one(wd_expressed_in_statements: dict, login, append_data: bool = APPEND_DATA):
    """Write each gene's "expressed in" statements to its Wikidata item.

    :param wd_expressed_in_statements: dictionary keyed by Wikidata gene id;
        each value is the list of Wikidata anatomic entity statements for
        that gene.
    :param login: the Wikidata login object.
    :param append_data: if truthy, the gene item is opened with
        ``search_only=True`` and the statements are merged in through
        ``item.update``; otherwise the item is built directly from the
        statements with fast-run filtered on the "expressed in" property.
    """
    for count, (wikidata_gene_id, organ_statements) in enumerate(
            wd_expressed_in_statements.items()):
        expressed_in = PROPS['expressed in']
        if not append_data:
            item = wdi_core.WDItemEngine(
                data=organ_statements,
                wd_item_id=wikidata_gene_id,
                fast_run=True,
                fast_run_base_filter={expressed_in: ''})
        else:
            item = wdi_core.WDItemEngine(
                wd_item_id=wikidata_gene_id,
                search_only=True,
                global_ref_mode=APPEND_REFERENCE_MODE)
            item.update(organ_statements, [expressed_in])

        # the record id is the gene id suffixed with its position in the dictionary
        wdi_helpers.try_write(
            item,
            record_id=wikidata_gene_id + "-" + str(count),
            record_prop=expressed_in,
            login=login,
            edit_summary="Update gene expression based on the Bgee database")
def write_item(self, wd_item):
    """Hand one prepared item to ``wdi_helpers.try_write``.

    :param wd_item: mapping with the keys ``'item'``, ``'record_id'`` and
        ``'record_prop'``
    """
    item = wd_item['item']
    write_kwargs = {
        'record_id': wd_item['record_id'],
        'record_prop': wd_item['record_prop'],
        'edit_summary': 'edit genetic association',
        'login': self.login,
        'write': self.write,
    }
    wdi_helpers.try_write(item, **write_kwargs)
def do_item(entrezgene, orthologs, reference, entrez_homo, entrez_taxon, taxon_wdid, entrez_wdid, login, write):
    """Write the ortholog statements of one gene to its Wikidata item.

    :param entrezgene: Entrez gene id of the subject gene (stringified here)
    :param orthologs: iterable of Entrez gene ids; the subject itself is skipped
    :param reference: callable invoked with ``entrez_homo[entrezgene]``; its
        result is used as the reference on every ortholog statement
    :param entrez_homo: mapping indexed by the stringified subject gene id
    :param entrez_taxon: mapping of Entrez gene id to a taxon key
    :param taxon_wdid: mapping of taxon key to QID
    :param entrez_wdid: mapping of Entrez gene id to QID
    :param login: login object handed to ``wdi_helpers.try_write``
    :param write: forwarded to ``wdi_helpers.try_write``
    :raises ValueError: if an ortholog has no entry in ``entrez_taxon``
    """
    gene_id = str(entrezgene)
    shared_ref = reference(entrez_homo[gene_id])

    ortholog_statements = []
    for other_id in map(str, orthologs):
        if other_id == gene_id:
            continue
        if other_id not in entrez_taxon:
            raise ValueError("missing taxid for: " + other_id)
        taxon_qualifier = wdi_core.WDItemID(
            taxon_wdid[entrez_taxon[other_id]],
            PROPS['found in taxon'],
            is_qualifier=True)
        ortholog_statements.append(
            wdi_core.WDItemID(
                entrez_wdid[other_id],
                PROPS['ortholog'],
                references=[shared_ref],
                qualifiers=[taxon_qualifier]))

    # fast_run and core_props are not parameters; they come from the enclosing scope
    base_filter = {
        PROPS['Entrez Gene ID']: '',
        PROPS['found in taxon']: taxon_wdid[entrez_taxon[gene_id]],
    }
    item = wdi_core.WDItemEngine(
        wd_item_id=entrez_wdid[gene_id],
        data=ortholog_statements,
        fast_run=fast_run,
        fast_run_base_filter=base_filter,
        core_props=core_props)
    wdi_helpers.try_write(
        item,
        gene_id,
        PROPS['Entrez Gene ID'],
        edit_summary="edit orthologs",
        login=login,
        write=write)
def do_pharm_prod(drug_qid, brand_rxnorm, emea, url, brand_name):
    """Write the statements for a pharmaceutical product item and return its QID.

    ``login`` is not a parameter; it is read from the enclosing scope.

    :param drug_qid: QID used as the value of the 'P3781' statement
    :param brand_rxnorm: value for the 'P3345' statement; skipped when
        ``pd.isnull`` reports it as missing, otherwise written as ``str(int(...))``
    :param emea: value of the 'P3637' external id, also passed to
        ``create_ref_statement`` and used as the record id for ``try_write``
    :param url: passed to ``create_ref_statement``
    :param brand_name: item name and label
    :return: ``item.wd_item_id`` after the write attempt
    """
    ref = create_ref_statement(emea, url)
    statements = [
        # has active substance
        wdi_core.WDItemID(drug_qid, 'P3781', references=[ref]),
        # instance of: pharmaceutical product
        wdi_core.WDItemID('Q28885102', 'P31', references=[ref]),
        # instance of: chemical mixture
        wdi_core.WDItemID('Q169336', 'P31', references=[ref]),
        # emea
        wdi_core.WDExternalID(emea, 'P3637', references=[ref]),
    ]
    has_rxnorm = not pd.isnull(brand_rxnorm)
    if has_rxnorm:
        # this statement carries no reference
        statements.append(wdi_core.WDExternalID(str(int(brand_rxnorm)), "P3345"))

    item = wdi_core.WDItemEngine(
        item_name=brand_name,
        data=statements,
        domain="drugs",
        append_value=['P3781'])
    item.set_label(brand_name)
    # only fill in a description when the item has none
    if item.get_description() == '':
        item.set_description("pharmaceutical product")
    wdi_helpers.try_write(item, emea, 'P3637', login, edit_summary="add 'active ingredient'")
    return item.wd_item_id
def create_depend(self, login=None, write=True):
    """Write this node's main statements onto its already-created Wikidata item.

    Deprecated nodes are skipped, and so are nodes that have no
    ``wd_item_id`` yet (a message naming the node is printed instead).

    :param login: login object handed to ``wdi_helpers.try_write``
    :param write: forwarded to ``wdi_helpers.try_write``
    :return: the ``WDItemEngine`` item, or ``None`` when the node was skipped
        or when an exception was caught and logged
    """
    if self.deprecated:
        return None
    if not self.wd_item_id:
        # report this node's own identifier
        print("must create item first: {}".format(self.id_purl))
        return None
    try:
        s = self.create_main_statements()
        wd_item = wdi_core.WDItemEngine(
            wd_item_id=self.wd_item_id,
            data=s,
            domain=self.domain,
            append_value=[PROPS['subclass of'], PROPS['instance of']],
            fast_run=self.fast_run,
            fast_run_base_filter={self.primary_ext_prop_qid: ''})
        wdi_helpers.try_write(
            wd_item,
            record_id=self.id_colon,
            record_prop=self.primary_ext_prop_qid,
            login=login,
            write=write)
        return wd_item
    except Exception as e:
        # best effort: print the traceback, log the failure, and carry on
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
        msg = wdi_helpers.format_msg(
            self.id_colon, self.primary_ext_prop_qid, None, str(e), msg_type=type(e))
        wdi_core.WDItemEngine.log("ERROR", msg)
        return None
def get_or_create(self, login):
    """Look up the item for this record and write a new one if none is found.

    The record is validated and its reference and statements are built
    first. If the item engine resolves an existing item, nothing is written.

    :param login: login object handed to ``try_write``
    :return: tuple ``(qid, warnings, success)``; ``success`` is ``True`` when
        an existing item was found, otherwise it is the result of ``try_write``
    """
    self.validate()
    self.make_reference()
    self.make_statements()
    self.make_ext_id_statements()
    self.make_author_statements()

    item = wdi_core.WDItemEngine(
        data=self.statements,
        append_value=[PROPS['DOI'], PROPS['PMCID'], PROPS['PubMed ID']],
        # ref_handler=update_retrieved_if_new_multiple_refs()
    )
    if item.wd_item_id:
        return item.wd_item_id, self.warnings, True

    self.set_label(item)
    self.set_description(item)

    # the record id and property used for logging depend on the source
    if self.source == 'arxiv':
        id_key, prop_key = 'arxiv', "arxiv id"
    elif self.source == 'biorxiv':
        id_key, prop_key = 'biorxiv', "biorxiv id"
    else:
        id_key, prop_key = 'doi', "DOI"
    success = try_write(item, self.ids[id_key], PROPS[prop_key], login)
    return item.wd_item_id, self.warnings, success
def run_one(row):
    """Build and write the organism item described by one row.

    ``login``, ``uri_map``, ``localItemEngine`` and ``create_reference`` are
    taken from the enclosing scope.

    :param row: mapping providing ``'#Organism/Name'``, ``'TaxID'`` and
        ``'Assembly Accession'``
    """
    label = row['#Organism/Name']
    taxid = str(row['TaxID'])
    genbank_id = row['Assembly Accession']

    genbank_pid = uri_map[PROPS['GenBank Assembly accession']]
    taxid_pid = uri_map[PROPS['NCBI Taxonomy ID']]

    # each statement gets its own freshly created reference
    genbank_statement = wdi_core.WDExternalID(
        genbank_id, genbank_pid, references=[create_reference(genbank_id)])
    taxid_statement = wdi_core.WDExternalID(
        taxid, taxid_pid, references=[create_reference(genbank_id)])

    item = localItemEngine(
        data=[genbank_statement, taxid_statement],
        item_name=label,
        domain="organism",
        fast_run=True,
        fast_run_base_filter={uri_map[PROPS['NCBI Taxonomy ID']]: ''})
    item.set_label(label)
    item.set_description("bug")
    wdi_helpers.try_write(
        item,
        login=login,
        record_id=genbank_id,
        record_prop=uri_map[PROPS['GenBank Assembly accession']])
def run_one(taxid, genbank_id):
    """Write a GenBank Assembly accession onto the organism item for ``taxid``.

    ``tax_qid_map`` and ``login`` are taken from the enclosing scope.

    :param taxid: taxonomy id; stringified before the QID lookup
    :param genbank_id: accession value, also used as the record id for logging
    :return: ``None``; a warning is logged when ``taxid`` has no QID
    """
    taxid = str(taxid)
    qid = tax_qid_map.get(taxid) if taxid in tax_qid_map else None
    if taxid not in tax_qid_map:
        warning = wdi_helpers.format_msg(
            genbank_id,
            PROPS['GenBank Assembly accession'],
            "",
            "organism with taxid {} not found or skipped".format(taxid))
        wdi_core.WDItemEngine.log("WARNING", warning)
        return None

    accession_statement = wdi_core.WDExternalID(
        genbank_id,
        PROPS['GenBank Assembly accession'],
        references=[create_reference(genbank_id)])

    # open the existing organism item by QID
    engine_options = dict(
        data=[accession_statement],
        wd_item_id=qid,
        fast_run=True,
        fast_run_base_filter={PROPS['GenBank Assembly accession']: ''},
        global_ref_mode='CUSTOM',
        fast_run_use_refs=True,
        ref_handler=update_retrieved_if_new)
    item = wdi_core.WDItemEngine(**engine_options)

    wdi_helpers.try_write(
        item,
        record_id=genbank_id,
        record_prop=PROPS['GenBank Assembly accession'],
        login=login,
        edit_summary="update GenBank Assembly accession")
def update_item(self, qid, fast_run=True, write=True):
    """Rebuild the statements for this protein and write them to item ``qid``.

    :param qid: QID of the existing protein item
    :param fast_run: forwarded to ``WDItemEngine``
    :param write: forwarded to ``wdi_helpers.try_write``
    :return: the ``WDItemEngine`` item, or ``None`` if an exception was
        caught and logged
    """
    print("updating protein: {}".format(qid))
    try:
        self.parse_external_ids()
        self.statements = self.create_statements()

        appendable_props = [
            PROPS['instance of'],
            PROPS['encoded by'],
            PROPS['Ensembl Protein ID'],
            PROPS['RefSeq Protein ID'],
        ]
        base_filter = {
            PROPS['UniProt ID']: '',
            PROPS['found in taxon']: self.organism_info['wdid'],
        }
        wd_item_protein = wdi_core.WDItemEngine(
            wd_item_id=qid,
            data=self.statements,
            append_value=appendable_props,
            fast_run=fast_run,
            fast_run_base_filter=base_filter,
            fast_run_use_refs=True,
            ref_handler=update_retrieved_if_new,
            global_ref_mode="CUSTOM",
            core_props=core_props)
        wdi_helpers.try_write(
            wd_item_protein,
            self.external_ids['UniProt ID'],
            PROPS['UniProt ID'],
            self.login,
            write=write)
        self.protein_wdid = wd_item_protein.wd_item_id
    except Exception as e:
        traceback.print_exception(*sys.exc_info())
        # NOTE(review): failures are logged under the Entrez Gene ID while
        # the write above is logged under the UniProt ID - confirm intended.
        msg = wdi_helpers.format_msg(
            self.external_ids['Entrez Gene ID'],
            PROPS['Entrez Gene ID'],
            None,
            str(e),
            msg_type=type(e))
        wdi_core.WDItemEngine.log("ERROR", msg)
        return None
    return wd_item_protein
def make_gene_encodes(self, write=True):
    """
    Add an "encodes" statement, pointing at the protein item, to the gene item.

    The reference is built before the ``try`` block, so a failure there
    propagates to the caller; later failures are printed and logged.
    ``fast_run`` and ``core_props`` are taken from the enclosing scope.

    :param write: forwarded to ``wdi_helpers.try_write``
    :return: None
    """
    uniprot_ref = make_ref_source(
        self.record['uniprot']['@source'],
        PROPS['UniProt ID'],
        self.external_ids['UniProt ID'],
        login=self.login)

    try:
        encodes = wdi_core.WDItemID(
            self.protein_wdid, PROPS['encodes'], references=[uniprot_ref])
        gene_filter = {
            PROPS['Entrez Gene ID']: '',
            PROPS['found in taxon']: self.organism_info['wdid'],
        }
        wd_item_gene = wdi_core.WDItemEngine(
            wd_item_id=self.gene_wdid,
            data=[encodes],
            append_value=[PROPS['encodes']],
            fast_run=fast_run,
            fast_run_base_filter=gene_filter,
            global_ref_mode="CUSTOM",
            ref_handler=update_retrieved_if_new,
            core_props=core_props)
        wdi_helpers.try_write(
            wd_item_gene,
            self.external_ids['UniProt ID'],
            PROPS['UniProt ID'],
            self.login,
            write=write)
    except Exception as e:
        traceback.print_exception(*sys.exc_info())
        error_msg = wdi_helpers.format_msg(
            self.external_ids['UniProt ID'],
            PROPS['UniProt ID'],
            None,
            str(e),
            msg_type=type(e))
        wdi_core.WDItemEngine.log("ERROR", error_msg)
def remove_deprecated_statements(qid, frc, release_wdid, props, login):
    """Remove statements on ``qid`` whose references cite an older release.

    The set of old releases is every value of ``INTERPRO_RELEASES`` plus
    'Q3047275', minus ``release_wdid``. QIDs are compared as integers
    (the "Q" is stripped).

    :param qid: QID of the item to clean up
    :param frc: fast-run container; provides ``write_required`` and
        ``reconstruct_statements``
    :param release_wdid: QID of the current release, which is not counted as old
    :param props: properties that are loaded into ``frc`` before reconstructing
    :param login: login object handed to ``wdi_helpers.try_write``
    """
    def as_number(wdid):
        return int(wdid.replace("Q", ""))

    old_releases = {as_number(x) for x in set(INTERPRO_RELEASES.values()) | {'Q3047275'}}
    # don't count this release
    old_releases.discard(as_number(release_wdid))

    # make sure we have these props in frc
    for prop in props:
        frc.write_required([wdi_core.WDString("fake value", prop)])
    orig_statements = frc.reconstruct_statements(qid)

    def cites_old_release(statement):
        # true when any reference has a 'P248' snak whose value is an old release
        for ref in statement.get_references():
            for snak in ref:
                if snak.get_prop_nr() == 'P248' and snak.get_value() in old_releases:
                    return True
        return False

    s_dep = []
    for statement in orig_statements:
        if cites_old_release(statement):
            # the 'remove' attribute marks the statement for removal
            statement.remove = ''
            s_dep.append(statement)

    if not s_dep:
        return

    print("-----")
    print(qid)
    print(orig_statements)
    print(s_dep)
    print([(x.get_prop_nr(), x.value) for x in s_dep])
    print([(x.get_references()[0]) for x in s_dep])
    wd_item = wdi_core.WDItemEngine(wd_item_id=qid, domain='none', data=s_dep, fast_run=False)
    wdi_helpers.try_write(wd_item, '', '', login, edit_summary="remove deprecated statements")
def make_gene_encodes(self, write=True):
    """
    Add an "encodes" statement, pointing at the protein item, to the gene item.

    Building the reference happens outside the ``try`` block; any later
    exception is printed with its traceback and logged as an error.

    :param write: forwarded to ``wdi_helpers.try_write``
    :return: None
    """
    source = self.record['uniprot']['@source']
    uniprot_ref = make_ref_source(
        source, PROPS['UniProt ID'], self.external_ids['UniProt ID'], login=self.login)

    try:
        encodes_statement = wdi_core.WDItemID(
            self.protein_wdid, PROPS['encodes'], references=[uniprot_ref])
        engine_options = dict(
            wd_item_id=self.gene_wdid,
            domain='genes',
            data=[encodes_statement],
            append_value=[PROPS['encodes']])
        wd_item_gene = wdi_core.WDItemEngine(**engine_options)
        wdi_helpers.try_write(
            wd_item_gene,
            self.external_ids['UniProt ID'],
            PROPS['UniProt ID'],
            self.login,
            write=write)
    except Exception as e:
        failure = sys.exc_info()
        traceback.print_exception(*failure)
        wdi_core.WDItemEngine.log(
            "ERROR",
            wdi_helpers.format_msg(
                self.external_ids['UniProt ID'],
                PROPS['UniProt ID'],
                None,
                str(e),
                msg_type=type(e)))
def run_one_disease(self, disease_qid, records):
    """Write one "symptoms" statement per record onto an existing disease item.

    :param disease_qid: QID of the disease item; also the record id for logging
    :param records: iterable of mappings with the keys ``'Percent affected'``,
        ``'Pubmed id'``, ``'phenotype_qid'`` and ``'disease'``
    :raises AssertionError: if the item engine would create a new item
    """
    symptom_statements = []
    for record in records:
        # the last two characters of 'Percent affected' are dropped before parsing
        incidence = float(record['Percent affected'][:-2])
        pmid = record['Pubmed id']
        phenotype_qid = record['phenotype_qid']
        omim_id = record['disease']

        references = [self.create_reference(omim_id, pmid=pmid, login=self.login)]
        qualifiers = self.create_qualifier(incidence)
        symptom_statements.append(
            wdi_core.WDItemID(
                phenotype_qid,
                PROPS['symptoms'],
                references=references,
                qualifiers=qualifiers))

    item = self.item_engine(wd_item_id=disease_qid, data=symptom_statements)
    # this must only ever edit an existing item
    assert not item.create_new_item

    try_write(
        item,
        record_id=disease_qid,
        record_prop=PROPS['symptoms'],
        edit_summary="Add phenotype from mitodb",
        login=self.login,
        write=self.write)
def create_item(self, login=None, fast_run=True, write=True):
    """Build the item for this term and, when a login is given, write it.

    :param login: login object; if falsy, no write is attempted
    :param fast_run: forwarded to ``WDItemEngine``
    :param write: forwarded to ``wdi_helpers.try_write``
    :return: the ``WDItemEngine`` item, or ``None`` if constructing it raised
        ``JSONDecodeError`` (which is logged)
    """
    references = [self.reference]
    statements = [
        wdi_core.WDExternalID(value=self.id, prop_nr=INTERPRO, references=references),
        wdi_core.WDItemID(value=self.type_wdid, prop_nr=INSTANCE_OF, references=[self.reference]),
    ]

    try:
        wd_item = wdi_core.WDItemEngine(
            item_name=self.name,
            domain='interpro',
            data=statements,
            append_value=["P279", "P31"],
            fast_run=fast_run,
            fast_run_base_filter=IPRTerm.fast_run_base_filter)
    except JSONDecodeError as e:
        error_msg = wdi_helpers.format_msg(self.id, INTERPRO, None, str(e), msg_type=type(e))
        wdi_core.WDItemEngine.log("ERROR", error_msg)
        return None

    wd_item.set_label(self.name, lang='en')
    # only fill in descriptions for languages that have none yet
    for lang, description in self.lang_descr.items():
        already_described = wd_item.get_description(lang=lang) != ""
        if already_described:
            continue
        wd_item.set_description(description, lang=lang)
    wd_item.set_aliases([self.short_name, self.id])

    if login:
        wdi_helpers.try_write(wd_item, self.id, INTERPRO, login, write=write)
    return wd_item
def create_relationships(self, login, write=True):
    """Write the subclass-of / has-part / part-of statements for this term.

    QIDs are resolved through ``do_wdid_lookup`` first; a ``KeyError`` from
    that lookup is logged and ends the call. Nothing is written when the
    term has no relationships.

    :param login: login object handed to ``wdi_helpers.try_write``
    :param write: forwarded to ``wdi_helpers.try_write``
    """
    try:
        # endpoint may not get updated in time?
        self.do_wdid_lookup()
    except KeyError as e:
        wdi_core.WDItemEngine.log("ERROR", format_msg(self.id, INTERPRO, None, str(e), type(e)))
        return

    def related(value, prop_nr):
        return wdi_core.WDItemID(value=value, prop_nr=prop_nr, references=[self.reference])

    statements = [wdi_core.WDExternalID(value=self.id, prop_nr=INTERPRO, references=[self.reference])]
    if self.parent:
        # subclass of
        statements.append(related(self.parent_wdid, 'P279'))
    if self.contains:
        # has part
        statements.extend(related(c, 'P527') for c in self.contains_wdid)
    if self.found_in:
        # part of
        statements.extend(related(f, 'P361') for f in self.found_in_wdid)

    # only the identifier statement is present: no relationships to write
    if len(statements) == 1:
        return

    wd_item = wdi_core.WDItemEngine(
        wd_item_id=self.wdid,
        domain='interpro',
        data=statements,
        append_value=['P279', 'P527', 'P361'],
        fast_run=True,
        fast_run_base_filter=IPRTerm.fast_run_base_filter)
    wdi_helpers.try_write(
        wd_item,
        self.id,
        INTERPRO,
        login,
        edit_summary="create/update subclass/has part/part of",
        write=write)
def remove_deprecated_statements(self, releases, frc, login):
    """
    Clean up statements on this node's item that are only backed by old releases.

    A statement whose single reference is from an old release is removed,
    unless it is on the primary external id property, in which case it is
    given the 'deprecated' rank. A statement with several references only
    has its old-release references dropped.

    :param releases: a set of qid for releases which, when used as 'stated in'
        on a reference, mark that reference as old
    :param frc: provides ``reconstruct_statements``
    :param login: login object handed to ``wdi_helpers.try_write``
    :return: None
    """
    def from_old_release(ref):
        stated_in = self.helper.get_pid('P248')
        for snak in ref:
            if snak.get_prop_nr() == stated_in and "Q" + str(snak.get_value()) in releases:
                return True
        return False

    qid = self.qid
    curie_prefix, primary_ext_id = cu.parse_curie(self.id_curie)
    primary_ext_id_pid = self.helper.get_pid(curie_prefix)

    s_remove = []
    s_deprecate = []
    for statement in frc.reconstruct_statements(qid):
        refs = statement.get_references()
        if len(refs) == 1 and from_old_release(refs[0]):
            # this is the only ref on this statement and it is from an old release
            if statement.get_prop_nr() == primary_ext_id_pid:
                # on the primary ID for this item: deprecate instead of removing
                statement.set_rank('deprecated')
                s_deprecate.append(statement)
            else:
                statement.remove = ''
                s_remove.append(statement)
        elif len(refs) > 1 and any(from_old_release(ref) for ref in refs):
            # there is another reference on this statement besides the old one:
            # drop just the old reference(s) and keep the statement
            statement.set_references([ref for ref in refs if not from_old_release(ref)])
            s_deprecate.append(statement)

    if not (s_deprecate or s_remove):
        return

    print("-----")
    print(qid)
    print([(x.get_prop_nr(), x.value) for x in s_deprecate])
    print([(x.get_prop_nr(), x.value) for x in s_remove])

    # The two writes are kept separate on purpose (per the original author's
    # note): removing statements with append_value set does not remove them,
    # and removing a ref off a statement without append_value set removes all
    # other statements. Doing them separately works.
    if s_deprecate:
        wd_item = wdi_core.WDItemEngine(
            wd_item_id=qid,
            domain='none',
            data=s_deprecate,
            fast_run=False,
            mediawiki_api_url=self.mediawiki_api_url,
            sparql_endpoint_url=self.sparql_endpoint_url,
            append_value=self.graph.APPEND_PROPS)
        wdi_helpers.try_write(wd_item, '', '', login, edit_summary="remove deprecated statements")
    if s_remove:
        wd_item = wdi_core.WDItemEngine(
            wd_item_id=qid,
            domain='none',
            data=s_remove,
            fast_run=False,
            mediawiki_api_url=self.mediawiki_api_url,
            sparql_endpoint_url=self.sparql_endpoint_url)
        wdi_helpers.try_write(wd_item, '', '', login, edit_summary="remove deprecated statements")
def do_compound(brand_qid, drug_qid, brand_name):
    """Add "active ingredient in" to the drug item and drop the brand alias.

    ``emea``, ``url`` and ``login`` are not parameters; they are read from
    the enclosing scope.

    :param brand_qid: QID used as the value of the 'P3780' statement
    :param drug_qid: QID of the drug item that is edited
    :param brand_name: alias removed from the drug item (case-insensitive match)
    """
    ref = create_ref_statement(emea, url)
    active_ingredient_in = wdi_core.WDItemID(brand_qid, 'P3780', references=[ref])
    item = wdi_core.WDItemEngine(
        wd_item_id=drug_qid,
        data=[active_ingredient_in],
        append_value=['P3780'])

    # remove brand name from aliases if there
    kept_aliases = []
    for alias in item.get_aliases():
        if brand_name.lower() != alias.lower():
            kept_aliases.append(alias)
    item.set_aliases(kept_aliases, append=False)

    wdi_helpers.try_write(item, '', '', login, edit_summary="add 'active ingredient in'")
def run_one_indication(self, indication_qid, drugs_qid):
    """Write "drug used for treatment" statements onto an existing indication item.

    :param indication_qid: QID of the indication item; also the record id for logging
    :param drugs_qid: iterable of drug QIDs, one statement each
    :raises AssertionError: if the item engine would create a new item
    """
    statements = [
        wdi_core.WDItemID(drug_qid, PROPS['drug used for treatment'], references=self.refs)
        for drug_qid in drugs_qid
    ]
    item = self.item_engine(wd_item_id=indication_qid, data=statements)
    # this must only ever edit an existing item
    assert not item.create_new_item
    try_write(
        item,
        record_id=indication_qid,
        record_prop=PROPS['drug used for treatment'],
        edit_summary="Add drug used for treatment from faers",
        login=self.login,
        write=self.write)
def run_one_drug(self, drug_qid, indications_qid):
    """Write "medical condition treated" statements onto an existing drug item.

    :param drug_qid: QID of the drug item; also the record id for logging
    :param indications_qid: iterable of indication QIDs, one statement each
    :raises AssertionError: if the item engine would create a new item
    """
    statements = [
        wdi_core.WDItemID(indication_qid, PROPS['medical condition treated'], references=self.refs)
        for indication_qid in indications_qid
    ]
    item = self.item_engine(wd_item_id=drug_qid, data=statements)
    # this must only ever edit an existing item
    assert not item.create_new_item
    try_write(
        item,
        record_id=drug_qid,
        record_prop=PROPS['medical condition treated'],
        edit_summary="Add medical condition treated from faers",
        login=self.login,
        write=self.write)
def create(self, write=True):
    """Create or update the Wikidata item for this disease term.

    Deprecated terms are only logged (as a "delete me" warning) and skipped.
    Otherwise the xref and main statements are collected into ``self.s``,
    the label, description and aliases are adjusted, and the item is written.

    :param write: forwarded to ``wdi_helpers.try_write``
    :return: the ``WDItemEngine`` item; ``None`` for a deprecated term or
        when an exception was caught and logged
    """
    if self.deprecated:
        msg = wdi_helpers.format_msg(self.doid, 'P699', None, "delete me", msg_type="delete me")
        wdi_core.WDItemEngine.log("WARNING", msg)
        print(msg)
        return None

    try:
        self.create_xref_statements()
        self.s.extend(self.s_xref)
        self.create_main_statements()
        self.s.extend(self.s_main)

        append_props = [
            PROPS[name] for name in (
                'subclass of', 'instance of', 'has cause', 'location',
                'OMIM ID', 'Orphanet ID', 'MeSH ID',
                'ICD-10-CM', 'ICD-10', 'ICD-9-CM', 'ICD-9',
                'NCI Thesaurus ID', 'UMLS CUI',
            )
        ]
        wd_item = wdi_core.WDItemEngine(
            data=self.s,
            append_value=append_props,
            fast_run=self.do_graph.fast_run,
            fast_run_base_filter={'P699': ''},
            fast_run_use_refs=True,
            global_ref_mode='CUSTOM',
            ref_handler=update_retrieved_if_new)
        wd_item.fast_run_container.debug = False

        if wd_item.get_label(lang="en") == "":
            wd_item.set_label(self.lbl, lang="en")

        current_descr = wd_item.get_description(lang='en')
        usable_definition = self.definition and len(self.definition) < 250
        if usable_definition and (
                current_descr == self.definition
                or current_descr.lower() in {"", "human disease", "disease"}):
            # replace an uncleaned copy of the definition, or a missing/generic
            # description, with the cleaned definition
            wd_item.set_description(utils.clean_description(self.definition))
        elif current_descr.lower() == "":
            wd_item.set_description(description="human disease", lang='en')

        if self.synonyms is not None:
            wd_item.set_aliases(aliases=self.synonyms, lang='en', append=True)
        if self.wikilink is not None:
            # a lot of these are not right... don't do this
            # wd_item.set_sitelink(site="enwiki", title=self.wikilink)
            pass

        wdi_helpers.try_write(
            wd_item,
            record_id=self.doid,
            record_prop='P699',
            login=self.do_graph.login,
            write=write)
        return wd_item
    except Exception as e:
        failure = sys.exc_info()
        print(self.doid)
        traceback.print_exception(*failure)
        error_msg = wdi_helpers.format_msg(self.doid, 'P699', None, str(e), msg_type=type(e))
        wdi_core.WDItemEngine.log("ERROR", error_msg)
def create_edges(self, login, write=True):
    """Write the edge statements (and root-node "instance of") for every node.

    Edges are grouped by their subject URI in a single pass, so each node
    looks up its own edges directly instead of re-scanning the whole edge
    list. Nodes without a QID are logged and skipped, as are nodes that end
    up with no statements.

    :param login: login object handed to ``wdi_helpers.try_write``
    :param write: forwarded to ``wdi_helpers.try_write``
    """
    # group edges by subject once; order within each group follows self.edges
    edges_by_subject = {}
    for edge in self.edges:
        edges_by_subject.setdefault(edge['sub'], []).append(edge)

    # skip edges where the subject is not one of our nodes
    all_uris = set(node.id_uri for node in self.nodes)
    skipped_count = sum(
        len(edges) for subject, edges in edges_by_subject.items() if subject not in all_uris)
    print("skipping {} edges where the subject is a node that is being skipped".format(skipped_count))

    for node in tqdm(self.nodes, desc="creating edges"):
        if not node.qid:
            m = wdi_helpers.format_msg(node.id_curie, node.id_pid, None, "QID not found, skipping edges")
            print(m)
            wdi_core.WDItemEngine.log("WARNING", m)
            continue

        this_uri = node.id_uri
        ss = []
        for edge in edges_by_subject.get(this_uri, []):
            s = self.make_statement_from_edge(edge)
            if s and s.get_value():
                ss.append(s)

        # set instance of using the root node
        for root_node in self.root_node[node.id_uri]:
            # don't add instance of self!
            if root_node in self.uri_node_map and root_node != node.id_uri:
                ref = node.create_ref_statement()
                value_qid = self.uri_node_map[root_node].qid
                if value_qid:
                    ss.append(wdi_core.WDItemID(value_qid, self.helper.get_pid('P31'), references=[ref]))

        if not ss:
            # there are no statements for this node
            continue

        item = wdi_core.WDItemEngine(
            wd_item_id=node.qid,
            data=ss,
            append_value=self.APPEND_PROPS,
            fast_run=self.FAST_RUN,
            fast_run_base_filter={node.id_pid: ''},
            fast_run_use_refs=True,
            global_ref_mode='CUSTOM',
            ref_handler=self.ref_handler,
            sparql_endpoint_url=self.sparql_endpoint_url,
            mediawiki_api_url=self.mediawiki_api_url,
            core_props=self.CORE_IDS)

        # the record id / property for logging come from the node's own curie
        this_pid, this_value = cu.parse_curie(cu.uri_to_curie(this_uri))
        this_pid = self.helper.get_pid(this_pid)
        wdi_helpers.try_write(item, record_id=this_value, record_prop=this_pid, login=login, write=write)
def create(self, login, write=True, allow_new=True):
    """Create this node's item, or fetch the existing one, and record its QID.

    Writes the primary external ID, the xrefs, instance of (if set), and
    checks label, description and aliases. Other properties (i.e. subclass)
    are not handled here, as they may require items that do not exist yet.

    :param login: login object handed to ``wdi_helpers.try_write``
    :param write: forwarded to ``wdi_helpers.try_write``
    :param allow_new: when false, return without writing if the engine would
        create a new item
    :return: None; on success ``self.qid`` is set from the item
    :raises AssertionError: if ``id_curie`` is empty or the primary external
        id property is not in the graph's ``APPEND_PROPS``
    """
    self._pre_create()
    assert self.id_curie
    statements = self.create_statements()

    curie_prefix, primary_ext_id = cu.parse_curie(self.id_curie)
    primary_ext_id_pid = self.helper.get_pid(curie_prefix)
    assert primary_ext_id_pid in self.graph.APPEND_PROPS

    try:
        self.item = wdi_core.WDItemEngine(
            data=statements,
            append_value=self.graph.APPEND_PROPS,
            fast_run=self.graph.FAST_RUN,
            fast_run_base_filter={primary_ext_id_pid: ''},
            fast_run_use_refs=True,
            global_ref_mode='CUSTOM',
            ref_handler=self.ref_handler,
            mediawiki_api_url=self.mediawiki_api_url,
            sparql_endpoint_url=self.sparql_endpoint_url,
            core_props=self.graph.CORE_IDS,
            core_prop_match_thresh=.9)

        # the retrieved item must not already carry a different primary_ext_id
        if self.item.wd_item_id:
            query = "select ?primary_ext_id where {{ wd:{} wdt:{} ?primary_ext_id }}".format(
                self.item.wd_item_id, primary_ext_id_pid)
            bindings = wdi_core.WDItemEngine.execute_sparql_query(query)['results']['bindings']
            existing_ids = [row['primary_ext_id']['value'] for row in bindings]
            if existing_ids and self.id_curie not in existing_ids:
                raise Exception(
                    "conflicting primary_ext_id IDs: {} on {}".format(self.id_curie, self.item.wd_item_id))

        if not allow_new and self.item.create_new_item:
            return None
    except Exception as e:
        traceback.print_exc()
        error_msg = wdi_helpers.format_msg(primary_ext_id, primary_ext_id_pid, None, str(e), msg_type=type(e))
        wdi_core.WDItemEngine.log("ERROR", error_msg)
        return

    self.set_label(self.item)
    self.set_descr(self.item)
    self.set_aliases(self.item)
    # todo: I want to avoid this from happening:
    # https://www.wikidata.org/w/index.php?title=Q4553565&diff=676750840&oldid=647941942
    wdi_helpers.try_write(
        self.item,
        record_id=primary_ext_id,
        record_prop=primary_ext_id_pid,
        login=login,
        write=write)
    self.qid = self.item.wd_item_id
def remove_deprecated_statements(qid, frc, releases, last_updated, props, login):
    """
    Remove statements on ``qid`` that are only supported by outdated references.

    :param qid: qid of item
    :param frc: a fastrun container
    :param releases: list of releases to remove (a statement that has a
        reference that is stated in one of these releases will be removed)
    :param last_updated: looks like {'Q20641742': datetime.date(2017,5,6)}.
        a statement that has a reference that is stated in Q20641742 (entrez)
        and was retrieved more than DAYS before 2017-5-6 will be removed
    :param props: look at these props
    :param login: login object handed to ``wdi_helpers.try_write``
    :return: None
    """
    # make sure the fastrun container has these props loaded
    for prop in props:
        frc.write_required([wdi_core.WDString("fake value", prop)])
    orig_statements = frc.reconstruct_statements(qid)
    release_numbers = {int(r[1:]) for r in releases}

    def cites_release(ref):
        return any(x.get_prop_nr() == 'P248' and x.get_value() in release_numbers for x in ref)

    s_dep = []
    for statement in orig_statements:
        refs = statement.get_references()
        if any(cites_release(ref) for ref in refs):
            statement.remove = ''
            s_dep.append(statement)
            continue
        # NOTE(review): a statement with several stale references is appended
        # once per such reference.
        for ref in refs:
            dbs = [x.get_value() for x in ref if x.get_value() in last_updated]
            if not dbs:
                continue
            db = dbs[0]
            if any(x.get_prop_nr() == 'P813' and last_updated[db] - x.get_value() > DAYS for x in ref):
                statement.remove = ''
                s_dep.append(statement)

    if not s_dep:
        return

    print("-----")
    print(qid)
    print(len(s_dep))
    print([(x.get_prop_nr(), x.value) for x in s_dep])
    print([(x.get_references()[0]) for x in s_dep])
    wd_item = wdi_core.WDItemEngine(wd_item_id=qid, domain='none', data=s_dep, fast_run=False)
    wdi_helpers.try_write(wd_item, '', '', login, edit_summary="remove deprecated statements")
def create(self, login):
    """Create the combination-therapy item for ``self.component_qids``.

    The item is named after the component labels, gets one "has part"
    statement per component plus "instance of" Q1304270, and is recorded in
    ``self.combo_qid`` / ``self.qid_combo`` after a successful write.

    :param login: login object handed to ``try_write``
    :return: QID of the written item
    :raises ValueError: if ``try_write`` does not report success
    """
    # get names of components
    component_labels = getConceptLabels(self.component_qids)
    name = " / ".join(component_labels.values()) + " combination therapy"
    description = "combination therapy"

    # has part, followed by instance of combination therapy
    statements = [wdi_core.WDItemID(x, PROPS['has part']) for x in self.component_qids] + [
        wdi_core.WDItemID("Q1304270", PROPS['instance of'])
    ]

    item = wdi_core.WDItemEngine(item_name=name, data=statements, domain="asdf")
    item.set_label(name)
    item.set_description(description)

    success = try_write(item, record_id=";".join(self.component_qids), record_prop='', login=login)
    if not success:
        raise ValueError("unsuccessful item creation")

    self.combo_qid[self.component_qids] = item.wd_item_id
    self.qid_combo[item.wd_item_id] = self.component_qids
    return item.wd_item_id
def create_item(self, fast_run=True, write=True):
    """Build the gene item from this record and write it.

    The result of ``wdi_helpers.try_write`` is stored in ``self.status`` and
    the engine item in ``self.wd_item_gene``. ``core_props`` is taken from
    the enclosing scope.

    :param fast_run: forwarded to ``WDItemEngine``
    :param write: forwarded to ``wdi_helpers.try_write``
    """
    self.parse_external_ids()
    self.statements = self.create_statements()

    # remove subclass of gene statements
    # s = wdi_core.WDItemID("Q7187", "P279")
    # setattr(s, 'remove', '')

    self.create_label()
    self.create_description()
    self.create_aliases()

    self.fast_run_base_filter = {
        PROPS['Entrez Gene ID']: '',
        PROPS['found in taxon']: self.organism_info['wdid'],
    }

    engine_options = dict(
        item_name=self.label,
        domain='genes',
        data=self.statements,
        append_value=[PROPS['instance of']],
        fast_run=fast_run,
        fast_run_base_filter=self.fast_run_base_filter,
        fast_run_use_refs=True,
        ref_handler=update_retrieved_if_new,
        global_ref_mode="CUSTOM",
        core_props=core_props)
    gene_item = wdi_core.WDItemEngine(**engine_options)
    self.wd_item_gene = gene_item
    self.wd_item_gene = self.set_label_desc_aliases(self.wd_item_gene)

    self.status = wdi_helpers.try_write(
        self.wd_item_gene,
        self.external_ids['Entrez Gene ID'],
        PROPS['Entrez Gene ID'],
        self.login,
        write=write)
def create_item(self, login=None, write=True):
    """Create or update the item for this node, without dependent statements.

    Deprecated nodes are skipped. The label is only set when the item has no
    English label; the description is only replaced when it is empty or
    equal to ``self.default_label``.

    :param login: login object handed to ``wdi_helpers.try_write``
    :param write: forwarded to ``wdi_helpers.try_write``
    :return: the ``WDItemEngine`` item; ``None`` for a deprecated node or
        when an exception was caught and logged
    """
    if self.deprecated:
        return None

    try:
        statements = list(self.create_xref_statements())
        statements.extend(self.create_main_statements_nodepend())

        wd_item = wdi_core.WDItemEngine(
            item_name=self.lbl,
            data=statements,
            domain=self.domain,
            append_value=[PROPS['subclass of'], PROPS['instance of']],
            fast_run=self.fast_run,
            fast_run_base_filter={self.primary_ext_prop_qid: ''})

        if wd_item.get_label(lang="en") == "":
            wd_item.set_label(self.lbl, lang="en")

        descr_lower = wd_item.get_description(lang='en').lower()
        replaceable = descr_lower in {"", self.default_label}
        if replaceable and self.definition and len(self.definition) < 250:
            wd_item.set_description(description=self.definition, lang='en')
        elif descr_lower == "":
            wd_item.set_description(description=self.default_label, lang='en')

        if self.synonyms is not None:
            wd_item.set_aliases(aliases=self.synonyms, lang='en', append=True)
        if self.wikilink is not None:
            wd_item.set_sitelink(site="enwiki", title=self.wikilink)

        wdi_helpers.try_write(
            wd_item,
            record_id=self.id_colon,
            record_prop=self.primary_ext_prop_qid,
            login=login,
            write=write)
        self.wd_item_id = wd_item.wd_item_id
        return wd_item
    except Exception as e:
        traceback.print_exception(*sys.exc_info())
        error_msg = wdi_helpers.format_msg(
            self.id_colon, self.primary_ext_prop_qid, None, str(e), msg_type=type(e))
        wdi_core.WDItemEngine.log("ERROR", error_msg)
def create_chrom(self, organism_name, organism_qid, chrom_name, genome_id, chr_type, login):
    """Return the QID of a chromosome item, writing the item if none is found.

    :param organism_name: used, with ``chrom_name``, to build the item label
    :param organism_qid: value of the 'P703' (found in taxon) statement
    :param chrom_name: used, with ``organism_name``, to build the item label
    :param genome_id: refseq genome id; value of the 'P2249' statement
    :param chr_type: chromosome type; lower-cased and looked up in
        ``self.chr_type_map`` for the 'P31' value
    :param login: login object; only required when the item must be written
    :return: QID of the existing or newly written item
    :raises ValueError: for an unknown ``chr_type``, or when a write is
        needed and ``login`` is None
    """
    item_name = '{} {}'.format(organism_name, chrom_name)
    # the description uses chr_type as given, before it is lower-cased
    item_description = 'bacterial {}'.format(chr_type)
    print(genome_id)

    retrieved = datetime.now()
    reference = [
        # stated in ncbi gene
        wdi_core.WDItemID(value='Q20641742', prop_nr='P248', is_reference=True),
        # Link to Refseq Genome ID
        wdi_core.WDString(value=genome_id, prop_nr='P2249', is_reference=True),
        wdi_core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True),
    ]

    chr_type = chr_type.lower()
    if chr_type not in self.chr_type_map:
        raise ValueError("unknown chromosome type: {}".format(chr_type))

    statements = [
        # instance of chr_type
        wdi_core.WDItemID(value=self.chr_type_map[chr_type], prop_nr='P31', references=[reference]),
        # found in taxon
        wdi_core.WDItemID(value=organism_qid, prop_nr='P703', references=[reference]),
        # genome id
        wdi_core.WDString(value=genome_id, prop_nr='P2249', references=[reference]),
    ]

    wd_item = wdi_core.WDItemEngine(
        data=statements,
        append_value=['P31'],
        fast_run=True,
        fast_run_base_filter={'P703': organism_qid, 'P2249': ''},
        core_props=core_props)

    # an item was found: nothing to write
    if wd_item.wd_item_id:
        return wd_item.wd_item_id

    if login is None:
        raise ValueError("Login is required to create item")

    wd_item.set_label(item_name)
    wd_item.set_description(item_description, lang='en')
    wdi_helpers.try_write(wd_item, genome_id, 'P2249', login)
    return wd_item.wd_item_id
def create_item(self, fast_run=True, write=True):
    """Build the protein item from this record and write it.

    On success ``self.protein_wdid`` is set from the item. Any exception is
    printed with its traceback and logged under the UniProt ID.

    :param fast_run: forwarded to ``WDItemEngine``
    :param write: forwarded to ``wdi_helpers.try_write``
    """
    try:
        self.parse_external_ids()
        self.statements = self.create_statements()
        self.create_label()
        self.create_description()
        self.create_aliases()

        appendable_props = [
            PROPS['instance of'],
            PROPS['encoded by'],
            PROPS['Ensembl Protein ID'],
            PROPS['RefSeq Protein ID'],
        ]
        base_filter = {
            PROPS['UniProt ID']: '',
            PROPS['found in taxon']: self.organism_info['wdid'],
        }
        wd_item_protein = wdi_core.WDItemEngine(
            item_name=self.label,
            domain='proteins',
            data=self.statements,
            append_value=appendable_props,
            fast_run=fast_run,
            fast_run_base_filter=base_filter)

        wd_item_protein.set_label(self.label)
        wd_item_protein.set_description(self.description, lang='en')

        # merge existing and new aliases, leaving out the alias "protein"
        merged_aliases = (set(wd_item_protein.get_aliases()) | set(self.aliases)) - {"protein"}
        wd_item_protein.set_aliases(merged_aliases, append=False)

        wdi_helpers.try_write(
            wd_item_protein,
            self.external_ids['UniProt ID'],
            PROPS['UniProt ID'],
            self.login,
            write=write)
        self.protein_wdid = wd_item_protein.wd_item_id
    except Exception as e:
        traceback.print_exception(*sys.exc_info())
        error_msg = wdi_helpers.format_msg(
            self.external_ids['UniProt ID'],
            PROPS['UniProt ID'],
            None,
            str(e),
            msg_type=type(e))
        wdi_core.WDItemEngine.log("ERROR", error_msg)
def create_edges(self):
    """Write the edge statements of every subject in ``self.edges``.

    Edges are grouped by the ``":START_ID"`` column; each group is turned
    into statements by ``create_subj_edges`` and written to the item whose
    QID ``self.dbxref_qid`` gives for that start id. Groups that yield no
    statements are skipped.
    """
    grouped = self.edges.groupby(":START_ID")
    # subj, rows = "UniProt:Q96IV0", edges[edges[':START_ID']=='ClinVarVariant:50962']
    for _, rows in tqdm(grouped, total=len(grouped)):
        start_id = rows.iloc[0][':START_ID']
        # NOTE(review): .get() yields None for an unmapped start id, which is
        # then passed on as wd_item_id - confirm callers guarantee a mapping.
        subject_qid = self.dbxref_qid.get(start_id)
        statements = self.create_subj_edges(rows)
        if statements:
            item = self.item_engine(wd_item_id=subject_qid, data=statements, domain="asdf")
            wdi_helpers.try_write(item, start_id, self.dbxref_pid, self.login, write=self.write)
def write_item(self, wd_item):
    """Write one prepared item when writing is enabled; log any failure.

    Does nothing when ``self.write`` is falsy. An exception raised by the
    write is printed and logged as an error rather than propagated.

    :param wd_item: mapping with the keys ``'item'``, ``'record_id'`` and
        ``'record_prop'``
    """
    if not self.write:
        return
    try:
        wdi_helpers.try_write(
            wd_item['item'],
            record_id=wd_item['record_id'],
            record_prop=wd_item['record_prop'],
            edit_summary='edit genetic association',
            login=self.login,
            write=self.write)
    except Exception as e:
        print(e)
        error_msg = wdi_helpers.format_msg(
            wd_item['record_id'],
            wd_item['record_prop'],
            wd_item['item'].wd_item_id,
            str(e),
            type(e))
        wdi_core.WDItemEngine.log("ERROR", error_msg)