def run_one(row):
    """Write a single organism row through ``localItemEngine``.

    Builds two external-ID statements (GenBank assembly accession and NCBI
    taxonomy ID), each carrying its own reference created from the
    accession, then labels the item and hands it to
    ``wdi_helpers.try_write``.

    :param row: mapping providing ``'#Organism/Name'``, ``'TaxID'`` and
        ``'Assembly Accession'``
    """
    organism_name = row['#Organism/Name']
    tax_id = str(row['TaxID'])
    accession = row['Assembly Accession']

    accession_statement = wdi_core.WDExternalID(
        accession,
        uri_map[PROPS['GenBank Assembly accession']],
        references=[create_reference(accession)],
    )
    # A separate reference object is created for every statement.
    taxid_statement = wdi_core.WDExternalID(
        tax_id,
        uri_map[PROPS['NCBI Taxonomy ID']],
        references=[create_reference(accession)],
    )

    engine_kwargs = dict(
        data=[accession_statement, taxid_statement],
        item_name=organism_name,
        domain="organism",
        fast_run=True,
        fast_run_base_filter={uri_map[PROPS['NCBI Taxonomy ID']]: ''},
    )
    item = localItemEngine(**engine_kwargs)
    item.set_label(organism_name)
    item.set_description("bug")

    wdi_helpers.try_write(
        item,
        login=login,
        record_id=accession,
        record_prop=uri_map[PROPS['GenBank Assembly accession']],
    )
def test_fast_run(self):
    """Fast-run check: the given statements must not require a write.

    Builds a ``FastRunContainer`` with a base filter on P352 / P703 and
    asks whether the two external-ID statements need writing.

    :raises ValueError: if the container reports that a write is required
    """
    qid = 'Q27552312'  # not used below
    statements = [
        wdi_core.WDExternalID(value='P40095', prop_nr='P352'),
        wdi_core.WDExternalID(value='YER158C', prop_nr='P705'),
    ]

    base_filter = {'P352': '', 'P703': 'Q27510868'}
    container = wdi_fastrun.FastRunContainer(
        base_filter=base_filter,
        base_data_type=wdi_core.WDBaseDataType,
        engine=wdi_core.WDItemEngine,
    )

    fast_run_result = container.write_required(data=statements)
    message = 'fastrun failed' if fast_run_result else 'successful fastrun'
    print(fast_run_result, message)

    # here, fastrun should succeed, if not, test failed
    if fast_run_result:
        raise ValueError
def test_ref_equals():
    """Exercise ``equals`` with and without reference comparison.

    Three scenarios are checked against a baseline statement: an exact
    deep copy, a copy whose retrieved date differs, and a copy with an
    additional reference block.
    """

    def extra_reference():
        # Fresh list and fresh statement object on every call.
        return [wdi_core.WDExternalID(value='99999', prop_nr='P352')]

    # statements are identical
    reference_block = [
        wdi_core.WDExternalID(value='P58742', prop_nr='P352'),
        wdi_core.WDItemID(value='Q24784025', prop_nr='P527'),
        wdi_core.WDTime('+2001-12-31T12:01:13Z', prop_nr='P813'),
    ]
    baseline = wdi_core.WDItemID("Q123", "P123", references=[reference_block])
    candidate = copy.deepcopy(baseline)
    assert baseline.equals(candidate, include_ref=False)
    assert baseline.equals(candidate, include_ref=True)

    # dates are a month apart
    candidate = copy.deepcopy(baseline)
    later_date = wdi_core.WDTime('+2002-1-31T12:01:13Z', prop_nr='P813')
    candidate.references[0][2] = later_date
    assert baseline.equals(candidate, include_ref=False)
    assert not baseline.equals(candidate, include_ref=True)

    # multiple refs
    candidate = copy.deepcopy(baseline)
    candidate.references.append(extra_reference())
    assert baseline.equals(candidate, include_ref=False)
    assert not baseline.equals(candidate, include_ref=True)

    # once the baseline gets the same extra reference they match again
    baseline.references.append(extra_reference())
    assert baseline.equals(candidate, include_ref=True)
def do_pharm_prod(drug_qid, brand_rxnorm, emea, url, brand_name):
    """Write info on the pharmaceutical product page and return its QID.

    :param drug_qid: QID used as the value of the P3781 statement
    :param brand_rxnorm: RxNorm value; skipped when ``pd.isnull`` is true
    :param emea: EMEA identifier (P3637), also used as the record id
    :param url: URL handed to ``create_ref_statement``
    :param brand_name: item name and label
    :return: ``item.wd_item_id`` after the write attempt
    """
    ref = create_ref_statement(emea, url)

    statements = [
        # has active substance
        wdi_core.WDItemID(drug_qid, 'P3781', references=[ref]),
        # instance of: pharmaceutical product
        wdi_core.WDItemID('Q28885102', 'P31', references=[ref]),
        # instance of: chemical mixture
        wdi_core.WDItemID('Q169336', 'P31', references=[ref]),
        # emea
        wdi_core.WDExternalID(emea, 'P3637', references=[ref]),
    ]

    has_rxnorm = not pd.isnull(brand_rxnorm)
    if has_rxnorm:
        # This statement is written without a reference.
        rxnorm_value = str(int(brand_rxnorm))
        statements.append(wdi_core.WDExternalID(rxnorm_value, "P3345"))

    item = wdi_core.WDItemEngine(
        item_name=brand_name,
        data=statements,
        domain="drugs",
        append_value=['P3781'],
    )
    item.set_label(brand_name)
    # Only fill in a description when the item has none.
    if item.get_description() == '':
        item.set_description("pharmaceutical product")

    wdi_helpers.try_write(item, emea, 'P3637', login,
                          edit_summary="add 'active ingredient'")
    return item.wd_item_id
def create_xref_statements(self):
    """Populate ``self.s_xref`` with external-ID statements.

    The list always starts with the Disease Ontology ID statement for
    ``self.doid``. Each entry of ``self.xrefs`` of the form
    ``"prefix:code"`` whose prefix is a key of ``DOGraph.xref_prop`` adds
    one more statement. Every statement carries ``self.reference``, which
    is created first if it is not set yet.

    Entries without a ``":"`` separator are skipped; previously they
    raised ``ValueError`` while unpacking the split result.
    """
    if not self.reference:
        self.create_reference()

    self.s_xref = [
        wdi_core.WDExternalID(self.doid,
                              PROPS['Disease Ontology ID'],
                              references=[self.reference])
    ]

    for xref in self.xrefs:
        prefix, separator, code = xref.partition(":")
        if not separator:
            # Malformed xref (no "prefix:code" shape): nothing to map.
            continue
        if prefix in DOGraph.xref_prop:
            self.s_xref.append(
                wdi_core.WDExternalID(code,
                                      DOGraph.xref_prop[prefix],
                                      references=[self.reference]))
def create_xref_statements(self):
    """Populate ``self.s_xref`` with external-ID statements.

    The Disease Ontology ID statement for ``self.doid`` comes first. Each
    ``"prefix:code"`` entry in ``self.xrefs`` then adds a statement when
    its upper-cased, stripped prefix is a key of ``DOGraph.xref_prop``.
    Entries without a colon are ignored, as are OMIM codes starting with
    ``"PS"``. ``self.reference`` is created on demand and attached to
    every statement.
    """
    if not self.reference:
        self.create_reference()

    primary = wdi_core.WDExternalID(self.doid,
                                    PROPS['Disease Ontology ID'],
                                    references=[self.reference])
    self.s_xref = [primary]

    for xref in self.xrefs:
        raw_prefix, separator, raw_code = xref.partition(":")
        if not separator:
            continue

        code = raw_code.strip()
        prefix_key = raw_prefix.strip().upper()
        if prefix_key not in DOGraph.xref_prop:
            continue
        if prefix_key == "OMIM" and code.startswith("PS"):
            continue

        self.s_xref.append(
            wdi_core.WDExternalID(code,
                                  DOGraph.xref_prop[prefix_key],
                                  references=[self.reference]))
def create_relationships(self, login, write=True):
    """Write the subclass-of / has-part / part-of statements of this term.

    Returns early, writing nothing, when the WDID lookup raises
    ``KeyError`` (which is logged) or when no relationship statement was
    built beyond the InterPro ID itself.

    :param login: login object passed on to ``wdi_helpers.try_write``
    :param write: passed on to ``wdi_helpers.try_write``
    """
    try:
        # endpoint may not get updated in time?
        self.do_wdid_lookup()
    except KeyError as e:
        wdi_core.WDItemEngine.log(
            "ERROR", format_msg(self.id, INTERPRO, None, str(e), type(e)))
        return

    def related(target_wdid, prop):
        # Item statement pointing at another term, with this term's reference.
        return wdi_core.WDItemID(value=target_wdid, prop_nr=prop,
                                 references=[self.reference])

    statements = [
        wdi_core.WDExternalID(value=self.id, prop_nr=INTERPRO,
                              references=[self.reference])
    ]
    if self.parent:
        # subclass of
        statements.append(related(self.parent_wdid, 'P279'))
    if self.contains:
        # has part
        statements.extend(related(wdid, 'P527') for wdid in self.contains_wdid)
    if self.found_in:
        # part of
        statements.extend(related(wdid, 'P361') for wdid in self.found_in_wdid)

    # Only the InterPro ID statement is present: nothing to write.
    if len(statements) == 1:
        return

    wd_item = wdi_core.WDItemEngine(
        wd_item_id=self.wdid,
        domain='interpro',
        data=statements,
        append_value=['P279', 'P527', 'P361'],
        fast_run=True,
        fast_run_base_filter=IPRTerm.fast_run_base_filter,
    )
    wdi_helpers.try_write(
        wd_item, self.id, INTERPRO, login,
        edit_summary="create/update subclass/has part/part of",
        write=write)
def create_item(self, login=None, fast_run=True, write=True):
    """Build the item for this term and optionally write it.

    :param login: login object; if no login is given, no write is attempted
    :param fast_run: passed to ``WDItemEngine``
    :param write: passed to ``wdi_helpers.try_write``
    :return: the ``WDItemEngine`` instance, or ``None`` when constructing
        it raised ``JSONDecodeError`` (which is logged)
    """
    id_statement = wdi_core.WDExternalID(
        value=self.id, prop_nr=INTERPRO, references=[self.reference])
    type_statement = wdi_core.WDItemID(
        value=self.type_wdid, prop_nr=INSTANCE_OF, references=[self.reference])

    try:
        wd_item = wdi_core.WDItemEngine(
            item_name=self.name,
            domain='interpro',
            data=[id_statement, type_statement],
            append_value=["P279", "P31"],
            fast_run=fast_run,
            fast_run_base_filter=IPRTerm.fast_run_base_filter,
        )
    except JSONDecodeError as e:
        error_msg = wdi_helpers.format_msg(
            self.id, INTERPRO, None, str(e), msg_type=type(e))
        wdi_core.WDItemEngine.log("ERROR", error_msg)
        return None

    wd_item.set_label(self.name, lang='en')
    # Fill in descriptions only for languages that have none yet.
    for lang, description in self.lang_descr.items():
        if wd_item.get_description(lang=lang) != "":
            continue
        wd_item.set_description(description, lang=lang)
    wd_item.set_aliases([self.short_name, self.id])

    if login:
        wdi_helpers.try_write(wd_item, self.id, INTERPRO, login, write=write)
    return wd_item
def set_taxon(taxid):
    """Build (without writing) an item engine for an NCBI taxon.

    Fetches the taxonomy summary from NCBI eutils and creates three
    statements: instance of Q16521, the NCBI tax id (P685) and the
    scientific name (P225). Each statement gets its own deep copy of the
    reference from ``createNCBITaxReference``. Label, alias and English
    description are then filled in where missing.

    :param taxid: taxonomy id; used both in the URL and as the key into
        the ``"result"`` object of the response
    :return: the ``WDItemEngine`` instance
    """
    summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id={}&format=json".format(taxid)
    response = requests.get(summary_url)
    ncbiTaxon = json.loads(response.text)

    ncbiTaxref = createNCBITaxReference(taxid, retrieved)

    def fresh_refs():
        # Every statement receives an independent copy of the reference.
        return [copy.deepcopy(ncbiTaxref)]

    # instance of
    instance_of = wdi_core.WDItemID(value="Q16521", prop_nr="P31",
                                    references=fresh_refs())
    # NCBI tax id
    tax_id_statement = wdi_core.WDExternalID(value=taxid, prop_nr="P685",
                                             references=fresh_refs())
    # scientificname
    scientificName = ncbiTaxon["result"][taxid]['scientificname']
    name_statement = wdi_core.WDString(scientificName, prop_nr="P225",
                                       references=fresh_refs())

    item = wdi_core.WDItemEngine(
        data=[instance_of, tax_id_statement, name_statement])

    if item.get_label() == "":
        item.set_label(label=scientificName, lang="en")
    # An existing, different label is kept; the name becomes an alias.
    if item.get_label() != scientificName:
        item.set_aliases(aliases=[scientificName])
    if item.get_description(lang="en") == "":
        item.set_description(description="strain of virus", lang="en")
    return item
def run_one(taxid, genbank_id):
    """Attach a GenBank assembly accession to the item of a taxon.

    :param taxid: taxonomy id; converted to ``str`` for the lookup in
        ``tax_qid_map``
    :param genbank_id: GenBank assembly accession to write
    :return: ``None`` (a warning is logged when the taxid is unknown)
    """
    accession_prop = PROPS['GenBank Assembly accession']

    # get the QID
    taxid = str(taxid)
    if taxid not in tax_qid_map:
        warning = wdi_helpers.format_msg(
            genbank_id, accession_prop, "",
            "organism with taxid {} not found or skipped".format(taxid))
        wdi_core.WDItemEngine.log("WARNING", warning)
        return None
    qid = tax_qid_map[taxid]

    accession_statement = wdi_core.WDExternalID(
        genbank_id, accession_prop,
        references=[create_reference(genbank_id)])

    # create the item object, specifying the qid
    item = wdi_core.WDItemEngine(
        data=[accession_statement],
        wd_item_id=qid,
        fast_run=True,
        fast_run_base_filter={accession_prop: ''},
        global_ref_mode='CUSTOM',
        fast_run_use_refs=True,
        ref_handler=update_retrieved_if_new,
    )

    wdi_helpers.try_write(
        item,
        record_id=genbank_id,
        record_prop=accession_prop,
        login=login,
        edit_summary="update GenBank Assembly accession")
def make_ref(nui):
    """Return the NDF-RT reference for ``nui``.

    :param nui: NDF-RT ID stored in the P2115 reference statement
    :return: a list holding one reference block (itself a list of three
        reference statements)
    """
    # stated in ndfrt
    stated_in = wdi_core.WDItemID(value='Q21008030', prop_nr='P248',
                                  is_reference=True)
    # NDF-RT ID
    ndfrt_id = wdi_core.WDExternalID(value=nui, prop_nr='P2115',
                                     is_reference=True)
    # retrieved
    today = time.strftime('+%Y-%m-%dT00:00:00Z')
    retrieved = wdi_core.WDTime(time=today, prop_nr='P813', is_reference=True)

    return [[stated_in, ndfrt_id, retrieved]]
def create(self, label: str, rxcui: str, ingredient_qids: list):
    """Create a new combination-drug item and register it in the caches.

    The redundant ``if item.create_new_item: item.set_label(label)`` was
    dropped: the label was set unconditionally right after it, so the
    outcome is unchanged.

    :param label: item name and label of the new item
    :param rxcui: RxNorm CUI; converted to ``str`` before use
    :param ingredient_qids: QIDs of the ingredients (one P527 statement each)
    :return: QID of the written item
    :raises ValueError: if ``rxcui`` is already in ``self.rxnorm_qid`` or
        ``self.get_mixture_qid`` finds an item with these ingredients
    """
    rxcui = str(rxcui)

    # check to make sure it doesn't exist
    if rxcui in self.rxnorm_qid:
        raise ValueError("rxcui {} already exists: {}".format(
            rxcui, self.rxnorm_qid[rxcui]))

    # check by ingredients
    qid = self.get_mixture_qid(ingredient_qids)
    if qid:
        raise ValueError("mixture already exists: {}".format(qid))

    # has part
    s = [wdi_core.WDItemID(x, 'P527', references=make_ref(rxcui))
         for x in ingredient_qids]
    # instance of: drug, chemical substance, mixture
    for type_qid in ('Q12140', 'Q79529', 'Q169336'):
        s.append(wdi_core.WDItemID(type_qid, 'P31',
                                   references=make_ref(rxcui)))
    # rxnorm
    s.append(wdi_core.WDExternalID(rxcui, "P3345",
                                   references=make_ref(rxcui)))

    item = wdi_core.WDItemEngine(item_name=label, data=s, domain="drugs")
    item.set_label(label)
    if not item.get_description():
        item.set_description("combination drug")
    item.write(self.login)
    qid = item.wd_item_id

    # update cache
    self.components_mixture[frozenset(ingredient_qids)] = qid
    self.mixture_components[qid] = ingredient_qids
    self.rxnorm_qid[rxcui] = qid
    return qid
def make_ref(rxnorm):
    """Return the RxNorm reference for ``rxnorm``.

    :param rxnorm: rxcui stored in the P3345 reference statement
    :return: a list holding one reference block (itself a list of three
        reference statements)
    """
    # stated in rxnorm
    stated_in = wdi_core.WDItemID(value='Q7383767', prop_nr='P248',
                                  is_reference=True)
    # rxcui
    rxcui = wdi_core.WDExternalID(value=rxnorm, prop_nr='P3345',
                                  is_reference=True)
    # retrieved
    today = time.strftime('+%Y-%m-%dT00:00:00Z')
    retrieved = wdi_core.WDTime(time=today, prop_nr='P813', is_reference=True)

    return [[stated_in, rxcui, retrieved]]
def to_wikidata(self):
    """Build the string statements for this compound.

    Currently a single P662 statement is produced, whose value is
    ``self.cid`` with its first three characters removed. The reference
    block stores the full ``self.cid``. Each non-empty value is printed
    before its statement is created.

    :return: list of ``wdi_core.WDString`` statements
    """
    today = time.strftime('+%Y-%m-%dT00:00:00Z')
    refs = [[
        # stated in
        wdi_core.WDItemID(value='Q278487', prop_nr='P248', is_reference=True),
        # source element
        wdi_core.WDExternalID(value=self.cid, prop_nr='P662',
                              is_reference=True),
        # retrieved
        wdi_core.WDTime(time=today, prop_nr='P813', is_reference=True),
    ]]

    elements = {'P662': self.cid[3:]}

    data = []
    for prop, value in elements.items():
        if not value:
            continue
        print('{}:'.format(prop), value)
        # Collections yield one statement per member, scalars exactly one.
        members = value if isinstance(value, (list, set)) else [value]
        data.extend(
            wdi_core.WDString(prop_nr=prop, value=member, references=refs)
            for member in members)
    return data
def test_fastrun_label(self):
    """Test fastrun label, description and aliases, and label in another language.

    Runs against item Q2 with a fast-run base filter, checks the values
    read through the engine and the fast-run container, then overwrites
    label, description and aliases and inspects the JSON representation.
    """
    statements = [wdi_core.WDExternalID('/m/02j71', 'P646')]
    base_filter = {'P361': 'Q18589965'}
    item = wdi_core.WDItemEngine(wd_item_id="Q2",
                                 data=statements,
                                 fast_run=True,
                                 fast_run_base_filter=base_filter)

    frc = wdi_core.WDItemEngine.fast_run_store[0]
    frc.debug = True

    assert item.get_label('en') == "Earth"
    english_description = item.get_description('en')
    assert len(english_description) > 3
    english_aliases = item.get_aliases()
    assert "Terra" in english_aliases

    container = item.fast_run_container
    stored_labels = list(container.get_language_data("Q2", 'en', 'label'))
    assert stored_labels[0] == "Earth"
    assert item.fast_run_container.check_language_data(
        "Q2", ['not the Earth'], 'en', 'label')

    assert "Terra" in item.get_aliases()
    assert "planet" in item.get_description()

    assert item.get_label("es") == "Tierra"

    # Same description first, then a different one.
    item.set_description(english_description)
    item.set_description("fghjkl")
    expected_description = {'language': 'en', 'value': 'fghjkl'}
    assert item.wd_json_representation['descriptions']['en'] == expected_description

    # Same label first, then a different one.
    item.set_label("Earth")
    item.set_label("xfgfdsg")
    expected_label = {'language': 'en', 'value': 'xfgfdsg'}
    assert item.wd_json_representation['labels']['en'] == expected_label

    item.set_aliases(["fake alias"], append=True)
    expected_alias = {'language': 'en', 'value': 'fake alias'}
    assert expected_alias in item.wd_json_representation['aliases']['en']

    # something thats empty (for now.., can change, so this just makes sure
    # no exception is thrown)
    frc.check_language_data("Q2", ['Ewiase'], 'ak', 'label')
    frc.check_language_data("Q2", ['not Ewiase'], 'ak', 'label')
    frc.check_language_data("Q2", [''], 'ak', 'description')
    frc.check_language_data("Q2", [], 'ak', 'aliases')
    frc.check_language_data("Q2", ['sdf', 'sdd'], 'ak', 'aliases')

    item.get_label("ak")
    item.get_description("ak")
    item.get_aliases("ak")
    item.set_label("label", lang="ak")
    item.set_description("d", lang="ak")
    item.set_aliases(["a"], lang="ak", append=True)
def create_ref_statement(emea_id, url):
    """Build one reference block from an EMEA id and a URL.

    :param emea_id: value of the P3637 reference statement
    :param url: value of the P854 reference statement
    :return: list ``[emea id, url, retrieved]`` of reference statements;
        the retrieved date (P813) is today's UTC date at midnight
    """
    ref_url = wdi_core.WDUrl(url, prop_nr='P854', is_reference=True)
    ref_emea = wdi_core.WDExternalID(emea_id, 'P3637', is_reference=True)

    today_utc = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
    ref_retrieved = wdi_core.WDTime(today_utc, prop_nr='P813',
                                    is_reference=True)

    return [ref_emea, ref_url, ref_retrieved]
def create_ref_statement(self, doid):
    """Build one reference block for a Disease Ontology id.

    ``self.create_release()`` is called first when ``self.release`` is
    not set.

    :param doid: value of the P699 reference statement
    :return: list ``[stated in, retrieved, doid]`` of reference
        statements; the retrieved date (P813) is today's UTC date
    """
    if not self.release:
        self.create_release()

    stated_in = wdi_core.WDItemID(value=self.release, prop_nr='P248',
                                  is_reference=True)
    ref_doid = wdi_core.WDExternalID(value=doid, prop_nr='P699',
                                     is_reference=True)
    today_utc = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
    ref_retrieved = wdi_core.WDTime(today_utc, prop_nr='P813',
                                    is_reference=True)

    return [stated_in, ref_retrieved, ref_doid]
def append_cellosaurus_id(cellosaurus_cell_line_id,
                          information_to_insert_on_wikidata, reference):
    """Append a Cellosaurus ID (P3289) statement to the given list.

    :param cellosaurus_cell_line_id: value of the statement
    :param information_to_insert_on_wikidata: list that is extended in place
    :param reference: passed as ``references`` to the statement
    :return: the same list object that was passed in
    """
    # P3289 : Cellosaurus ID
    cellosaurus_statement = wdi_core.WDExternalID(
        value=cellosaurus_cell_line_id,
        prop_nr="P3289",
        references=reference,
    )
    information_to_insert_on_wikidata.append(cellosaurus_statement)
    return information_to_insert_on_wikidata
def createMONDOReference(id):
    """Build one reference block for a MONDO id.

    Property and item numbers (P5, P6, P7, Q2) are hard-coded; they
    presumably belong to a local wikibase -- verify against the target
    instance.

    :param id: value of the P5 reference statement
    :return: list ``[stated in, retrieved, mondo id]`` of reference
        statements; the retrieved date is today's local date at midnight
    """
    statedin = wdi_core.WDItemID("Q2", prop_nr="P6", is_reference=True)

    now_stamp = datetime.now().strftime("+%Y-%m-%dT00:00:00Z")
    refRetrieved = wdi_core.WDTime(now_stamp, prop_nr="P7", is_reference=True)

    mondoid = wdi_core.WDExternalID(id, prop_nr="P5", is_reference=True)
    return [statedin, refRetrieved, mondoid]
async def add_topic(self, topic_id: str, categories: [str], fid=None) -> str:
    """Write a topic item and return the result of ``write``.

    :param topic_id: value of the ``PROP_TOPIC`` statement; also used to
        look up an existing item via ``get_qid_from_property(12, ...)``
    :param categories: one ``PROP_CATEGORY`` statement is added per entry
    :param fid: optional value of a ``PROP_FID`` statement; skipped when falsy
    :return: whatever ``WDItemEngine.write`` returns
    """
    statements = [
        wdi_core.WDExternalID(value=topic_id, prop_nr=self.PROP_TOPIC),
        wdi_core.WDItemID('Q887', prop_nr=self.PROP_POST_TYPE),
    ]
    statements.extend(
        wdi_core.WDExternalID(value=category, prop_nr=self.PROP_CATEGORY)
        for category in categories)
    if fid:
        statements.append(
            wdi_core.WDExternalID(value=fid, prop_nr=self.PROP_FID))

    existing_qid = await get_qid_from_property(12, topic_id)
    engine = wdi_core.WDItemEngine(wd_item_id=existing_qid,
                                   data=statements,
                                   mediawiki_api_url=self.MW_URL)
    return engine.write(self.get_login())
def update_song(entity, song_obj, artist_wikidata_id, artist_name):
    """Add any missing song statements to ``entity`` and write it.

    Statements are appended to ``entity.statements`` in place. A
    statement is added only when the entity lacks it:

    * instance of ``SONG_ID``,
    * the MusicBrainz id of ``song_obj``,
    * ``artist_wikidata_id`` as performer,
    * the song name as title (only when no title claim exists at all).

    A description is set when the entity has none. The entity is then
    passed to ``write_to_wikidata`` and a confirmation is printed.

    Changes from the previous version: the MusicBrainz id is looked up
    once instead of up to three times, the duplicated ``if``/``elif``
    branches are merged (same short-circuit order), and the comments no
    longer refer to musicians/occupations.

    :param entity: item engine object for the song
    :param song_obj: source record for the song
    :param artist_wikidata_id: QID of the performing artist
    :param artist_name: artist name used in the description and message
    """
    existing_instance_of_ids = get_wikidata_property_values(
        entity, INSTANCE_OF_ID)
    song_name = get_song_name(song_obj)
    data = entity.statements  # same list object: appends mutate the entity
    song_mb_id = get_musicbrainz_song_id(song_obj)
    claims = entity.wd_json_representation['claims']

    # Mark the item as a song if it is not already.
    if SONG_ID not in existing_instance_of_ids:
        data.append(wdi_core.WDItemID(value=SONG_ID, prop_nr=INSTANCE_OF_ID))

    # Add the MusicBrainz id when the property is absent or holds other ids.
    if (MUSIC_BRAINZ_SONG_PROP_ID not in claims
            or song_mb_id not in get_wikidata_property_values(
                entity, MUSIC_BRAINZ_SONG_PROP_ID)):
        data.append(
            wdi_core.WDExternalID(value=song_mb_id,
                                  prop_nr=MUSIC_BRAINZ_SONG_PROP_ID))

    # Add the artist as performer when no performer is set or it is missing.
    if (PERFORMER_ID not in claims
            or artist_wikidata_id not in get_wikidata_property_values(
                entity, PERFORMER_ID)):
        data.append(
            wdi_core.WDItemID(value=artist_wikidata_id, prop_nr=PERFORMER_ID))

    # Set the title only when the item has no title claim at all.
    if TITLE_ID not in claims:
        data.append(
            wdi_core.WDMonolingualText(value=song_name, prop_nr=TITLE_ID))

    if get_description(entity) == '':
        entity.set_description(f"Song performed by {artist_name}")

    write_to_wikidata(entity, data)
    print(
        f"Song {song_name} by {artist_name} has been updated on WikiData server."
    )
def add_extra_statements(self):
    """Append URL and DOI statements to ``self.statements`` when available.

    A truthy ``self.url`` adds a P2699 URL statement qualified with
    P642 = Q7126717; a truthy ``self.doi`` adds a P356 statement.
    """
    url = self.url
    if url:
        download_qualifier = wdi_core.WDItemID('Q7126717', 'P642',
                                               is_qualifier=True)
        url_statement = wdi_core.WDUrl(url, 'P2699',
                                       qualifiers=[download_qualifier])
        self.statements.append(url_statement)

    doi = self.doi
    if doi:
        self.statements.append(wdi_core.WDExternalID(doi, 'P356'))
def create_ref_statement(self):
    """Build one reference block for this node.

    Property ids P248 and P813 are translated through
    ``self.helper.get_pid``; the external id uses ``self.id_pid`` as is.

    :return: list ``[stated in, retrieved, external id]`` of reference
        statements; the retrieved date is today's UTC date at midnight
    :raises AssertionError: if ``self.graph.release_qid`` is not set
    """
    assert self.graph.release_qid, "create the release first (on the graph class)"

    get_pid = self.helper.get_pid
    stated_in = wdi_core.WDItemID(value=self.graph.release_qid,
                                  prop_nr=get_pid('P248'),
                                  is_reference=True)
    ref_extid = wdi_core.WDExternalID(value=self.id_value,
                                      prop_nr=self.id_pid,
                                      is_reference=True)
    today_utc = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
    ref_retrieved = wdi_core.WDTime(today_utc,
                                    prop_nr=get_pid('P813'),
                                    is_reference=True)

    return [stated_in, ref_retrieved, ref_extid]
def create_xref_statements(self):
    """Return the external-ID statements for this node.

    These are string only and do not rely on any other items existing.
    The first statement is the primary identifier (``self.id_colon``).
    Each ``"prefix:code"`` entry in ``self.xrefs`` whose prefix is a key
    of ``self.graph.xref_props`` adds one more. ``self.reference`` is
    created on demand and attached to every statement.

    Entries without a ``":"`` separator are skipped; previously they
    raised ``ValueError`` while unpacking the split result.

    :return: list of ``wdi_core.WDExternalID`` statements
    """
    if not self.reference:
        self.create_reference()

    s = [
        wdi_core.WDExternalID(self.id_colon,
                              self.primary_ext_prop_qid,
                              references=[self.reference])
    ]

    for xref in self.xrefs:
        prefix, separator, code = xref.partition(":")
        if not separator:
            # Malformed xref (no "prefix:code" shape): nothing to map.
            continue
        if prefix in self.graph.xref_props:
            s.append(
                wdi_core.WDExternalID(code,
                                      self.graph.xref_props[prefix],
                                      references=[self.reference]))
    return s
def append_mesh_id(cell_line_object, data_to_add_to_wikidata):
    """Append a MeSH ID (P486) statement unless the dump value is ``"NULL"``.

    :param cell_line_object: object exposing ``cell_line_dump`` (mapping
        with a ``"MeSH"`` key) and ``references_in_wdi_format``
    :param data_to_add_to_wikidata: list that is extended in place
    :return: the same list object that was passed in
    """
    # P486 : MeSH ID
    mesh_id = cell_line_object.cell_line_dump["MeSH"]
    reference = cell_line_object.references_in_wdi_format

    if mesh_id == "NULL":
        return data_to_add_to_wikidata

    mesh_statement = wdi_core.WDExternalID(
        value=cell_line_object.cell_line_dump["MeSH"],
        prop_nr="P486",
        references=reference,
    )
    data_to_add_to_wikidata.append(mesh_statement)
    return data_to_add_to_wikidata
def buildRefs(refseq):
    """Build one reference block for a RefSeq id.

    The retrieved date is the fixed literal ``+2018-09-12T00:00:00Z``.

    :param refseq: value of the P2249 reference statement
    :return: list ``[stated in, retrieved, refseq id]`` of reference
        statements
    """
    stated_in = wdi_core.WDItemID(value="Q7307074", prop_nr='P248',
                                  is_reference=True)
    retrieved = wdi_core.WDTime("+2018-09-12T00:00:00Z", prop_nr='P813',
                                is_reference=True)
    refseq_id = wdi_core.WDExternalID(value=refseq, prop_nr='P2249',
                                      is_reference=True)
    return [stated_in, retrieved, refseq_id]
def create_xref_statement(self, xref):
    """Turn one CURIE into an external-ID statement.

    The reference is created before the prefix check. The resolved
    property id is also recorded in ``self.pids``.

    :param xref: CURIE string; the text before the first ``":"`` is
        looked up in ``cu.curie_map``
    :return: a ``wdi_core.WDExternalID`` statement, or ``None`` (after
        logging a warning) when the prefix is unknown
    """
    ref = self.create_ref_statement()

    curie_prefix = xref.split(":")[0]
    if curie_prefix not in cu.curie_map:
        # log this curie prefix not being found
        warning = wdi_helpers.format_msg(
            self.id_curie, self.id_pid, self.qid,
            "curie prefix not found: {}".format(curie_prefix))
        wdi_core.WDItemEngine.log("WARNING", warning)
        return None

    raw_pid, ext_id = cu.parse_curie(xref)
    pid = self.helper.get_pid(raw_pid)
    self.pids.add(pid)
    return wdi_core.WDExternalID(ext_id, pid, references=[ref])
def append_hpscreg_id(cell_line_object, data_to_add_to_wikidata):
    """Append an hPSCreg cell line ID (P9554) statement unless it is ``"NULL"``.

    :param cell_line_object: object exposing ``cell_line_dump`` (mapping
        with an ``"hPSCreg"`` key) and ``references_in_wdi_format``
    :param data_to_add_to_wikidata: list that is extended in place
    :return: the same list object that was passed in
    """
    # P9554 : hPSCreg cell line ID
    hpscreg_id = cell_line_object.cell_line_dump["hPSCreg"]
    reference = cell_line_object.references_in_wdi_format

    if hpscreg_id == "NULL":
        return data_to_add_to_wikidata

    hpscreg_statement = wdi_core.WDExternalID(
        value=cell_line_object.cell_line_dump["hPSCreg"],
        prop_nr="P9554",
        references=reference,
    )
    data_to_add_to_wikidata.append(hpscreg_statement)
    return data_to_add_to_wikidata
def create_ref_statement(release_qid, external_prop_id, external_id):
    """Build one reference block pointing at a release item.

    :param release_qid: value of the P248 reference statement
    :param external_prop_id: property number of the external-id statement
    :param external_id: value of the external-id statement
    :return: list ``[stated in, retrieved, external id]`` of reference
        statements; the retrieved date (P813) is today's UTC date
    """
    stated_in = wdi_core.WDItemID(value=release_qid, prop_nr='P248',
                                  is_reference=True)
    ref_external_id = wdi_core.WDExternalID(value=external_id,
                                            prop_nr=external_prop_id,
                                            is_reference=True)
    today_utc = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
    ref_retrieved = wdi_core.WDTime(today_utc, prop_nr='P813',
                                    is_reference=True)

    return [stated_in, ref_retrieved, ref_external_id]
def make_ref(mesh_id):
    """Return the MeSH reference for ``mesh_id``.

    :param mesh_id: value of the ``PROPS['MeSH ID']`` reference statement
    :return: a list holding one reference block (itself a list of three
        reference statements)
    """
    # stated in mesh
    stated_in = wdi_core.WDItemID(value=ITEMS['Medical Subject Headings'],
                                  prop_nr='P248', is_reference=True)
    # mesh id
    mesh_ref = wdi_core.WDExternalID(value=mesh_id, prop_nr=PROPS['MeSH ID'],
                                     is_reference=True)
    # retrieved
    today = time.strftime('+%Y-%m-%dT00:00:00Z')
    retrieved = wdi_core.WDTime(time=today, prop_nr='P813', is_reference=True)

    return [[stated_in, mesh_ref, retrieved]]