def write2wikidata(qid, value):
    print("%s %s" % (qid, value))
    statement = wdi_core.WDMonolingualText(value=value, prop_nr='P2561',
                                           references=[[ref]])
    item = wdi_core.WDItemEngine(wd_item_id=qid, data=[statement],
                                 domain=None, use_sparql=True)
    item.write(login=login)
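write2wikidata relies on a module-level `ref` (a single reference snak) and `login` that are defined elsewhere in its source. A minimal sketch of how they could be prepared; the account name and password are placeholders, not taken from the original:

from time import gmtime, strftime

from wikidataintegrator import wdi_core, wdi_login

# Hypothetical bot credentials; substitute a real account.
login = wdi_login.WDLogin(user='ExampleBot', pwd='example-password')

# A "retrieved" (P813) reference snak pointing at today's date.
ref = wdi_core.WDTime(strftime("+%Y-%m-%dT00:00:00Z", gmtime()),
                      prop_nr='P813', is_reference=True)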
def append(self, datatype, prop_nr, value, qualifiers=None):
    """
    Append a statement to a WikidataEntry.

    @param datatype: string, monolingual, externalid, itemid, or date
    @param prop_nr: string Wikidata property ID (e.g. P31)
    @param value: string representing the value of the statement
    @param qualifiers: list of qualifiers that are WDBaseDataType children
    """
    if qualifiers is None:
        qualifiers = []
    if len(value) == 0:
        return
    if datatype == 'string':
        statement = wdi_core.WDString(value=value, prop_nr=prop_nr,
                                      references=self.ref, qualifiers=qualifiers)
    elif datatype == 'monolingual':
        statement = wdi_core.WDMonolingualText(value=value, prop_nr=prop_nr,
                                               references=self.ref,
                                               qualifiers=qualifiers,
                                               language='en')
    elif datatype == 'externalid':
        statement = wdi_core.WDExternalID(value, prop_nr=prop_nr,
                                          references=self.ref,
                                          qualifiers=qualifiers)
    elif datatype == 'itemid':
        statement = wdi_core.WDItemID(value=value, prop_nr=prop_nr,
                                      references=self.ref, qualifiers=qualifiers)
    elif datatype == 'date':  # Technically this should be "time"
        statement = wdi_core.WDTime(value, prop_nr=prop_nr,
                                    references=self.ref, qualifiers=qualifiers)
    else:
        raise ValueError(
            '`datatype` should be string, monolingual, externalid, itemid, or date')
    self.data.append(statement)
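A hedged usage sketch: `append` expects to be bound to an object that exposes `self.ref` (a list of reference-snak lists) and `self.data` (the statements collected for one item). The stand-in object and property values below are illustrative assumptions, not part of the original source:

from types import SimpleNamespace

from wikidataintegrator import wdi_core

# Stand-in carrier for ref/data; the real WikidataEntry class lives elsewhere.
entry = SimpleNamespace(ref=[], data=[])

append(entry, 'monolingual', 'P1476', 'An example title')   # title (P1476)
append(entry, 'itemid', 'P31', 'Q13442814')                  # instance of: scholarly article
append(entry, 'externalid', 'P356', '10.1000/EXAMPLE')       # DOI (placeholder value)
print(len(entry.data))  # -> 3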
def update_song(entity, song_obj, artist_wikidata_id, artist_name):
    existing_instanceOf_ids = get_wikidata_property_values(entity, INSTANCE_OF_ID)
    song_name = get_song_name(song_obj)
    data = entity.statements
    song_mb_id = get_musicbrainz_song_id(song_obj)

    # Mark as an instance of "song" if not already
    if SONG_ID not in existing_instanceOf_ids:
        data.append(wdi_core.WDItemID(value=SONG_ID, prop_nr=INSTANCE_OF_ID))

    # Set the MusicBrainz ID if not already set, or if it differs
    if MUSIC_BRAINZ_SONG_PROP_ID not in entity.wd_json_representation['claims']:
        data.append(
            wdi_core.WDExternalID(value=song_mb_id,
                                  prop_nr=MUSIC_BRAINZ_SONG_PROP_ID))
    elif song_mb_id not in get_wikidata_property_values(entity,
                                                        MUSIC_BRAINZ_SONG_PROP_ID):
        data.append(
            wdi_core.WDExternalID(value=song_mb_id,
                                  prop_nr=MUSIC_BRAINZ_SONG_PROP_ID))

    # Set performer to the artist if no performer is set or the performer is not this artist
    if PERFORMER_ID not in entity.wd_json_representation['claims']:
        data.append(
            wdi_core.WDItemID(value=artist_wikidata_id, prop_nr=PERFORMER_ID))
    elif artist_wikidata_id not in get_wikidata_property_values(entity, PERFORMER_ID):
        data.append(
            wdi_core.WDItemID(value=artist_wikidata_id, prop_nr=PERFORMER_ID))

    # Set the song title property
    if TITLE_ID not in entity.wd_json_representation['claims']:
        data.append(wdi_core.WDMonolingualText(value=song_name, prop_nr=TITLE_ID))

    if get_description(entity) == '':
        entity.set_description(f"Song performed by {artist_name}")

    write_to_wikidata(entity, data)
    print(f"Song {song_name} by {artist_name} has been updated on Wikidata.")
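update_song and create_song (further down in this listing) rely on module-level constants defined elsewhere. Plausible values, offered as assumptions rather than the original definitions:

INSTANCE_OF_ID = 'P31'              # instance of
PERFORMER_ID = 'P175'               # performer
TITLE_ID = 'P1476'                  # title
SONG_ID = 'Q7366'                   # the "song" item
MUSIC_BRAINZ_SONG_PROP_ID = 'P435'  # assumption: MusicBrainz work ID; the bot may
                                    # instead target the recording-level identifier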
def test_new_item_creation(self):
    data = [
        wdi_core.WDString(value='test', prop_nr='P1'),
        wdi_core.WDString(value='test1', prop_nr='P2'),
        wdi_core.WDMath("xxx", prop_nr="P3"),
        wdi_core.WDExternalID("xxx", prop_nr="P4"),
        wdi_core.WDItemID("Q123", prop_nr="P5"),
        wdi_core.WDTime('+%Y-%m-%dT%H:%M:%SZ', "P6"),
        wdi_core.WDUrl("http://www.google.com", "P7"),
        wdi_core.WDMonolingualText("xxx", prop_nr="P8"),
        wdi_core.WDQuantity(5, prop_nr="P9"),
        wdi_core.WDQuantity(5, upper_bound=9, lower_bound=2, prop_nr="P10"),
        wdi_core.WDCommonsMedia("xxx", prop_nr="P11"),
        wdi_core.WDGlobeCoordinate(1.2345, 1.2345, 12, prop_nr="P12"),
        wdi_core.WDGeoShape("xxx", prop_nr="P13"),
        wdi_core.WDProperty("P123", "P14")
    ]
    core_props = set(["P{}".format(x) for x in range(20)])

    for d in data:
        item = wdi_core.WDItemEngine(item_name='dae', domain="szadf", data=[d],
                                     core_props=core_props)
        assert item.get_wd_json_representation()
        item = wdi_core.WDItemEngine(item_name='dae', domain="szadf", data=[d],
                                     core_props=set())
        assert item.get_wd_json_representation()

    item = wdi_core.WDItemEngine(item_name='dae', domain="szadf", data=data,
                                 core_props=core_props)
    assert item.get_wd_json_representation()

    item = wdi_core.WDItemEngine(item_name='dae', domain="szadf", data=data,
                                 core_props=set())
    assert item.get_wd_json_representation()
def create_song(song_obj, artist_wikidata_id, artist_name):
    data = []
    song_name = get_song_name(song_obj)
    data.append(wdi_core.WDItemID(value=SONG_ID, prop_nr=INSTANCE_OF_ID))
    data.append(
        wdi_core.WDItemID(value=artist_wikidata_id, prop_nr=PERFORMER_ID))
    data.append(
        wdi_core.WDExternalID(value=get_musicbrainz_song_id(song_obj),
                              prop_nr=MUSIC_BRAINZ_SONG_PROP_ID))
    data.append(wdi_core.WDMonolingualText(value=song_name, prop_nr=TITLE_ID))

    entity = wdi_core.WDItemEngine(data=data)
    entity.set_label(song_name)
    entity.set_description(f"Song by {artist_name}")

    login_instance = wdi_login.WDLogin(user='******', pwd='nestor2020')
    entity.write(login_instance)
    print(f"Song {song_name} by {artist_name} has been added to Wikidata.")
def process_data(nioshtic_data):
    """
    Creates Wikidata items for most NIOSHTIC entries.

    Before you execute this method, make sure you have executed
    CreateJournalArticles.py and then Associator.py.

    This only handles creation. Filling in the columns from the rest of the
    NIOSHTIC dataset is handled by a separate class.

    @param nioshtic_data: dictionary with "entries" and "headers" keys
    """

    for entry in nioshtic_data['entries']:
        if 'Wikidata' in entry or 'NN' not in entry:
            continue
        if 'TI' not in entry:
            continue
        if 'SO' in entry:
            if entry['SO'].endswith(' :1'):
                continue  # Only one page, most likely a flyer
        if re.match(r'Youth@Work', entry['TI']) is not None \
                and re.search(r'edition', entry['TI']) is not None:
            continue

        ref = [[
            wdi_core.WDItemID(value='Q26822184', prop_nr='P248', is_reference=True),
            wdi_core.WDExternalID(entry['NN'], prop_nr='P2880', is_reference=True),
            wdi_core.WDTime(nioshtic_data['retrieved'], prop_nr='P813',
                            is_reference=True)
        ]]

        data = [
            wdi_core.WDExternalID(entry['NN'], prop_nr='P2880', references=ref),
            wdi_core.WDItemID(value='Q60346', prop_nr='P859'),
            wdi_core.WDMonolingualText(value=entry['TI'], prop_nr='P1476',
                                       references=ref, language='en')
        ]

        t = JournalArticles.clean_title(entry['TI'])
        i = wdi_core.WDItemEngine(data=data, domain='nioshgreylit', item_name=t)
        i.set_label(t)

        try:
            print(i.write(WIKI_SESSION))
        except Exception as e:
            print(e)
            continue
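process_data expects each entry to be a dictionary keyed by NIOSHTIC field codes ('NN' for the NIOSHTIC-2 number used as P2880, 'TI' for the title, 'SO' apparently for the source/citation line), plus a top-level 'retrieved' timestamp. A minimal, illustrative input; all values are made up, and calling it would attempt a real write with the module's WIKI_SESSION login:

nioshtic_data = {
    'headers': ['NN', 'TI', 'SO'],
    'retrieved': '+2017-01-01T00:00:00Z',
    'entries': [
        {'NN': '20000000',  # placeholder NIOSHTIC-2 number
         'TI': 'An example NIOSH technical report',
         'SO': 'Cincinnati, OH: NIOSH, 2016 :12'},
    ],
}
process_data(nioshtic_data)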
def run_one(pathway_id, retrieved, fast_run, write, login, temp):
    print(pathway_id)
    pathway_reference = create_reference(pathway_id, retrieved)
    prep = dict()
    prep = get_PathwayElements(pathway=pathway_id, datatype="Metabolite",
                               temp=temp, prep=prep)
    prep = get_PathwayElements(pathway=pathway_id, datatype="GeneProduct",
                               temp=temp, prep=prep)

    # P703 = found in taxon, Q15978631 = "Homo sapiens"
    prep["P703"] = [
        wdi_core.WDItemID(value="Q15978631", prop_nr='P703',
                          references=[copy.deepcopy(pathway_reference)])
    ]

    query = """
        PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
        PREFIX gpml: <http://vocabularies.wikipathways.org/gpml#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pathway ?pwId ?pwLabel ?description
        WHERE {
            VALUES ?pwId {"""
    query += "\"" + pathway_id + "\"^^xsd:string}"
    query += """
            ?pathway a wp:Pathway ;
                dc:title ?pwLabel ;
                dcterms:description ?description ;
                dcterms:identifier ?pwId ;
                <http://vocabularies.wikipathways.org/wp#isAbout> ?details ;
                wp:organismName "Homo sapiens"^^xsd:string .
        }"""
    qres3 = temp.query(query)

    for row in qres3:
        # pathway_iri = str(row[0])
        pw_id = str(row[1])
        pw_label = str(row[2])
        description = str(row[3])

        # Clean up descriptions
        description = re.sub(r'https?:\/\/.*[\s\r\n]', '', description)
        description = description.replace('\n', ' ').replace('\r', ' ') \
            .replace('\'\'\'', '').replace('\'\'', '') \
            .replace('[', '').replace(']', '')
        description = description.replace(
            'Proteins on this pathway have targeted assays available via the Portal', '')
        description = (description[:246] + '...') if len(description) > 246 else description
        description = 'biological pathway in human' if len(description) < 20 else description

        # P31 = instance of
        prep["P31"] = [
            wdi_core.WDItemID(value="Q4915012", prop_nr="P31",
                              references=[copy.deepcopy(pathway_reference)])
        ]
        prep["P1476"] = [
            wdi_core.WDMonolingualText(value=pw_label, prop_nr="P1476",
                                       references=[copy.deepcopy(pathway_reference)])
        ]
        # P2410 = WikiPathways ID
        prep["P2410"] = [
            wdi_core.WDString(pathway_id, prop_nr='P2410',
                              references=[copy.deepcopy(pathway_reference)])
        ]
        # P2888 = exact match
        prep["P2888"] = [
            wdi_core.WDUrl("http://identifiers.org/wikipathways/" + pw_id,
                           prop_nr='P2888',
                           references=[copy.deepcopy(pathway_reference)])
        ]

        query = """
            PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
            PREFIX dcterms: <http://purl.org/dc/terms/>
            SELECT ?pubmed
            WHERE {
                ?pubmed a wp:PublicationReference ;
                    dcterms:isPartOf <"""
        query += str(row[0])
        query += """> .}
        """
        qres4 = temp.query(query)

        p = re.compile('^[0-9]+$')
        for pubmed_result in qres4:
            pmid = str(pubmed_result[0]).replace("http://identifiers.org/pubmed/", "")
            print(pmid)
            m = p.match(pmid)
            if not m:
                pmid_qid, _, _ = wdi_helpers.PublicationHelper(
                    pmid, id_type="doi",
                    source="crossref").get_or_create(login if write else None)
            else:
                pmid_qid, _, _ = wdi_helpers.PublicationHelper(
                    pmid.replace("PMID:", ""), id_type="pmid",
                    source="europepmc").get_or_create(login if write else None)
            if pmid_qid is None:
                return panic(pathway_id, "not found: {}".format(pmid), "pmid")
            else:
                if 'P2860' not in prep.keys():
                    prep["P2860"] = []
                print(pmid_qid)
                prep['P2860'].append(
                    wdi_core.WDItemID(value=str(pmid_qid), prop_nr='P2860',
                                      references=[copy.deepcopy(pathway_reference)]))

        author_query = """
            PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
            SELECT ?author ?authorName ?authorHomepage ?authorQIRI
            WHERE {
                <http://identifiers.org/wikipathways/""" + pathway_id + """> dc:creator ?author .
                ?author a foaf:Person ;
                    foaf:name ?authorName ;
                    foaf:homepage ?authorHomepage .
                OPTIONAL { ?author owl:sameAs ?authorQIRI . }
            }
        """
        author_query_res = temp.query(author_query)

        prep["P2093"] = []
        prep["P50"] = []
        for row in author_query_res:
            author_name = str(row[1])
            print("author_name")
            print(author_name)
            author_homepage = str(row[2])
            print("author_homepage")
            print(author_homepage)

            # P2093 = author name string
            author_url_qualifier = wdi_core.WDString(value=author_homepage,
                                                     prop_nr="P2699",
                                                     is_qualifier=True)
            prep["P2093"].append(
                wdi_core.WDString(author_name, prop_nr='P2093',
                                  qualifiers=[copy.deepcopy(author_url_qualifier)],
                                  references=[copy.deepcopy(pathway_reference)]))

            if row[3] is not None:  # only if row[3] (authorQIRI) exists
                author_iri = str(row[0])
                author_name = str(row[1])
                print("author_name")
                print(author_name)
                author_qiri = str(row[3])
                if "https://www.wikidata.org/wiki/" in author_qiri:
                    author_qid = author_qiri.replace("https://www.wikidata.org/wiki/", "")
                if "http://www.wikidata.org/entity/" in author_qiri:
                    author_qid = author_qiri.replace("http://www.wikidata.org/entity/", "")
                print("author_qid")
                print(author_qid)

                # P50 = author
                prep["P50"].append(
                    wdi_core.WDItemID(author_qid, prop_nr='P50',
                                      references=[copy.deepcopy(pathway_reference)]))

        disease_ontology_query = """
            PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
            PREFIX dcterms: <http://purl.org/dc/terms/>
            SELECT ?diseaseOntologyTerm
            WHERE {
                ?pathwayRDF wp:diseaseOntologyTag ?diseaseOntologyTerm ;
                    foaf:page ?pathway ;
                    dcterms:identifier \"""" + pathway_id + """\"^^xsd:string .
            }
        """
        disease_ontology_query_res = temp.query(disease_ontology_query)

        prep["P1050"] = []
        for row in disease_ontology_query_res:
            disease_ontology_iri = str(row[0])
            doid = disease_ontology_iri.replace(
                "http://purl.obolibrary.org/obo/DOID_", "DOID:")
            print("doid")
            print(doid)

            # P1050 = medical condition
            if doid_qid.get(doid) is not None:  # skip if qid is missing
                prep["P1050"].append(
                    wdi_core.WDItemID(doid_qid[doid], prop_nr='P1050',
                                      references=[copy.deepcopy(pathway_reference)]))

        pw_ontology_query = """
            PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
            PREFIX dcterms: <http://purl.org/dc/terms/>
            SELECT ?pwOntologyTerm
            WHERE {
                ?pathwayRDF wp:pathwayOntologyTag ?pwOntologyTerm ;
                    foaf:page ?pathway ;
                    dcterms:identifier \"""" + pathway_id + """\"^^xsd:string .
            }
        """
        pw_ontology_query_res = temp.query(pw_ontology_query)

        prep["P921"] = []
        for row in pw_ontology_query_res:
            pw_ontology_iri = str(row[0])
            poid = pw_ontology_iri.replace("http://purl.obolibrary.org/obo/PW_", "PW:")
            print("poid")
            print(poid)

            # P921 = main subject
            if poid_qid.get(poid) is not None:  # skip if qid is missing
                prep["P921"].append(
                    wdi_core.WDItemID(poid_qid[poid], prop_nr='P921',
                                      references=[copy.deepcopy(pathway_reference)]))

        # TODO: Propose Cell Type Ontology ID as new property, add release item,
        # associate terms with WD items.
        # cell_type_ontology_query = """
        #     PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
        #     PREFIX dcterms: <http://purl.org/dc/terms/>
        #     SELECT ?cellTypeOntologyTerm
        #     WHERE {
        #         ?pathwayRDF wp:cellTypeOntologyTag ?cellTypeOntologyTerm ;
        #             foaf:page ?pathway ;
        #             dcterms:identifier \"""" + pathway_id + """\"^^xsd:string .
        #     }
        # """
        # cell_type_ontology_query_res = temp.query(cell_type_ontology_query)
        # prep["P927"] = []
        # for row in cell_type_ontology_query_res:
        #     cell_type_ontology_iri = str(row[0])
        #     ctoid = cell_type_ontology_iri.replace("http://purl.obolibrary.org/obo/CL_", "CL:")
        #     print("ctoid")
        #     print(ctoid)
        #     # P927 = anatomical location
        #     prep["P927"].append(wdi_core.WDItem(qid[ctoid], prop_nr='P927',
        #                                         references=[copy.deepcopy(pathway_reference)]))

        data2add = []
        for key in prep.keys():
            for statement in prep[key]:
                data2add.append(statement)
                print(statement.prop_nr, statement.value)

        wdPage = wdi_core.WDItemEngine(
            data=data2add,
            fast_run=fast_run,
            fast_run_base_filter=fast_run_base_filter,
            fast_run_use_refs=True,
            ref_handler=update_retrieved_if_new_multiple_refs,
            core_props=core_props)
        wdPage.set_label(pw_label, lang="en")
        wdPage.set_description(description, lang="en")

        try_write(wdPage,
                  record_id=pathway_id,
                  record_prop=PROPS['Wikipathways ID'],
                  edit_summary="Updated a Wikipathways pathway",
                  login=login,
                  write=write)
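The pathway_reference that gets deep-copied into every statement above comes from a create_reference helper defined elsewhere. A rough sketch of what such a helper might return; the exact snaks and the WikiPathways QID are assumptions, not taken from the original:

from wikidataintegrator import wdi_core


def create_reference(pathway_id, retrieved):
    # Assumed shape: stated in (P248) WikiPathways, the pathway's WikiPathways ID
    # (P2410), and the retrieval date (P813).
    return [
        wdi_core.WDItemID(value='Q7999828', prop_nr='P248',
                          is_reference=True),  # assumed WikiPathways item
        wdi_core.WDString(value=pathway_id, prop_nr='P2410', is_reference=True),
        wdi_core.WDTime(retrieved, prop_nr='P813', is_reference=True),
    ]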
def geneSymbol_form(request):
    """
    Uses wdi to write a gene symbol edit to Wikidata.

    :param request: includes the gene symbol JSON for writing to Wikidata
    :return: response data object with a write success boolean
    """
    print("Gene Symbol Form")
    if request.method == 'POST':
        body_unicode = request.body.decode('utf-8')
        body = json.loads(body_unicode)
        responseData = {}

        if 'login' not in request.session.keys():
            responseData['authentication'] = False
            return JsonResponse(responseData)
        login = jsonpickle.decode(request.session['login'])

        statements = []
        refs = []
        eutilsPMID = body['pmid']

        # construct the references using WDI_core and PMID_tools if necessary
        try:
            refs.append(
                wdi_core.WDItemID(value='Q26489220', prop_nr='P1640',
                                  is_reference=True))
            refs.append(
                wdi_core.WDTime(str(strftime("+%Y-%m-%dT00:00:00Z", gmtime())),
                                prop_nr='P813', is_reference=True))
            pub = PublicationHelper(eutilsPMID, 'pmid', 'europepmc')
            result = pub.get_or_create(login)
            if len(result) > 0 and result[0]:
                refs.append(
                    wdi_core.WDItemID(value=result[0], prop_nr='P248',
                                      is_reference=True))
        except Exception as e:
            print("reference construction error: " + str(e))

        statements.append(
            wdi_core.WDMonolingualText(value=body['geneSymbol'],
                                       prop_nr='P2561', references=[refs]))

        try:
            print("Writing to gene " + body['geneQID'])
            if body['geneQID'] != "":
                wd_item_gene = wdi_core.WDItemEngine(wd_item_id=body['geneQID'],
                                                     data=statements)
                wd_item_gene.set_aliases(aliases=[body['geneSymbol']])
                wd_item_gene.write(login=login)

            print("Writing to protein " + body['proteinQID'])
            if body['proteinQID'] != "":
                wd_item_protein = wdi_core.WDItemEngine(wd_item_id=body['proteinQID'],
                                                        data=statements)
                wd_item_protein.set_aliases(aliases=[body['geneSymbol']])
                wd_item_protein.write(login=login)

            responseData['write_success'] = True
        except Exception as e:
            responseData['write_success'] = False

        return JsonResponse(responseData)
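The view reads a JSON body with at least the keys used above, and requires a jsonpickled WDLogin in the session. An illustrative client-side payload; the endpoint path, QIDs, and PMID are placeholders:

import json

import requests

payload = {
    'pmid': '12345678',        # placeholder PubMed ID backing the reference
    'geneSymbol': 'EXAMPLE1',  # placeholder gene symbol to write as P2561
    'geneQID': 'Q00000001',    # placeholder gene item
    'proteinQID': 'Q00000002', # placeholder protein item (may be "")
}
requests.post('https://example.org/geneSymbol_form', data=json.dumps(payload))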
def get_data(manifest):
    """
    Retrieves data from the PubMed, PubMed Central, and DOI.org databases.

    Each dictionary in the manifest must include at least one of a DOI, PMCID,
    or PMID; this method then queries the corresponding databases. You can also
    specify other Wikidata statements to add.

    @param manifest: a list of dictionaries, with the following keys and values:
                     doi: string or None
                     pmcid: string or None
                     pmid: string or None
                     data: list of additional WDI objects (WDString etc.) to
                           incorporate into the output, or an empty list
    @return: list of dictionaries, one per manifest entry, each with
             "statements", "raw_data", and "label" keys.
    """

    # To prevent weirdness from unexpected values
    for entry in manifest:
        for thing in entry.keys():
            if thing not in ['pmid', 'pmcid', 'doi', 'data']:
                raise ValueError(
                    'The only permitted keys are doi, pmcid, pmid, and data')

    months = {'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04',
              'May': '05', 'Jun': '06', 'Jul': '07', 'Aug': '08',
              'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'}

    # Initializing package, a list of objects containing a list of Wikidata item
    # objects and an object containing raw data. Each object in the package list
    # corresponds to the list entry in manifest.
    package = []
    lookup = {'pmid': [], 'pmcid': [], 'doi': []}

    # Associates an identifier with a given manifest/package entry
    associator = {'pmid': {}, 'pmcid': {}, 'doi': {}}

    counter = 0
    for entry in manifest:
        if 'data' not in entry:
            statements = []
        else:
            statements = entry['data']

        # Instance of: scientific article
        statements.append(wdi_core.WDItemID(value='Q13442814', prop_nr='P31'))

        package.append({'statements': statements, 'raw_data': {}, 'label': ''})

        # Append to the lookup lists. API lookups are done in bulk to cut down
        # on HTTP requests.
        for id_name, id_value in entry.items():
            if id_name == 'data' or id_value is None:  # not an ID
                continue
            if id_name == 'pmcid' and 'pmid' in entry:
                if entry['pmid'] is not None:
                    continue  # we don't need pmcid if we already have pmid
            lookup[id_name].append(id_value)
            associator[id_name][id_value] = counter

        counter += 1

    raw_data = {}
    raw_data['pmid'] = get_pubmed(lookup['pmid'])
    raw_data['pmcid'] = get_pubmed_central(lookup['pmcid'])
    raw_data['doi'] = get_doi_org(lookup['doi'])

    # Now that the requests are done, we want to painstakingly re-associate each
    # result object with the corresponding list in the package. This is mostly
    # to keep me from going crazy.
    for data_source, data_object in raw_data.items():
        for identifier, result in data_object.items():
            index = associator[data_source][identifier]
            package[index]['raw_data'][data_source] = result

    counter = 0
    for entry in package:
        # We only query PubMed in one place of two. It's basically the same API
        # but drawing from a different dataset.
        pubmed_data = {}
        if 'pmcid' in entry['raw_data']:
            pubmed_data_source = ('pmc', 'Q229883')  # for use in refsnak generator
            pubmed_data = entry['raw_data']['pmcid']
        elif 'pmid' in entry['raw_data']:
            pubmed_data_source = ('pubmed', 'Q180686')
            pubmed_data = entry['raw_data']['pmid']

        doi_data = {}
        if 'doi' in entry['raw_data']:
            doi_data = entry['raw_data']['doi']

        # If we have data from both PubMed and DOI.org, we are interested in
        # both. PubMed/PubMed Central has article IDs, while DOI.org has better
        # author names and better data overall.

        # Initializing statement variables to prevent duplication/overwrites.
        statement_title = None
        statement_doi = None
        statement_pmid = None
        statement_pmcid = None
        statement_pubdate = None
        statement_publishedin = None
        statement_volume = None
        statement_issue = None
        statement_pages = None
        statement_origlanguage = None
        statement_authors = []

        if doi_data != {}:
            doi_ref = generate_refsnak(
                'Q28946522',
                'https://doi.org/' + urllib.parse.quote_plus(manifest[counter]['doi']),
                doi_data['__querydate'])

            if 'title' in doi_data and statement_title is None:
                t = clean_title(doi_data['title'])
                if t != '' and t is not None and len(t) < 400:
                    statement_title = wdi_core.WDMonolingualText(
                        value=t, prop_nr='P1476', references=doi_ref, language='en')
                    package[counter]['statements'].append(statement_title)
                    if len(t) < 250:
                        package[counter]['label'] = t

            if 'DOI' in doi_data and statement_doi is None:
                statement_doi = wdi_core.WDExternalID(doi_data['DOI'].upper(),
                                                      prop_nr='P356',
                                                      references=doi_ref)
                package[counter]['statements'].append(statement_doi)

            if 'issued' in doi_data and statement_pubdate is None:
                date_parts = doi_data['issued']['date-parts'][0]
                if date_parts != [None]:
                    y = str(date_parts[0])
                    m = '00'
                    d = '00'
                    precision = 9
                    if len(date_parts) >= 2:
                        m = str(date_parts[1]).zfill(2)
                        precision = 10
                    if len(date_parts) == 3:
                        d = str(date_parts[2]).zfill(2)
                        precision = 11
                    to_add = '+{0}-{1}-{2}T00:00:00Z'.format(y, m, d)
                    statement_pubdate = wdi_core.WDTime(to_add,
                                                        precision=precision,
                                                        prop_nr='P577',
                                                        references=doi_ref)
                    package[counter]['statements'].append(statement_pubdate)

            if 'ISSN' in doi_data and statement_publishedin is None:
                journal = issn_to_wikidata(doi_data['ISSN'][0])
                if journal is not None:
                    statement_publishedin = wdi_core.WDItemID(value=journal,
                                                              prop_nr='P1433',
                                                              references=doi_ref)
                    package[counter]['statements'].append(statement_publishedin)

            if 'volume' in doi_data and statement_volume is None:
                statement_volume = wdi_core.WDString(value=doi_data['volume'],
                                                     prop_nr='P478',
                                                     references=doi_ref)
                package[counter]['statements'].append(statement_volume)

            if 'issue' in doi_data and statement_issue is None:
                statement_issue = wdi_core.WDString(value=doi_data['issue'],
                                                    prop_nr='P433',
                                                    references=doi_ref)
                package[counter]['statements'].append(statement_issue)

            if 'page' in doi_data and statement_pages is None:
                if doi_data['page'] != '' and doi_data['page'] is not None:
                    statement_pages = wdi_core.WDString(value=doi_data['page'],
                                                        prop_nr='P304',
                                                        references=doi_ref)
                    package[counter]['statements'].append(statement_pages)

            if 'author' in doi_data and statement_authors == []:
                author_counter = 0
                for author in doi_data['author']:
                    author_counter += 1
                    a = ''
                    if 'family' in author:
                        a = author['family']
                    if 'given' in author:
                        a = author['given'] + ' ' + a
                    a = clean_title(a)
                    qualifier = wdi_core.WDString(value=str(author_counter),
                                                  prop_nr='P1545',
                                                  is_qualifier=True)
                    statement_author = wdi_core.WDString(value=a,
                                                         prop_nr='P2093',
                                                         qualifiers=[qualifier],
                                                         references=doi_ref)
                    statement_authors.append(statement_author)
                for statement in statement_authors:
                    package[counter]['statements'].append(statement)

        if pubmed_data != {}:
            u = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db={0}&retmode=json&id={1}'
            pubmed_ref = generate_refsnak(
                pubmed_data_source[1],
                u.format(pubmed_data_source[0], pubmed_data['uid']),
                pubmed_data['__querydate'])

            if 'title' in pubmed_data and statement_title is None:
                t = clean_title(pubmed_data['title'])
                if t != '' and len(t) < 400:
                    statement_title = wdi_core.WDMonolingualText(
                        value=t, prop_nr='P1476', references=pubmed_ref, language='en')
                    package[counter]['statements'].append(statement_title)
                    if len(t) < 250:
                        package[counter]['label'] = t

            if 'articleids' in pubmed_data:
                for block in pubmed_data['articleids']:
                    if block['idtype'] == 'pmc' and statement_pmcid is None:
                        pmcid = block['value'].replace('PMC', '')
                        statement_pmcid = wdi_core.WDExternalID(
                            pmcid, prop_nr='P932', references=pubmed_ref)
                        package[counter]['statements'].append(statement_pmcid)
                    elif block['idtype'] == 'pmcid' and statement_pmcid is None:
                        pmcid = block['value'].replace('PMC', '')
                        statement_pmcid = wdi_core.WDExternalID(
                            pmcid, prop_nr='P932', references=pubmed_ref)
                        package[counter]['statements'].append(statement_pmcid)
                    elif block['idtype'] == 'doi' and statement_doi is None:
                        doi = block['value'].upper()
                        statement_doi = wdi_core.WDExternalID(
                            doi, prop_nr='P356', references=pubmed_ref)
                        package[counter]['statements'].append(statement_doi)
                    elif block['idtype'] in ['pmid', 'pubmed'] and statement_pmid is None:
                        pmid = block['value']
                        if pmid != 0 and pmid != '0':
                            statement_pmid = wdi_core.WDExternalID(
                                pmid, prop_nr='P698', references=pubmed_ref)
                            package[counter]['statements'].append(statement_pmid)

            if 'pubdate' in pubmed_data and statement_pubdate is None:
                pubdate = None
                precision = None
                pubdate_raw = pubmed_data['pubdate'].split(' ')  # 2016 Aug 1
                if len(pubdate_raw) > 1:
                    if pubdate_raw[1] in months:
                        m = months[pubdate_raw[1]]
                    else:
                        m = '00'
                if len(pubdate_raw) == 3:
                    # Precision to the day
                    allowed_dates = [str(x).zfill(2) for x in range(1, 32)]
                    if pubdate_raw[2].zfill(2) in allowed_dates:
                        pubdate = "+{0}-{1}-{2}T00:00:00Z".format(
                            pubdate_raw[0], m, pubdate_raw[2].zfill(2))
                        precision = 11
                    else:
                        pubdate = "+{0}-{1}-00T00:00:00Z".format(pubdate_raw[0], m)
                        precision = 10
                elif len(pubdate_raw) == 2:
                    # Precision to the month
                    pubdate = "+{0}-{1}-00T00:00:00Z".format(pubdate_raw[0], m)
                    precision = 10
                elif len(pubdate_raw) == 1:
                    # Precision to the year
                    pubdate = "+{0}-00-00T00:00:00Z".format(pubdate_raw[0])
                    precision = 9
                if pubdate is not None and precision is not None:
                    if re.match(r'\+\d{4}-\d{2}-\d{2}T00:00:00Z', pubdate) is not None:
                        statement_pubdate = wdi_core.WDTime(pubdate,
                                                            precision=precision,
                                                            prop_nr='P577',
                                                            references=pubmed_ref)
                        package[counter]['statements'].append(statement_pubdate)

            if 'issn' in pubmed_data and statement_publishedin is None:
                if pubmed_data['issn'] != '':
                    journal = issn_to_wikidata(pubmed_data['issn'])
                    if journal is not None:
                        statement_publishedin = wdi_core.WDItemID(
                            value=journal, prop_nr='P1433', references=pubmed_ref)
                        package[counter]['statements'].append(statement_publishedin)

            if 'volume' in pubmed_data and statement_volume is None:
                if pubmed_data['volume'] != '':
                    statement_volume = wdi_core.WDString(
                        value=pubmed_data['volume'], prop_nr='P478',
                        references=pubmed_ref)
                    package[counter]['statements'].append(statement_volume)

            if 'issue' in pubmed_data and statement_issue is None:
                if pubmed_data['issue'] != '':
                    statement_issue = wdi_core.WDString(
                        value=pubmed_data['issue'], prop_nr='P433',
                        references=pubmed_ref)
                    package[counter]['statements'].append(statement_issue)

            if 'pages' in pubmed_data and statement_pages is None:
                if pubmed_data['pages'] != '':
                    statement_pages = wdi_core.WDString(
                        value=pubmed_data['pages'], prop_nr='P304',
                        references=pubmed_ref)
                    package[counter]['statements'].append(statement_pages)

            if 'lang' in pubmed_data and statement_origlanguage is None:
                for langcode in pubmed_data['lang']:
                    # Please post a comment on this webzone if you know the
                    # other possible values for 'lang'
                    if langcode == 'eng':
                        statement_origlanguage = wdi_core.WDItemID(
                            value='Q1860', prop_nr='P364', references=pubmed_ref)
                        package[counter]['statements'].append(statement_origlanguage)

            if 'authors' in pubmed_data and statement_authors == []:
                author_counter = 0
                for author in pubmed_data['authors']:
                    if author['authtype'] == "Author":
                        author_counter += 1
                        a = clean_title(author['name'])
                        qualifier = wdi_core.WDString(value=str(author_counter),
                                                      prop_nr='P1545',
                                                      is_qualifier=True)
                        statement_author = wdi_core.WDString(
                            value=a, prop_nr='P2093', qualifiers=[qualifier],
                            references=pubmed_ref)
                        statement_authors.append(statement_author)
                for statement in statement_authors:
                    package[counter]['statements'].append(statement)

        counter += 1

    return package
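A hedged usage sketch for get_data: the manifest is a list of dicts with doi/pmcid/pmid/data keys, and each returned package entry carries the generated statements plus a suggested label. The identifiers below are placeholders, and the call performs live lookups through the module's get_pubmed/get_pubmed_central/get_doi_org helpers:

manifest = [
    {'doi': '10.1000/EXAMPLE',  # placeholder DOI
     'pmcid': None,
     'pmid': '12345678',        # placeholder PubMed ID
     'data': []},
]
for article in get_data(manifest):
    print(article['label'], len(article['statements']))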
login_instance = wdi_login.WDLogin(user=cred['user'], pwd=cred['password'])
r = wdi_core.WDItemEngine.execute_sparql_query(q)
for binding in r['results']['bindings']:
    item = binding['item']['value'].rpartition('/')[2]
    pdf = binding['url']['value']
    statements = []
    # Read the date first so the fallback reference below can still use it
    # when the title is missing.
    ref_date = binding['date']['value']
    try:
        ref_title = binding['title']['value']
        refs = [[
            wdi_core.WDUrl(pdf, PROPS['ref_url'], is_reference=True),
            wdi_core.WDMonolingualText(ref_title, PROPS['title'], language='sv',
                                       is_reference=True),
            wdi_core.WDTime(f'+{ref_date}', PROPS['date'], is_reference=True),
            wdi_core.WDTime(datetime.utcnow().strftime('+%Y-%m-%dT00:00:00Z'),
                            PROPS['retrieved'], is_reference=True),
        ]]
    except Exception:
        refs = [[
            wdi_core.WDUrl(pdf, PROPS['ref_url'], is_reference=True),
            wdi_core.WDTime(f'+{ref_date}', PROPS['date'], is_reference=True),
            wdi_core.WDTime(datetime.utcnow().strftime('+%Y-%m-%dT00:00:00Z'),
                            PROPS['retrieved'], is_reference=True),
        ]]
    try:
def run_one(pathway_id, retrieved, fast_run, write, login, temp):
    print(pathway_id)
    pathway_reference = create_reference(pathway_id, retrieved)
    prep = dict()
    prep = get_PathwayElements(pathway=pathway_id, datatype="Metabolite",
                               temp=temp, prep=prep)
    prep = get_PathwayElements(pathway=pathway_id, datatype="GeneProduct",
                               temp=temp, prep=prep)

    # P703 = found in taxon, Q15978631 = "Homo sapiens"
    prep["P703"] = [
        wdi_core.WDItemID(value="Q15978631", prop_nr='P703',
                          references=[copy.deepcopy(pathway_reference)])
    ]

    query = """
        PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
        PREFIX gpml: <http://vocabularies.wikipathways.org/gpml#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pathway ?pwId ?pwLabel
        WHERE {
            VALUES ?pwId {"""
    query += "\"" + pathway_id + "\"^^xsd:string}"
    query += """
            ?pathway a wp:Pathway ;
                dc:title ?pwLabel ;
                dcterms:identifier ?pwId ;
                <http://vocabularies.wikipathways.org/wp#isAbout> ?details ;
                wp:organismName "Homo sapiens"^^xsd:string .
        }"""
    qres3 = temp.query(query)

    for row in qres3:
        # P31 = instance of
        prep["P31"] = [
            wdi_core.WDItemID(value="Q4915012", prop_nr="P31",
                              references=[copy.deepcopy(pathway_reference)])
        ]
        prep["P1476"] = [
            wdi_core.WDMonolingualText(value=str(row[2]), prop_nr="P1476",
                                       references=[copy.deepcopy(pathway_reference)])
        ]
        # P2410 = WikiPathways ID
        prep["P2410"] = [
            wdi_core.WDString(pathway_id, prop_nr='P2410',
                              references=[copy.deepcopy(pathway_reference)])
        ]
        # P2888 = exact match
        prep["P2888"] = [
            wdi_core.WDUrl("http://identifiers.org/wikipathways/" + str(row[1]),
                           prop_nr='P2888',
                           references=[copy.deepcopy(pathway_reference)])
        ]

        query = """
            PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
            PREFIX dcterms: <http://purl.org/dc/terms/>
            SELECT ?pubmed
            WHERE {
                ?pubmed a wp:PublicationReference ;
                    dcterms:isPartOf <"""
        query += str(row[0])
        query += """> .}
        """
        qres4 = temp.query(query)

        p = re.compile('^[0-9]+$')
        for pubmed_result in qres4:
            pmid = str(pubmed_result[0]).replace("http://identifiers.org/pubmed/", "")
            print(pmid)
            m = p.match(pmid)
            if not m:
                pmid_qid, _, _ = wdi_helpers.PublicationHelper(
                    pmid, id_type="doi",
                    source="crossref").get_or_create(login if write else None)
            else:
                pmid_qid, _, _ = wdi_helpers.PublicationHelper(
                    pmid.replace("PMID:", ""), id_type="pmid",
                    source="europepmc").get_or_create(login if write else None)
            if pmid_qid is None:
                return panic(pathway_id, "not found: {}".format(pmid), "pmid")
            else:
                if 'P2860' not in prep.keys():
                    prep["P2860"] = []
                prep['P2860'].append(
                    wdi_core.WDItemID(value=str(pmid_qid), prop_nr='P2860',
                                      references=[copy.deepcopy(pathway_reference)]))

        data2add = []
        for key in prep.keys():
            for statement in prep[key]:
                data2add.append(statement)
                print(statement.prop_nr, statement.value)

        wdPage = wdi_core.WDItemEngine(
            data=data2add,
            fast_run=fast_run,
            fast_run_base_filter=fast_run_base_filter,
            fast_run_use_refs=True,
            ref_handler=update_retrieved_if_new_multiple_refs,
            core_props=core_props)
        wdPage.set_label(str(row[2]), lang="en")
        wdPage.set_description("biological pathway in human", lang="en")

        try_write(wdPage,
                  record_id=pathway_id,
                  record_prop=PROPS['Wikipathways ID'],
                  edit_summary="Updated a Wikipathways pathway",
                  login=login,
                  write=write)
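Both run_one variants assume module-level fast_run_base_filter, core_props, and PROPS defined in the surrounding bot. A plausible shape for that configuration, offered as an assumption rather than the original values:

# Assumed module-level configuration for the WikiPathways bot.
PROPS = {'Wikipathways ID': 'P2410'}
core_props = {'P2410'}                        # properties treated as unique identifiers
fast_run_base_filter = {'P2410': '',          # fast-run mode: restrict to items with any
                        'P703': 'Q15978631'}  # WikiPathways ID, found in taxon Homo sapiens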