def get_data_from_doi(self, doi):
    """Fetch PubMed data for the article identified by a DOI.

    The article is read as a ScienceDirect ``FullDoc`` through
    ``self.client`` and written out with ``FullDoc.write()``. If the
    retrieved record has a ``'pubmed-id'`` entry, that ID is used;
    otherwise PubMed is searched with the article title and the whole
    list of matching IDs is used.

    Parameters:
    -----------
    doi(str, required): DOI of the article to look up.

    Returns:
    --------
    The result of ``self.fetch_data_from_pubmed`` for the resolved ID(s),
    or ``doi`` unchanged when the document cannot be read or no PubMed ID
    can be found.
    """
    doi_doc = FullDoc(doi=doi)
    if not doi_doc.read(self.client):
        print("Read document failed.")
        return doi
    doi_doc.write()

    pubmed_id = None
    if 'pubmed-id' in doi_doc._data:
        pubmed_id = doi_doc._data['pubmed-id']
    else:
        # No PubMed ID in the ScienceDirect record: fall back to a title search.
        print("no pubmed-id, trying with title")
        Entrez.email = '*****@*****.**'
        handle = Entrez.esearch(db='pubmed', retmode='xml', term=doi_doc.title)
        try:
            results = Entrez.read(handle)
        finally:
            # Always release the network handle, even if parsing fails.
            handle.close()
        if int(results['Count']) > 0:
            pubmed_id = results['IdList']

    if pubmed_id is None:
        print("no pubmed id")
        return doi
    return self.fetch_data_from_pubmed(pubmed_id)
def readFullDocWithDOI(self, doiID='10.1016/S1525-1578(10)60571-5'):
    """Read a ScienceDirect full-text document by DOI, print its title and write it out.

    Parameters:
    -----------
    doiID(str, optional): DOI of the document to read. Defaults to a
        sample DOI.

    Prints "Read document failed." when the read through ``self.client``
    is unsuccessful. Returns nothing.
    """
    document = FullDoc(doi=doiID)
    was_read = document.read(self.client)
    if not was_read:
        print("Read document failed.")
        return
    print("doi_doc.title: ", document.title)
    document.write()
def readFullDocWithPII(self, sd_piiID='S1270963817323015'):
    """Read a ScienceDirect full-text document by PII, print it and write it out.

    Parameters:
    -----------
    sd_piiID(str, optional): ScienceDirect PII of the document to read.
        Defaults to a sample PII.

    On success the document object and its title are printed before
    ``write()`` is called; otherwise "Read document failed." is printed.
    Returns nothing.
    """
    document = FullDoc(sd_pii=sd_piiID)
    was_read = document.read(self.client)
    if not was_read:
        print("Read document failed.")
        return
    print(document)
    print("pii_doc.title: ", document.title)
    document.write()
def find_abstract(doi):
    """Print the title and abstract of the ScienceDirect document with the given DOI.

    A client is obtained from ``elsevier_auth()`` and used to read the
    document. On success the title and the ``dc:description`` field of the
    document's ``coredata`` are printed and the document is written out
    with ``write()``; otherwise "Read document failed." is printed.

    Parameters:
    -----------
    doi(str, required): DOI of the document to read.
    """
    api_client = elsevier_auth()
    article = FullDoc(doi=doi)
    if not article.read(api_client):
        print("Read document failed.")
        return
    print("doi_doc.title: ", article.title)
    abstract_text = article.data['coredata']['dc:description']
    print("doi_doc.abstract: ", abstract_text)
    article.write()
def get_authors_data_by_doi(self, doi):
    """Return the PubMed author list for the article identified by a DOI.

    The article is read as a ScienceDirect ``FullDoc`` through
    ``self.client`` and written out with ``FullDoc.write()``. Its PubMed ID
    is taken from the record's ``'pubmed-id'`` entry when present;
    otherwise PubMed is searched with the article title. The resolved
    ID(s) are then fetched from PubMed, and the ``AuthorList`` of the first
    returned article is extracted.

    Parameters:
    -----------
    doi(str, required): DOI of the article to look up.

    Returns:
    --------
    The ``AuthorList`` entry of the first PubMed article, or ``doi``
    unchanged when the document cannot be read, no PubMed ID is found, or
    the PubMed record lacks the expected keys.
    """
    doi_doc = FullDoc(doi=doi)
    if not doi_doc.read(self.client):
        print("Read document failed.")
        return doi
    print("doi_doc.title: ", doi_doc.title)
    doi_doc.write()

    pubmed_id = None
    if 'pubmed-id' in doi_doc._data:
        pubmed_id = doi_doc._data['pubmed-id']
    else:
        # No PubMed ID in the ScienceDirect record: fall back to a title search.
        print("no pubmed-id, trying with title")
        Entrez.email = '*****@*****.**'
        handle = Entrez.esearch(db='pubmed', retmode='xml', term=doi_doc.title)
        try:
            results = Entrez.read(handle)
        finally:
            # Always release the network handle, even if parsing fails.
            handle.close()
        if int(results['Count']) > 0:
            pubmed_id = results['IdList']

    if pubmed_id is None:
        print("no pubmed id")
        return doi

    Entrez.email = '*****@*****.**'
    handle = Entrez.efetch(db='pubmed', retmode='xml', id=pubmed_id)
    try:
        results = Entrez.read(handle)
    finally:
        handle.close()
    print(results)

    articles = results['PubmedArticle']
    if (not articles or 'MedlineCitation' not in articles[0]
            or 'Article' not in articles[0]['MedlineCitation']):
        print("missing keys")
        return doi

    article = articles[0]['MedlineCitation']['Article']
    if 'AuthorList' not in article:
        print("no authors list {}".format(article))
        return doi

    # The publication history was previously looked up here but never used;
    # the lookup is dropped so a record without 'PubmedData'/'History'
    # no longer raises KeyError.
    return article['AuthorList']
def get_doc(self, dtype, identity):
    """
    This method retrieves a 'Doc' object from the Elsevier API. The doc object contains metadata and full-text information
    about a publication associated with a given PII or DOI.

    Parameters:
    -----------
    dtype(str,required): The type of identification string being used to access the document: 'pii' or 'doi'.
        (Almost always PII in our case.)

    identity: The actual identification string (PII or DOI) that will be used to query.

    Returns:
    --------
    The FullDoc object. It is returned even when reading failed, in which case "Read document failed." is printed.

    Raises:
    -------
    ValueError: If dtype is neither 'pii' nor 'doi'.
    """
    if dtype == 'pii':
        doc = FullDoc(sd_pii=identity)
    elif dtype == 'doi':
        doc = FullDoc(doi=identity)
    else:
        # Previously an unknown dtype left `doc` unbound and failed later
        # with an opaque UnboundLocalError; fail early with a clear message.
        raise ValueError(
            "Unsupported dtype {!r}; expected 'pii' or 'doi'.".format(dtype))

    # The first key in API_list is used to build the client.
    if doc.read(ElsClient(self.API_list[0])):
        doc.write()
    else:
        print ("Read document failed.")

    return doc
# Run a ScienceDirect search driven by a JSON config and optionally fetch the
# full text of every hit. `config_file` is defined outside this excerpt.
config = json.load(config_file)
GET_ALL = config['get_all']  # False gets one chunk (25) True gets all or max (5000)
FULL_TEXT = config['full_text']  # Save fulltext
OPEN_ACCESS = config['open_access']  # Search only openaccess documents (so we can get the full text)

# Example query:
# "public policy AND (impact OR result OR evaluation OR evidence) AND (climate OR environment)"
query = config['query']
if OPEN_ACCESS:
    # Restrict the search by prefixing the open-access filter to the query.
    query = "openaccess(1) AND " + query

client = ElsClient(config['api_key'])
doc_srch = ElsSearch(query, 'sciencedirect')
doc_srch.execute(client, get_all=GET_ALL)

for doc in doc_srch.results:
    # The 'dc:identifier' field of each result is used as the DOI below.
    doi = doc['dc:identifier']
    print(doi)
    if FULL_TEXT:
        ## ScienceDirect (full-text) document example using DOI
        doi_doc = FullDoc(doi=doi)
        if doi_doc.read(client):
            doi_doc.write()
        else:
            print("Read full-text failed for DOI", doi)

# Summary line with the number of search results.
print("# Found", len(doc_srch.results), "results.")
print ("Read affiliation failed.") ## Scopus (Abstract) document example # Initialize document with ID as integer scp_doc = AbsDoc(scp_id = 84872135457) if scp_doc.read(client): print ("scp_doc.title: ", scp_doc.title) scp_doc.write() else: print ("Read document failed.") ## ScienceDirect (full-text) document example using PII pii_doc = FullDoc(sd_pii = 'S1674927814000082') if pii_doc.read(client): print ("pii_doc.title: ", pii_doc.title) pii_doc.write() else: print ("Read document failed.") ## ScienceDirect (full-text) document example using DOI doi_doc = FullDoc(doi = '10.1016/S1525-1578(10)60571-5') if doi_doc.read(client): print ("doi_doc.title: ", doi_doc.title) doi_doc.write() else: print ("Read document failed.") ## Load list of documents from the API into affiliation and author objects. # Since a document list is retrieved for 25 entries at a time, this is # a potentially lengthy operation - hence the prompt.
else: n += 1 # get url url = pii_doc.data["coredata"]["link"][1]["@href"] # save data abs = pd.DataFrame([url, title, text]) abstract.loc[len(abstract)] = [url, title, text] abs.to_csv(r'abstracts1.txt', sep='\t', mode='a', header=False, index=False) print(str(j) + ") pii_doc.title: ", title) print("pii_doc.description: ", text) print("pii_doc.url: ", url) pii_doc.write() j += 1 else: print("Read document failed.") except (ValueError, Exception) as e: print(str(j) + ") " + str(e)) append_report( reportName, "Fail reading document " + str(j) + ", pii: " + str(p) + "\n") append_report(reportName, "Error message: " + str(e) + "\n") err += 1 j += 1 append_report(reportName, "Total " + str(n) + " null abstract \n\n") ntotal += n # output results