def fetch_details(pid):
    """Look up *pid* on PubMed and return the corresponding Publication.

    :param pid: PubMed ID (anything str()-convertible).
    :return: pubmed_lookup.Publication for that record.
    """
    # Empty email string: NCBI only contacts users on excessive querying.
    lookup = PubMedLookup('http://www.ncbi.nlm.nih.gov/pubmed/%s' % pid, '')
    return Publication(lookup)
def pubmed_citation(args=sys.argv[1:], out=sys.stdout):
    """Get a citation via the command line using a PubMed ID or PubMed URL"""
    parser = argparse.ArgumentParser(
        description='Get a citation using a PubMed ID or PubMed URL')
    parser.add_argument('query', help='PubMed ID or PubMed URL')
    parser.add_argument('-m', '--mini', action='store_true',
                        help='get mini citation')
    parser.add_argument('-e', '--email', action='store',
                        help='set user email', default='')
    parsed = parser.parse_args(args=args)

    # resolve_doi=False keeps the DOI URL instead of following redirects.
    publication = Publication(PubMedLookup(parsed.query, parsed.email),
                              resolve_doi=False)
    citation = publication.cite_mini() if parsed.mini else publication.cite()
    out.write(citation + '\n')
def search(PubMedID):
    """
    Retrieve a PubMed record using its PubMed ID or PubMed URL.
    (e.g., '22331878' or 'http://www.ncbi.nlm.nih.gov/pubmed/22331878')

    Returns a single-element list holding one tuple of bibliographic
    fields; the trailing 0 is a placeholder column.
    """
    # NCBI will contact user by email if excessive queries are detected,
    # so an empty email is acceptable for light use.
    publication = Publication(PubMedLookup(PubMedID, ''))
    # publication.url omitted: doesn't seem to work w/ dataframe
    row = (
        PubMedID,
        publication.title,
        publication.authors,
        publication.journal,
        publication.year,
        publication.month,
        publication.day,
        publication.pubmed_url,
        publication.cite(),
        publication.cite_mini(),
        repr(publication.abstract),
        0,
    )
    return [row]
def setUpClass(cls):
    """Fetch the reference record once and cache the expected fields."""
    # Live lookup of the fixture publication (PMID 22331878).
    email = ''
    cls.pmid = '22331878'
    cls.lookup = PubMedLookup(cls.pmid, email)
    cls.master_record = Publication(cls.lookup)

    # Expected bibliographic values for the fixture record.
    cls.authors = ('Goodspeed D, Chehab EW, Min-Venditti A, Braam J, '
                   'Covington MF')
    cls.issue = '12'
    cls.journal = 'Proc Natl Acad Sci U S A'
    cls.pages = '4674-7'
    cls.title = ('Arabidopsis synchronizes jasmonate-mediated defense '
                 'with insect circadian behavior.')
    cls.volume = '109'
    cls.year = '2012'
    cls.citation_data = dict(
        authors=cls.authors,
        year=cls.year,
        title=cls.title,
        journal=cls.journal,
        volume=cls.volume,
        issue=cls.issue,
        pages=cls.pages,
    )
    cls.base_citation = '{authors} ({year}). {title} {journal}'.format(
        **cls.citation_data)
def SubmitPMIDList_To_pubmed(Inputfile, Format="pubtator", Bioconcept=""):
    """Return the Publication for the first resolvable PMID in *Inputfile*.

    Reads PMIDs one per line from *Inputfile*; for each, waits a random
    3-10 s (politeness throttle for NCBI) and attempts a lookup.  Returns
    the first Publication that resolves, or None if none do.  *Format*
    and *Bioconcept* are unused but kept for interface compatibility.
    """
    # NCBI will contact user by email if excessive queries are detected
    email = '*****@*****.**'
    # BUG FIX: the original returned from inside the loop before its
    # close() calls, leaking all three handles; context managers
    # guarantee cleanup on every exit path.  (error_log.txt and
    # General_pubmed_result.txt are still created/truncated as before.)
    with open(Inputfile, "r") as f_in, \
            open("error_log.txt", "w") as f2_out, \
            open("General_pubmed_result.txt", "w") as f_out:
        while True:
            line = f_in.readline().rstrip("\n")
            if not line:
                break
            time.sleep(random.randint(3, 10))
            url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + line
            try:
                lookup = PubMedLookup(url, email)
                return Publication(lookup)
            except Exception:  # narrowed from bare except; stay best-effort
                print("pubmed error!")
    return None
def SubmitPMIDList_To_pubmed(Input_PMID, Format="pubtator", Bioconcept=""):
    """Look up one PMID on PubMed and return its Publication.

    Waits a random 3-10 s first (politeness throttle — NCBI will contact
    the user by email if excessive queries are detected).  Returns None
    and prints an error if the lookup fails.  *Format* and *Bioconcept*
    are unused but kept for interface compatibility.
    """
    time.sleep(random.randint(3, 10))
    email = '*****@*****.**'
    url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + Input_PMID
    try:
        lookup = PubMedLookup(url, email)
        return Publication(lookup)
    except Exception:  # narrowed from bare except; keep best-effort behavior
        print("pubmed error!")
        return None  # explicit: callers get None on failure (as before)
def pmid_article(ref, user=None):
    """Collect article records for every PMID attached to *ref*.

    When *user* is given and has an email, each PMID is first resolved
    through PubMedLookup into a Publication; every PMID is then also
    fetched via PubMedFetcher.
    NOTE(review): with an email set, each PMID therefore appears twice
    in the result (Publication + fetched article) — confirm intended.
    """
    results = []
    if user and user.email is not None:
        for pmid in Reference.pmid(ref):
            url = "http://www.ncbi.nlm.nih.gov/pubmed/" + str(pmid)
            results.append(Publication(PubMedLookup(url, user.email)))
    fetcher = PubMedFetcher()
    for pmid in Reference.pmid(ref):
        results.append(fetcher.article_by_pmid(pmid))
    return results
def get_PMID(PMID):
    '''
    Gets publication title from NIH Pubmed lookup of the PMID
    Input: <str> PMID
    Output: <str> publication title, or "Title not found" on any failure
    '''
    email = ''
    url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + PMID
    try:
        lookup = PubMedLookup(url, email)
        publication = Publication(lookup)
        return publication.title
    except Exception:  # narrowed from bare except; keep best-effort fallback
        return "Title not found"
def search(PubMedID):
    # Retrieve a PubMed record:
    # NCBI will contact user by email if excessive queries are detected
    """
    Retrieve a PubMed record using its PubMed ID or PubMed URL.
    (e.g., '22331878' or 'http://www.ncbi.nlm.nih.gov/pubmed/22331878')
    """
    email = ''
    #url = 'http://www.ncbi.nlm.nih.gov/pubmed/22331878'
    #lookup = PubMedLookup(url, email)
    lookup = PubMedLookup(PubMedID, email)
    # Create a Publication object:
    publication = Publication(
        lookup)  # Use 'resolve_doi=False' to keep DOI URL
    # Access the Publication object's attributes:
    # Render every bibliographic field into one human-readable report
    # string (despite the name, this is a single str, not a tuple).
    tupleoutput = ("""
    TITLE:\n{title}\n
    AUTHORS:\n{authors}\n
    JOURNAL:\n{journal}\n
    YEAR:\n{year}\n
    MONTH:\n{month}\n
    DAY:\n{day}\n
    URL:\n{url}\n
    PUBMED:\n{pubmed}\n
    CITATION:\n{citation}\n
    MINICITATION:\n{mini_citation}\n
    ABSTRACT:\n{abstract}\n
    """.format(
        **{
            'title': publication.title,
            'authors': publication.authors,
            'journal': publication.journal,
            'year': publication.year,
            'month': publication.month,
            'day': publication.day,
            'url': publication.url,
            'pubmed': publication.pubmed_url,
            'citation': publication.cite(),
            'mini_citation': publication.cite_mini(),
            'abstract': repr(publication.abstract),
        }))
    return tupleoutput
def count_plasmids_per_year(pmid_counts, doi_counts, pmid_to_doi):
    """Aggregate plasmid counts by publication year.

    Resolves each PMID's year from the local PMC-ids.csv mapping (by PMID,
    then by its DOI via *pmid_to_doi*), falling back to a live PubMed
    lookup when neither is present, and sums ``pmid_counts`` per year.

    :param pmid_counts: mapping PMID -> plasmid count.
    :param doi_counts: unused (kept for interface compatibility).
    :param pmid_to_doi: mapping PMID -> DOI for the CSV fallback.
    :return: dict mapping year -> total plasmid count.
    """
    # Local import: only this function reads the CSV mapping file.
    import csv

    start = time.time()
    year_count_dict = {}
    PMID_year_dict = {}
    DOI_year_dict = {}
    # BUG FIX: the original used line.split(','), which misaligns every
    # row whose quoted journal title contains a comma; csv.reader parses
    # quoted fields correctly.  Columns: 3 = Year, 7 = DOI, 9 = PMID.
    with open('PMC-ids.csv', newline='') as f:
        reader = csv.reader(f)
        next(reader)  # skip header row
        for row in reader:
            PMID_year_dict[row[9]] = row[3]
            DOI_year_dict[row[7]] = row[3]

    print("Length of pmid_counts =", len(pmid_counts))
    for i in pmid_counts:
        if str(i) in PMID_year_dict:
            year = int(PMID_year_dict[str(i)])
        elif str(pmid_to_doi[i]) in DOI_year_dict:
            year = int(DOI_year_dict[str(pmid_to_doi[i])])
        else:
            # Fallback: live lookup (empty email — see NCBI usage policy).
            # NOTE(review): this branch keeps publication.year as-is
            # (a string in pubmed_lookup), matching original behavior.
            email = ''
            url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + str(i)
            lookup = PubMedLookup(url, email)
            publication = Publication(
                lookup)  # Use 'resolve_doi=False' to keep DOI URL
            year = publication.year
        # Accumulate counts per year (replaces the temp-dict update idiom).
        year_count_dict[year] = year_count_dict.get(year, 0) + pmid_counts[i]

    end = time.time()
    print("Fetching publication year for all PMIDs took", end - start,
          "seconds")
    print("")
    return year_count_dict
def getMeshIDs(data, email):
    """Collect MeSH descriptor names for every article ID found in *data*.

    Each ID is resolved via PubMedLookup, its PubMed XML fetched, and the
    MeSH heading descriptor texts accumulated; the combined list is handed
    to saveOutput().
    """
    meshTerms = []
    for article_id in parseUrlforIDs(data):
        url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + article_id
        # Look up the article data via the pubmed library.
        lookup = PubMedLookup(url, email)
        publication = Publication(
            lookup)  # Use 'resolve_doi=False' to keep DOI URL
        xmlDict = publication.get_pubmed_xml()
        # Walk the parsed XML down to the MeSH heading list.
        headings = (xmlDict['PubmedArticleSet']['PubmedArticle']
                    ['MedlineCitation']['MeshHeadingList']['MeshHeading'])
        for meshcode in headings:
            meshTerms.append(meshcode['DescriptorName']['#text'])
    saveOutput(meshTerms)
def loadADEcorpus(negFile, posFile, avilableLabels=None):
    """Load the ADE corpus and align each sentence within its abstract.

    For every line of *negFile* / *posFile*, fetches (and caches) the
    PubMed abstract for the line's document ID and, when the sentence
    occurs exactly once in the abstract, records the sentence, the text
    preceding it, the text following it, and its label.

    :return: [sentences, preceding-texts, following-texts, labels,
              docID -> abstract cache]
    """
    email = ''
    urlPre = 'http://www.ncbi.nlm.nih.gov/pubmed/'
    allAbstract = {}     # docID -> abstract cache (avoids repeat lookups)
    currentSentList = []
    sentPreList = []
    sentLatList = []
    labelList = []
    i = 0
    allFiles = [negFile, posFile]
    for fileId in range(len(allFiles)):
        with open(allFiles[fileId], 'r') as fin:
            for line in fin:
                # File 0 holds negative examples, file 1 positive ones.
                if fileId == 0:
                    sentence, docID, label = readNegLine(line)
                else:
                    sentence, docID, label = readPosLine(line)
                try:
                    if docID not in allAbstract:
                        fullUrl = urlPre + docID
                        lookup = PubMedLookup(fullUrl, email)
                        publication = Publication(lookup)
                        abstract = publication.abstract
                        allAbstract[docID] = abstract
                    else:
                        abstract = allAbstract[docID]
                    # Only keep sentences occurring exactly once.
                    abstractSplit = abstract.split(sentence)
                    if len(abstractSplit) == 2:
                        print(sentence)
                        currentSentList.append(sentence)
                        print(abstractSplit[0])
                        sentPreList.append(abstractSplit[0])
                        print(abstractSplit[1])
                        sentLatList.append(abstractSplit[1])
                        labelList.append(label)
                except Exception:  # narrowed from bare except
                    print('cant find file')
                i += 1
                print(i, len(currentSentList))
    # BUG FIX: the original returned sentPreList twice and never returned
    # sentLatList, so the following-text lists were lost.
    return [currentSentList, sentPreList, sentLatList, labelList, allAbstract]
def recup_Abstract(gene):
    """Fetch abstracts for a gene's PubMed IDs into abstract.txt.

    (Translated from French: "retrieves abstracts into a file".)
    Writes one tab-separated line (pmid, title, year, journal, abstract)
    per ID listed in pubmed.txt.
    NOTE(review): the returned file object has already been closed by the
    `with` block — confirm callers do not try to write to it.
    """
    email = '*****@*****.**'
    base_url = 'http://www.ncbi.nlm.nih.gov/pubmed/'
    ecrire_Id(gene)
    with open('abstract.txt', 'w') as nouveau_fichier, \
            open('pubmed.txt', 'r') as mon_fichier:
        for idpm in mon_fichier.read().split('\t'):
            publication = Publication(PubMedLookup(base_url + idpm, email))
            nouveau_fichier.write(
                '{pubmed}\t{title}\t{year}\t{journal}\t{abstract}\n'.format(**{
                    'pubmed': idpm,
                    'title': publication.title,
                    'year': publication.year,
                    'journal': publication.journal,
                    'abstract': publication.abstract,
                }))
    return nouveau_fichier
def pubmed_search(request):
    """Django view: list lab publications and optionally import one by PMID.

    When the GET parameter ``pubmed_id`` (a PMID or PubMed URL) is
    present, the record is looked up and stored as a LabPublication for
    the requesting user; success redirects to 'publications', any failure
    re-renders the page with an error message.
    """
    publications = LabPublication.objects.all().order_by('-year')
    # BUG FIX: `message` was referenced in the final render() without ever
    # being assigned when 'pubmed_id' is absent from the query string.
    message = ''
    if 'pubmed_id' in request.GET:
        paper_id = request.GET['pubmed_id']
        if not paper_id:
            message = '<strong>No PubMed ID or PubMed URL provided!</strong>'
        else:
            email = request.user.email
            url = paper_id
            try:
                lookup = PubMedLookup(url, email)
            except Exception:
                message = '<strong>Error using your PubMed ID/URL</strong>'
                return render(request, 'public_pages/publications.html',
                              {'publications': publications,
                               'message': message})
            # (Removed the dead `publication == ''` check: a Publication
            # instance never compares equal to the empty string.)
            publication = Publication(lookup)
            user = request.user
            title = publication.title
            authors = publication.authors
            journal = publication.journal
            year = publication.year
            journal_url = publication.url
            pubmed = publication.pubmed_url
            citation = publication.cite()
            mini_citation = publication.cite_mini()
            abstract = repr(publication.abstract)
            try:
                new_publication, created = LabPublication.objects.get_or_create(
                    user=user, title=title, authors=authors, journal=journal,
                    year=year, journal_url=journal_url, pubmed=pubmed,
                    citation=citation, mini_citation=mini_citation,
                    abstract=abstract)
            except Exception:  # narrowed from bare except
                message = ('<strong>Could not add your publication. '
                           'Are you trying to add a duplicate entry?</strong>')
                return render(request, 'public_pages/publications.html',
                              {'publications': publications,
                               'message': message})
            # BUG FIX: the original tested `created == ''`, which is never
            # true for get_or_create's boolean, so duplicate submissions
            # silently redirected as if they had succeeded.
            if not created:
                message = ('<strong>Could not add your publication. '
                           'Are you trying to add a duplicate entry?</strong>')
                return render(request, 'public_pages/publications.html',
                              {'publications': publications,
                               'message': message})
            return HttpResponseRedirect('publications')
    return render(request, 'public_pages/publications.html',
                  {'publications': publications, 'message': message})
def pubmed_url(args=sys.argv[1:], resolve_doi=True, out=sys.stdout):
    """
    Get a publication URL via the command line using a PubMed ID or PubMed URL

    NOTE(review): the ``resolve_doi`` parameter is shadowed by the parsed
    ``-d/--doi`` flag and never read — confirm whether that is intended.
    """
    parser = argparse.ArgumentParser(
        description='Get a publication URL using a PubMed ID or PubMed URL')
    parser.add_argument('query', help='PubMed ID or PubMed URL')
    # store_false: passing -d turns DOI resolution off, keeping the DOI URL.
    parser.add_argument('-d', '--doi', action='store_false',
                        help='get DOI URL')
    parser.add_argument('-e', '--email', action='store',
                        help='set user email', default='')
    parsed = parser.parse_args(args=args)

    publication = Publication(PubMedLookup(parsed.query, parsed.email),
                              resolve_doi=parsed.doi)
    out.write(publication.url + '\n')
# Resume appending publication rows (pmid, title, year, journal, abstract)
# to abstract.tsv for the PMIDs listed in pubmed.tsv.
pmid = "10359080"
# NCBI will contact user by email if excessive queries are detected
output = open('./abstract.tsv', 'a')
email = '*****@*****.**'
url = 'http://www.ncbi.nlm.nih.gov/pubmed/'
# Remaining-record countdown; earlier stopping points kept for reference.
counter = 254  #9221 #9237 #9602 #9624 #9954 #9964 #9679 #9723 #9756 #9784 #9836 #9964 #10746
buffered = []
with open('./pubmed.tsv', 'r') as fh:
    buffered.append(fh.readlines())
# Resume from row 10492 of the PMID list.
for idx in range(10492, len(buffered[0])):
    # Strip all whitespace (including the trailing newline) from the PMID.
    pmid = re.sub(r'\s', '', buffered[0][idx])
    publication = Publication(PubMedLookup(url + pmid, email))
    counter -= 1
    print(counter)
    print(pmid)
    output.write('{pubmed}\t{title}\t{year}\t{journal}\t{abstract}\n'.format(
        **{
            'pubmed': pmid,
            'title': publication.title,
            'year': publication.year,
            'journal': publication.journal,
            'abstract': publication.abstract,
        }))
# Export title/year/abstract for a fixed list of PMIDs to CSV_9.csv.
# FIX: `from pubmed_lookup import Publication` was executed inside the
# loop on every iteration; imports belong at the top of the module.
from pubmed_lookup import PubMedLookup, Publication
import pandas as pd

email = '*****@*****.**'
k = [
    '12194857', '28835279', '17683935', '17984323', '25791428', '20466727',
    '24667209', '23425014', '22874558'
]
abst = []
title = []
year = []
for i in k:
    url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + i
    lookup = PubMedLookup(url, email)
    publication = Publication(lookup)  # Use 'resolve_doi=False' to keep DOI UR
    abst.append(repr(publication.abstract))
    title.append(publication.title)
    year.append(publication.year)
d = {'PMID': k, 'Title': title, 'Year': year, 'Abstract': abst}
df = pd.DataFrame(data=d)
df.to_csv('CSV_9.csv', sep='\t')  # tab-separated despite the .csv name
print("Done")
def handle(self, *args, **options):
    """Django management command: import PubMed articles from a CSV of PMIDs.

    Records the search function used, skips PMIDs already imported,
    creates a PubmedImportedArticle per new PMID, and writes any PMIDs
    that failed to failed_ids.txt.
    """
    print('importing {0}'.format(options['path']))
    file = os.path.normpath(options['path'])
    df = pd.read_csv(file, header=None)
    with open(options['search_function'], "r") as sf_file:
        search_function = sf_file.readlines()
    # Record this import run for provenance.
    import_record = PubmedImport()
    import_record.import_date = timezone.now()
    import_record.search_function = search_function
    import_record.save()
    import_id = import_record.id
    failed_ids = list()
    for index, record in tqdm(df.iterrows(), total=df.shape[0]):
        pmid = record[0]
        find_pmid = PubmedImportedArticle.objects.filter(pmid=pmid)
        if len(find_pmid) > 0:
            continue  # already imported on a previous run
        try:
            email = ''
            url = 'http://www.ncbi.nlm.nih.gov/pubmed/{0}'.format(pmid)
            lookup = PubMedLookup(url, email)
            publication = Publication(lookup, resolve_doi=False)
            pia = PubmedImportedArticle()
            pia.pmid = pmid
            year = self.get_int(publication.year)
            month = self.get_int(publication.month)
            day = self.get_int(publication.day)
            pia.pub_date = datetime(year, month, day, 0, 0, 0, 0,
                                    tzinfo=pytz.UTC)
            pia.title = publication.title
            pia.authors = publication.authors
            pia.journal = publication.journal
            pia.citation = publication.cite()
            # pia.mini_citation = publication.cite_mini()
            pia.url = publication.url
            pia.pubmed_url = publication.pubmed_url
            pia.abstract = repr(publication.abstract)
            pia.screened = False
            pia.tagged = False
            pia.landmark = False
            pia.pmimport = import_record
            pia.save()
        except Exception:  # narrowed from bare except
            print('\nPMID {0} failed'.format(pmid))
            failed_ids.append(pmid)
    np.savetxt('failed_ids.txt', np.asarray(failed_ids), fmt='%d')
    print('finished')
    # BUG FIX: removed the trailing `np.savetxt(np.asarray(pmid))` —
    # savetxt requires a filename first and that call raised TypeError
    # after every otherwise-successful run (failed_ids were already saved).
def test_parse_pubmed_url(self):
    """parse_pubmed_url must extract the PMID from a full PubMed URL."""
    extracted = PubMedLookup.parse_pubmed_url(self.pubmed_url)
    self.assertEqual(extracted, self.pmid)
def main():
    # Parse every .nbib file in the assets directory into paper dicts,
    # normalize authors/journal/title fields, and emit a YAML list of
    # papers (year >= 2010) for a personal publications page.
    p = "../../assets/nbibs"
    nbibs = [os.path.join(p, f) for f in os.listdir(p) if ".nbib" in f]
    papers = []
    paper = {}
    for f in nbibs:
        print(f"reading {f}")
        with open(f) as fp:
            for l in fp:
                l = l.rstrip()
                if l == "":
                    # blank line, new paper
                    if "id" in paper:
                        # save old paper
                        papers.append(copy.copy(paper))
                    # blank paper
                    paper = {}
                elif l[4] == "-":
                    # Tag line of the form "XXXX- data" (nbib format).
                    # NOTE(review): assumes every non-blank line is at
                    # least 5 chars; shorter lines would raise IndexError.
                    name, data = l.split("-", 1)
                    name = name.rstrip()
                    data = data.strip()
                    if name == "ID":
                        # non-pubmed record
                        paper["id"] = data
                        authors_from_pubmed = None
                    elif name == "PMID":
                        # pubmed record
                        paper["id"] = data
                        paper["pmid"] = data
                        # Author lists are cached per-PMID as pickles so
                        # repeated runs avoid the network lookup.
                        _f = os.path.join(p, f"{data}.pkl")
                        if os.path.exists(_f):
                            # already saved the author list
                            with open(_f, "rb") as fp2:
                                authors_from_pubmed = pickle.load(fp2)
                        else:
                            # getting the author list with special chars
                            print(f"looking up {data}")
                            url = f"http://www.ncbi.nlm.nih.gov/pubmed/{data}"
                            lookup = PubMedLookup(url, "")
                            publication = Publication(lookup,
                                                      resolve_doi=False)
                            # NOTE(review): _author_list is a private
                            # pubmed_lookup attribute — may break across
                            # package versions.
                            authors = [
                                a.split() for a in publication._author_list
                            ]
                            # x: treat an all-caps token (that is not a
                            # consortium name) as a surname; y: turn the
                            # remaining tokens into "Surname, I. N." form.
                            x = lambda a: all((a == a.upper(),
                                               "ENIGMA" not in a,
                                               "CNV" not in a))
                            y = lambda a: ", " + ". ".join(a) + "." \
                                if x(a) else a
                            authors = [[y(a) for a in b] for b in authors]
                            authors = [
                                " ".join(a).replace(" ,", ",")
                                for a in authors
                            ]
                            print("authors ->", authors)
                            with open(_f, "wb") as fp2:
                                pickle.dump(authors, fp2)
                            authors_from_pubmed = copy.copy(authors)
                        paper["authors"] = authors_from_pubmed
                    elif name == "VI":
                        paper["volume"] = data
                    elif name == "IP":
                        paper["issue"] = data
                    elif name == "DP":
                        # Publication date: "YYYY Mon ..."; sort key is
                        # "YYYY" or "YYYY-MM" when the month is present.
                        paper["year"] = data.split()[0]
                        paper["sort"] = "%s" % paper["year"]
                        try:
                            paper["month"] = data.split()[1]
                            month_number = dict(
                                zip(
                                    [
                                        "Jan", "Feb", "Mar", "Apr",
                                        "May", "Jun", "Jul", "Aug",
                                        "Sep", "Oct", "Nov", "Dec",
                                    ],
                                    range(12),
                                ))[paper["month"]]
                            paper["sort"] = "%s-%02d" % (
                                paper["year"],
                                month_number + 1,
                            )
                            paper["month"] = str(paper["month"])
                        except IndexError:
                            # Date had no month component — year-only sort.
                            pass
                    elif name == "TI":
                        new_name = "title"
                        paper[new_name] = data
                    elif name == "AB":
                        new_name = "abstract"
                        paper[new_name] = data
                    elif name == "FAU" and authors_from_pubmed is None:
                        # Full-author lines, only used when no cached/
                        # fetched PubMed author list exists.
                        if "authors" not in paper:
                            paper["authors"] = []
                        surname, fns = data.split(",", 1)
                        fns = fns.strip()
                        fns = fns.split(" ") if " " in fns else [fns]
                        # Initials: "Jane Q" -> "J. Q."
                        fns = " ".join(f"{n[0].upper()}." for n in fns)
                        author = f"{surname}, {fns}"
                        paper["authors"].append(author)
                    elif name == "JT":
                        # Journal title: drop parentheticals, title-case
                        # all but minor words, and map "." to ":".
                        data = re.sub(r"\([^)]*\)", "", data)
                        words = data.split()
                        ignore = ("on", "in", "of", "the", "and")
                        words = [
                            w.title() if w not in ignore else w
                            for w in words
                        ]
                        words = " ".join(words)
                        words = words.replace(".", ":")
                        paper["journal"] = words
                    elif "[doi]" in data and "doi" not in paper:
                        paper["doi"] = data.split()[0]
                    elif name == "PG":
                        # Page range; expand abbreviated last pages
                        # ("123-9" -> first 123, last 129).
                        paper["first_page"] = data
                        if "-" in data:
                            f, l = data.split("-")
                            if len(l) < len(f):
                                d = len(f) - len(l)
                                l = f[:d] + l
                            paper["first_page"], paper["last_page"] = (f, l)
                    elif name == "ED":
                        paper["editor"] = data
                    elif name == "CI":
                        paper["city"] = data
                    elif name == "CC":
                        paper["state"] = data
                    elif name == "CY":
                        paper["publisher"] = data
                    elif name == "CO":
                        paper["collection"] = data
                    elif name == "BN":
                        paper["book"] = data
                else:
                    # Continuation line: append to the last TI/AB field.
                    if name in ("TI", "AB"):
                        paper[new_name] += " " + l.strip()
    # Keep the final paper (files need not end with a blank line).
    papers.append(paper)
    for paper in papers:
        # Join author names: "&" before the last, commas when 3+.
        authors = copy.copy(paper["authors"])
        if len(authors) > 1:
            authors[-1] = f"& {authors[-1]}"
        if len(authors) > 2:
            authors = ", ".join(authors)
        else:
            authors = " ".join(authors)
        # Bold the site owner's name in the rendered author string.
        authors = authors.replace("Mathias, S.", "<b>Mathias, S.</b>")
        authors = authors.replace("<b>Mathias, S.</b> R.",
                                  "<b>Mathias, S. R.</b>")
        paper["authors"] = authors
        if "pmid" in paper:
            # Manual DOI fix for a record missing one in PubMed.
            if paper["pmid"] == "24389264":
                paper["doi"] = "10.2741/S417"
        if "doi" in paper:
            paper["doi_link"] = "https://www.doi.org/" + paper["doi"]
        if "pmid" in paper:
            paper[
                "pmid_link"] = "https://www.ncbi.nlm.nih.gov/pubmed/" + paper[
                    "pmid"]
        if "journal" in paper:
            # Hand-tuned journal-name cleanups for display.
            if paper["journal"] == "Frontiers in Bioscience":
                paper["journal"] = "Frontiers in Bioscience (Scholar Edition)"
            paper["journal"] = (paper["journal"].replace(" : Cb", "").replace(
                " : the", "").replace(
                    " Journal of the Association of European Psychiatrists",
                    "").replace(
                        " : Official Publication of the American College of",
                        "").replace(" Official Journal of the Society For",
                                    "").replace("Jama", "JAMA"))
        # Sentence-case the title, then restore known proper nouns.
        paper["title"] = paper["title"][0] + paper["title"][1:].lower()
        paper["title"] = (paper["title"].replace("african", "African").replace(
            "american", "American").replace("qtl", "QTL").replace(
                "enigma", "ENIGMA").replace("mri ", "MRI "))
        for s in string.ascii_lowercase:
            # NOTE(review): str.replace returns a new string; this result
            # is discarded, so post-colon capitalization is a no-op.
            paper["title"].replace(f": {s}", f": {s.upper()}")
        paper["authors"] = paper["authors"].replace(".., ", "")
    # Only publications from 2010 onward make the page.
    papers = [p for p in papers if int(p["year"]) >= 2010]
    # Emit the YAML consumed by the static-site generator.
    with open("../../_data/my_papers.yaml", "w") as fw:
        fw.write("my_papers:\n")
        for paper in papers:
            if "Correction:" not in paper["title"]:
                for k, v in paper.items():
                    v = v.replace('"', "'")
                    s = f"""{k}: "{v}"\n"""
                    if k == "id":
                        s = "\n - " + s
                    else:
                        s = " " + s
                    fw.write(s)
def test_pmid_and_url_return_same_record(self):
    """Looking up by bare PMID and by PubMed URL must yield one record."""
    by_pmid = PubMedLookup(self.pmid, self.email).record
    by_url = PubMedLookup(self.pubmed_url, self.email).record
    self.assertEqual(by_pmid, by_url)
def get_publication(self, pubmedID: int) -> Publication:
    """Resolve *pubmedID* via PubMedLookup and return its Publication."""
    return Publication(PubMedLookup(pubmedID, self.email))
def test_parse_pubmed_url(self):
    """The PMID embedded in a PubMed URL must be parsed back out."""
    result = PubMedLookup.parse_pubmed_url(self.pubmed_url)
    self.assertEqual(result, self.pmid)
def test_invalid_query(self):
    """A nonsense query string must raise RuntimeError."""
    bad_query = 'not a valid query'
    with self.assertRaises(RuntimeError):
        PubMedLookup(bad_query, self.email)