Пример #1
0
def createTable(query):
    """Build the HTML table rows for the PubMed hits of ``query``.

    Searches PubMed for ``query`` (at most 100 hits), downloads the matching
    records in MEDLINE text format and renders one ``<tr>`` per record with
    title, publication date, authors, journal, the query itself and a PubMed
    link followed by the abstract.

    Args:
        query: Search term entered by the user.

    Returns:
        The concatenated ``<tr>`` rows, an empty string when the search has
        no hits, or ``"<h3> No query </h3>"`` when ``query`` is empty.
    """
    if not query:
        return "<h3> No query </h3>"

    # Local import so the block runs on Python 3 (html) and Python 2 (cgi).
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    def cell(value):
        # str() first so a missing field still renders as "None", as before;
        # escaping keeps user input and record text from being read as markup.
        return escape(str(value), True)

    MAX_COUNT = 100
    # Raw string: "\-" is an invalid escape sequence in a normal literal.
    pubmedquery = query.replace("-", r"\-")
    Entrez.email = "*****@*****.**"
    h = Entrez.esearch(db="pubmed", term=pubmedquery, retmax=MAX_COUNT)
    result = Entrez.read(h)
    h.close()
    ids = result["IdList"]
    if not ids:
        # No hits: do not call efetch with an empty id list.
        return ""

    h = Entrez.efetch(db="pubmed", id=ids, rettype="medline", retmode="text")
    rows = []
    try:
        for record in Medline.parse(h):
            try:
                rows.append("".join([
                    "<tr><td width='22%'>", cell(record.get("TI")), "</td>",
                    "<td width='5%'>", cell(record.get("DP")), "</td>",
                    # NOTE(review): writers() is defined elsewhere and may
                    # already return markup, so its output is left unescaped.
                    "<td width='5%'>", str(writers(record.get("FAU"))), "</td>",
                    "<td width='5%'>", cell(record.get("JT")), "</td>",
                    "<td width='5%'>", cell(query), "</td>",
                    "<td>",
                    "<a href='http://www.ncbi.nlm.nih.gov/pubmed/",
                    cell(record.get("PMID")),
                    "'><img src='PubMed.png' height='75px' width='75px' alt='PubMed' align='right'/></a>",
                    cell(record.get("AB")),
                    "</td></tr>",
                ]))
            except TypeError:
                # Records that cannot be rendered are skipped, as before.
                continue
    finally:
        h.close()

    return "".join(rows)
Пример #2
0
def getFormattedPubList(query):
	"""Search PubMed for ``query`` and return the formatted publication list.

	Settings come from the module-level ``args`` object: ``args.v`` (verbose
	output), ``args.maxPub`` (maximum number of records; concatenated into a
	log message, so expected to be a string), ``args.mail`` (e-mail sent to
	NCBI) and ``args.ab`` (include abstracts).

	Returns:
		A list with one entry per article,
		``[pos, pmid, title, authors, journal, date, cited]`` plus the
		abstract when ``args.ab`` is set. ``None`` when the search or the
		fetch itself fails.
	"""
	if args.v:
		print("INFO:PUBLIST : starting to extract details about publications")
	try:
		if args.v:
			print("INFO:PUBLIST : searching the query")
		results = search(query)
		if args.v:
			print("INFO:PUBLIST : extracting the all pmids")
		id_list = results['IdList']
		if args.v:
			print("INFO:PUBLIST : Fetching details of top "+args.maxPub+" from "+str(len(id_list))+" results ranked by relevance")
		ids = ','.join(id_list)
		Entrez.email = args.mail
		handle = Entrez.efetch(db='pubmed',
			retmode='text',
			rettype="medline",
			retmax=args.maxPub,
			id=ids)
		print("INFO:PUBLIST : Parsing fetched details")
		papers = Medline.parse(handle)
		formattedList = []
		iterTest = 1
		print("INFO:PUBLIST : Initiating the iteration to format the details")
		while True:
			try:
				paper = next(papers)
				pos = str(iterTest)
				if args.v:
					print('###==> article number: '+pos+"\n")
				iterTest += 1
				title = paper.get("TI", "?")
				pmid = paper.get("PMID", "?")
				authors = ';'.join(paper.get("FAU", "?"))
				date = paper.get("DP", "?")
				journal = paper.get("JT", "?")
				cited = getCitations(pmid)
				entry = [pos, pmid, title, authors, journal, date, cited]
				if args.ab:
					entry.append(paper.get("AB", "?"))
				formattedList.append(entry)
			except StopIteration:
				# Normal end of the record stream.
				if args.v:
					print('INFO:PUBLIST : Final error because the list of papers is finished'+"\n")
				break
			except httplib.IncompleteRead:
				# Truncated HTTP response: report it and go on with the next record.
				print('ERROR:PUBLIST : httplib.incompletedRead'+"\n")
			except Exception as e:
				# Any other failure still ends the loop with the partial list,
				# but it is reported instead of passing as "list finished".
				print('ERROR:PUBLIST : stopping early, unexpected error: '+str(e)+"\n")
				break
		if args.v:
			print("INFO:PUBLIST : Ending the iteration to format the details")
		return formattedList
	except Exception as e:
		print("ERROR:PUBLIST : <== Unexpected ==> "+"\n"+str(e))
		return None
Пример #3
0
def store_abstracts_for_query(query,query_tag,maxN=None,preview_only=False):
	"""Download the PubMed records matching ``query`` and store them.

	The search covers the last ten years of publication dates and uses the
	NCBI history server; records are then fetched in batches of 50 and each
	one is passed to ``pubmed_to_pg.store_medline_entry`` with ``query_tag``.

	Args:
		query: PubMed search term.
		query_tag: Tag handed to ``store_medline_entry`` with every record.
		maxN: Optional upper bound on the number of records to store.
		preview_only: When true, only report the hit count and return.
	"""
	print("will search %s" % (query,))
	Entrez.email = "*****@*****.**"
	search_handle = Entrez.esearch(db="pubmed",
								   term=query,
								   reldate=10*365, datetype="pdat",
								   usehistory="y")
	search_results = Entrez.read(search_handle)
	search_handle.close()
	count = int(search_results["Count"])
	print("Found %i results" % count)
	if maxN is not None and maxN < count:
		count = maxN
		print("Only keeping first %s abstracts" % (count,))
	if preview_only:
		return
	sys.stdout.flush()
	batch_size = 50
	for start in range(0, count, batch_size):
		end = min(count, start + batch_size)
		print("Going to download record %i to %i" % (start+1, end))
		sys.stdout.flush()
		# Request only end-start records: asking for a full batch on the last
		# round would store more than ``count`` (and so more than ``maxN``).
		fetch_handle = Entrez.efetch(db="pubmed",
									 rettype="medline", retmode="text",
									 retstart=start, retmax=end - start,
									 webenv=search_results["WebEnv"],
									 query_key=search_results["QueryKey"])
		try:
			for r in Medline.parse(fetch_handle):
				pubmed_to_pg.store_medline_entry(r, query_tag)
		finally:
			fetch_handle.close()
Пример #4
0
def top_papers(papers, outpath=None, delim="\t", top=20):
    """Fetch PubMed details for the most-annotated papers.

    ``papers`` maps a PMID to a collection of annotations. The ``top`` papers
    with the most annotations are looked up in PubMed and returned, ordered
    by ascending annotation count. Can be used with SP & GOA data.

    Args:
        papers: Mapping of PMID (string) to its annotations.
        outpath: Optional path; when given, the rows are also written there.
        delim: Column separator used in the output file.
        top: Number of papers to keep.

    Returns:
        A list of ``(annotation count, PMID, title, year, journal)`` tuples.
        Papers for which PubMed returned no record are left out.
    """
    if not papers:
        # Nothing to look up; avoid an efetch call with an empty id list.
        return []

    papers_annots = sorted((len(papers[p]), p) for p in papers)
    top_annots = papers_annots[-top:]
    idlist = [pmid for _, pmid in top_annots]

    Entrez.email = "*****@*****.**"
    h = Entrez.efetch(db="pubmed",
                      id=",".join(idlist),
                      rettype="medline",
                      retmode="text")
    try:
        medrecs = list(Medline.parse(h))
    finally:
        h.close()

    # Pair records with papers by PMID instead of by position, so a missing
    # record cannot shift every following title onto the wrong paper.
    by_pmid = dict((rec.get("PMID"), rec) for rec in medrecs)

    papers_annots2 = []
    for num_annots, pmid in top_annots:
        rec = by_pmid.get(pmid)
        if rec is None:
            continue
        date_parts = rec.get("DP", "?").split()
        year = date_parts[0] if date_parts else "?"
        papers_annots2.append(
            (num_annots, pmid, rec.get("TI", "?"), year, rec.get("JT", "?")))

    if outpath:
        header = ["num proteins", "pubmed ID", "Title", "Year", "Journal"]
        with open(outpath, "w") as fout:
            fout.write(delim.join(header) + "\n")
            for row in papers_annots2:
                fields = ["%d" % row[0]] + ["%s" % field for field in row[1:]]
                fout.write(delim.join(fields) + "\n")
    return papers_annots2
Пример #5
0
    def __init__(self, pmids):
        """Request the MEDLINE records for ``pmids`` and keep the parser.

        ``pmids`` is a list of PubMed ids. ``self.records`` holds whatever
        ``Medline.parse`` returns for the efetch response.
        """
        Entrez.email = '*****@*****.**'
        self.records = Medline.parse(
            Entrez.efetch(db='pubmed', id=pmids,
                          rettype='medline', retmode='text'))
Пример #6
0
def getMedlineList(pmids):

    """
    This function takes a list of article-ids and returns a list of
    MedLine articles that contains an abstract.

    The ids are fetched in batches of 650. A batch whose response cannot be
    parsed is skipped, and the first id of that batch is written to the
    'errordir_medline_records' folder through IOmodule.writeOutTxt.
    """

    records = []
    listLength = len(pmids)
    batchSize = 650

    Entrez.email = '*****@*****.**'

    for i in range(0, listLength, batchSize):
        tempList = pmids[i:i + batchSize]
        handle = Entrez.efetch(db='pubmed', id=tempList,rettype='medline', retmode='text')
        try:
            # Build the whole batch first so a failure adds nothing partial.
            records.extend(list(Medline.parse(handle)))
        except Exception:
            # Best effort: note the failing batch and carry on with the next.
            IOmodule.writeOutTxt(_mainFolder+'/'+'errordir_medline_records', pmids[i], '')
        finally:
            handle.close()

        print('Downloaded %s MedLine articles. %s remaining...'
              % (len(records), listLength - len(records)))

    cleaned_records = [article for article in records if 'AB' in article]

    print('Returned %s MedLine articles containing an abstract.'
          % (len(cleaned_records),))
    return cleaned_records
def get_pubmed_document(pubmed_ids, source='pubmed', include_pubtator=True):
    """Fetch PubMed records and store them as Document/Section rows.

    Args:
        pubmed_ids: A single PubMed id or a list/tuple/set of ids.
        source: Value stored in ``Document.source``.
        include_pubtator: When true, ``init_pubtator()`` is called on every
            stored document.

    Only records that have a title, an abstract, a PMID and a CRDT field are
    stored; each gets one title section (kind 't') and one abstract section
    (kind 'a').
    """
    Entrez.email = settings.ENTREZ_EMAIL

    # isinstance instead of ``type(...) == list`` so tuples and sets of ids
    # are not stringified as one bogus id.
    if isinstance(pubmed_ids, (list, tuple, set)):
        ids = [str(doc_id) for doc_id in pubmed_ids]
    else:
        ids = [str(pubmed_ids)]

    h = Entrez.efetch(db='pubmed', id=ids, rettype='medline', retmode='text')
    try:
        # Reference to abbreviations: http://www.nlm.nih.gov/bsd/mms/medlineelements.html
        for record in Medline.parse(h):
            if not (record.get('TI') and record.get('AB')
                    and record.get('PMID') and record.get('CRDT')):
                continue

            title = ' '.join(pad_split(record.get('TI')))
            abstract = ' '.join(pad_split(record.get('AB')))

            doc, doc_c = Document.objects.get_or_create(document_id=record.get('PMID'))
            doc.title = title
            doc.source = source
            doc.save()

            sec, sec_c = Section.objects.get_or_create(kind='t', document=doc)
            sec.text = title
            sec.save()

            sec, sec_c = Section.objects.get_or_create(kind='a', document=doc)
            sec.text = abstract
            sec.save()

            if include_pubtator:
                doc.init_pubtator()
    finally:
        h.close()
Пример #8
0
def get_wikiref(pmid):
    """ Returns the Wiki cite journal entry for a given Pubmed ID

    The year is the part of the first CRDT entry before the first '/'.
    Fields missing from the record are left empty instead of raising
    KeyError. When several records come back, the entry for the last one is
    returned; when none does, the result is an empty string.
    """
    import datetime

    handle = Entrez.efetch(db="pubmed", id=pmid, rettype="medline", retmode="text")
    try:
        records = list(Medline.parse(handle))
    finally:
        handle.close()

    accessdate = datetime.datetime.now().strftime("%Y-%m-%d")

    # Defined up front so an empty result no longer raises UnboundLocalError.
    outstring = ""

    # generate the {{cite journal}} format
    for rec in records:
        # Copy the author list: the record itself must not be mutated.
        authors = list(rec.get("AU") or [""])
        firstauthor = authors[0]
        coauthors = ", ".join(authors[1:])

        # get date of publication from CRDT
        created = rec.get("CRDT") or [""]
        year = created[0].split('/')[0]

        outstring = "{{cite journal|title=%s|journal=%s|year=%s|author=%s|coauthors=%s|volume=%s|pages=%s|id=PMID %s|accessdate=%s}}" % \
                    (rec.get("TI", ""), rec.get("JT", ""), year, firstauthor,
                     coauthors, rec.get("VI", ""), rec.get("PG", ""), pmid,
                     accessdate)

        # example:
        #{{cite journal|title=|journal=|date=2008/07/31/|first=Cyril|last=Herry|coauthors=i|volume=454|issue=7204|pages=600-606|id=PMID 18615015 {{doi|10.1038/nature07166}}|url=http://www.fmi.ch/downloads/news/2008.07.11.01.luthi.nature.press.release.pdf|format=|accessdate=2009-09-12 }}

    return outstring
Пример #9
0
def pubm(query):
    """Search PubMed for ``query``, save the hits and show them in a window.

    Every hit is printed (title, authors, source), its title and PMID are
    written to ``result.txt``, and the file content is then displayed in a
    Tk text widget with a vertical scrollbar. Blocks in ``mainloop()`` until
    the window is closed.
    """
    # Set the e-mail before the first request so NCBI knows who is asking.
    Entrez.email = "*****@*****.**"  # Always tell NCBI who you are
    handle = Entrez.esearch(db="pubmed", term="%s" % query, retmax=1000000000)
    record = Entrez.read(handle)
    handle.close()
    idlist = record["IdList"]

    records = []
    if idlist:
        # Skip efetch entirely when the search has no hits.
        handle = Entrez.efetch(db="pubmed", id=idlist, rettype="medline", retmode="text")
        try:
            records = list(Medline.parse(handle))
        finally:
            handle.close()

    with open("result.txt", "w") as out:
        for record in records:
            print("title:", record.get("TI", "?"))
            print("authors:", record.get("AU", "?"))
            print("source:", record.get("SO", "?"))
            print("")
            # Plain text for the id; the old code wrote the repr of a tuple.
            out.write("title:%s\nID:%s\n\n"
                      % (record.get("TI", "?"), record.get("PMID", "?")))

    with open("result.txt", "r") as fileview:
        quote = fileview.read()

    root = Tk()
    S = Scrollbar(root)
    T = Text(root, height=50, width=500)
    S.pack(side=RIGHT, fill=Y)
    T.pack(side=LEFT, fill=Y)
    # One vertical scrollbar: wire it to the y axis only. Binding the same
    # scrollbar to xview as well overrode the vertical binding.
    S.config(command=T.yview)
    T.config(yscrollcommand=S.set)
    T.insert(END, quote, 'color')
    mainloop()
Пример #10
0
	def fetch(self, batchSize=100):
		"""Download the next batch of search results.

		Returns a list of ``paper.Paper`` objects built from the records that
		carry a PMID, or an empty list once the search is exhausted. Advances
		the search position by ``batchSize`` and marks the search as done when
		the position reaches the total hit count.
		"""
		if self._done:
			return []

		upper = min(self._searchCount, self._searchPosition + batchSize)
		log.info("Downloading from %i to %i..." % (self._searchPosition+1, upper))

		fetchHandle = Entrez.efetch(
			db="pubmed",
			rettype="medline",
			retmode="text",
			retstart=self._searchPosition,
			retmax=batchSize,
			webenv=self._searchSession,
			query_key=self._queryKey)

		papers = []
		for entry in Medline.parse(fetchHandle):
			if entry.get("PMID") is not None:
				papers.append(paper.Paper(entry))
		fetchHandle.close()

		log.info("... downloading done")

		self._searchPosition += batchSize
		if not self._searchPosition < self._searchCount:
			self._done = True
			log.info("Search ended.")

		return papers
def fetch(t, s):
    """Return a text listing of the PubMed publications matching ``t``.

    Args:
        t: PubMed search term.
        s: Sort order passed to esearch.

    Returns:
        A string with a header and one numbered block per publication
        (title, authors, source, PubMed URL). Numbers count down from the
        total number of hits.
    """
    h = Entrez.esearch(db="pubmed", term=t, retmax=10000, sort=s)
    idList = Entrez.read(h)["IdList"]
    h.close()

    parts = [
        "Total publications for SA Beatson: **" + str(len(idList)) + "**\n\n",
        "Chronologically sorted:\n\n",
    ]

    if idList:
        handle = Entrez.efetch(db="pubmed", id=idList, rettype="medline", retmode="text")
        try:
            # The record's "DA" field used to be parsed into a date that was
            # never used; that parse is dropped because it aborted the whole
            # listing for any record without "DA".
            remaining = len(idList)
            for record in Medline.parse(handle):
                parts.append(
                    "| **%i.** %s\n| %s\n| %s\n| http://www.ncbi.nlm.nih.gov/pubmed/%s\n|\n" % (
                        remaining,
                        record["TI"],
                        ", ".join(record["AU"]),
                        record["SO"],
                        record["PMID"],
                    ))
                remaining -= 1
        finally:
            handle.close()
    return "".join(parts)
Пример #12
0
def retrive_record(row):
    """Look up the PubMed publications of the author named in ``row[1]``.

    Returns:
        ``[row[0], row[1], title, authors, AD field, publication date, PMID]``
        for the LAST record returned by PubMed, or an empty list when the
        author search has no hits.

    NOTE(review): only the last record survives the loop, as in the original
    code. Confirm whether callers expect one row per publication.
    """
    name = row[1] + "[AUTH]"
    handle = Entrez.esearch(db="pubmed", term=name)
    record = Entrez.read(handle)
    handle.close()
    idlist = record["IdList"]

    # Defined up front: without records the old code hit an unbound ``temp``.
    temp = []
    if not idlist:
        return temp

    handle = Entrez.efetch(db="pubmed", id=idlist, rettype="medline",
                           retmode="text")
    try:
        for record in Medline.parse(handle):
            temp = [
                row[0],
                row[1],
                record.get("TI", "?"),    # title
                record.get("AU", "?"),    # authors
                record.get("AD", "?"),
                record.get("DP", "?"),
                record.get("PMID", "?"),  # pubmed id for url
            ]
    finally:
        handle.close()

    return temp
Пример #13
0
def FetchIdList(ids):
    """Return the MEDLINE record parser for the PubMed entries ``ids``."""
    return Medline.parse(
        Entrez.efetch(db="pubmed", id=ids, rettype="medline", retmode="text"))
Пример #14
0
def createTable(query):
    """Build the HTML table rows for the PubMed hits of ``query``.

    Searches PubMed for ``query`` (at most 100 hits), downloads the matching
    records in MEDLINE text format and renders one ``<tr>`` per record with
    title, publication date, authors, journal, the query itself and a PubMed
    link followed by the abstract.

    Args:
        query: Search term entered by the user.

    Returns:
        The concatenated ``<tr>`` rows, or an ``<h3>`` message when ``query``
        is empty or the search has no hits.
    """
    if not query:
        return "<h3> No query </h3>"

    # Local import so the block runs on Python 3 (html) and Python 2 (cgi).
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    def cell(value):
        # str() first so a missing field still renders as "None", as before;
        # escaping keeps user input and record text from being read as markup.
        return escape(str(value), True)

    MAX_COUNT = 100

    Entrez.email = '*****@*****.**'
    # Raw string: '\-' is an invalid escape sequence in a normal literal.
    pubmedquery = query.replace('-', r'\-')
    h = Entrez.esearch(db='pubmed', term=pubmedquery, retmax=MAX_COUNT)
    result = Entrez.read(h)
    h.close()
    ids = result['IdList']
    if not ids:
        return "<h3> geen gevonden resultaten </h3>"

    h = Entrez.efetch(db='pubmed', id=ids, rettype='medline', retmode='text')
    rows = []
    try:
        for record in Medline.parse(h):
            try:
                rows.append("".join([
                    "<tr><td width='22%'>", cell(record.get("TI")), "</td>",
                    "<td width='5%'>", cell(record.get("DP")), "</td>",
                    # NOTE(review): writers() is defined elsewhere and may
                    # already return markup, so its output is left unescaped.
                    "<td width='5%'>", str(writers(record.get("FAU"))), "</td>",
                    "<td width='5%'>", cell(record.get("JT")), "</td>",
                    "<td width='5%'>", cell(query), "</td>",
                    "<td>",
                    "<a href='http://www.ncbi.nlm.nih.gov/pubmed/",
                    cell(record.get("PMID")),
                    "'><img src='PubMed.png' height='75px' width='75px' alt='PubMed' align='right'/></a>",
                    cell(record.get("AB")),
                    "</td></tr>",
                ]))
            except TypeError:
                # Records that cannot be rendered are skipped, as before.
                continue
    finally:
        h.close()

    return "".join(rows)
Пример #15
0
def index():
	"""Add every MEDLINE record that has an abstract to the "indexdir" index.

	Reads each file named in ``pubmed_files`` from the ``project`` directory,
	parses it as MEDLINE text and indexes title, source file, abstract,
	authors (full names when present, otherwise short names, otherwise an
	empty string), PMID and the record's "DA" date. Commits once at the end
	and prints the resulting document count.
	"""
	ix = open_dir("indexdir")
	writer = ix.writer()
	for pfile in pubmed_files:
		print("parsing %s" % (pfile,))
		# ``with`` closes each input file; they used to stay open.
		with open(project+"/"+pfile,"r") as txt:
			for r in Medline.parse(txt):
				if "AB" not in r:
					continue
				if "FAU" in r:
					authors = ",".join(r["FAU"])
				elif "AU" in r:
					authors = ",".join(r["AU"])
				else:
					# The old code set an unused ``firstAuthor`` here; the
					# indexed value has always been the empty string.
					authors = ""
				date = datetime.datetime.strptime(r["DA"],"%Y%m%d")
				title = r["TI"]
				pmid = r["PMID"].decode("utf-8")

				writer.add_document(
					title=title.decode("utf-8"),
					path=pfile.decode("utf-8"),
					abstract=r['AB'].decode("utf-8"),
					authors=authors.decode("utf-8"),
					pmid=pmid,
					dateAdded=date
					)
	writer.commit()
	print("Index contain %s" % (ix.doc_count(),))
Пример #16
0
def getMeSH(url):
    """List the PubMed hits whose MeSH terms mention "tox".

    Args:
        url: URL-encoded search query; it is decoded with ``unquote_plus``.

    Returns:
        One ``<h4>`` link per matching record, an empty string when no record
        matches, or an ``<h3>`` message when the query is empty or the search
        has no hits.
    """
    query = urllib.unquote_plus(url)

    if not query:
        return "<h3> No query </h3>"

    MAX_COUNT = 10000

    Entrez.email = '*****@*****.**'
    # Raw string: '\-' is an invalid escape sequence in a normal literal.
    pubmedquery = query.replace('-', r'\-')
    h = Entrez.esearch(db='pubmed', term=pubmedquery, retmax=MAX_COUNT)
    result = Entrez.read(h)
    h.close()
    ids = result['IdList']
    if not ids:
        return "<h3> geen gevonden resultaten </h3>"

    h = Entrez.efetch(db='pubmed', id=ids, rettype='medline', retmode='text')
    parts = []
    try:
        for record in Medline.parse(h):
            # Substring test on the stringified MH field, so a record
            # without MeSH terms ("None") simply does not match.
            if "tox" in str(record.get("MH")):
                pmid = str(record.get("PMID"))
                parts.append("<h4><a href='http://www.ncbi.nlm.nih.gov/pubmed/"+pmid+"'>")
                parts.append("PMID: "+pmid+"</a> is analysed on toxicity. </h4> \n")
    finally:
        h.close()

    return "".join(parts)
Пример #17
0
def text_originating(query):
    """
    Return the titles and abstracts of the top 10 PubMed hits for ``query``.

    The result starts with a single space, followed by title + abstract and a
    newline for every record ("?" stands in for a missing field).
    """
    Entrez.email = "*****@*****.**"
    search = Entrez.read(
        Entrez.esearch(db="pubmed",
                       retmax=10,
                       term=query,
                       usehistory='y',
                       sort='relevance'))
    query_key = search["QueryKey"]
    webenv = search["WebEnv"]
    handle = Entrez.efetch(db="pubmed",
                           webenv=webenv,
                           query_key=query_key,
                           retmax=10,
                           rettype="medline",
                           retmode="text")
    hits = list(Medline.parse(handle))
    chunks = [hit.get("TI", "?") + hit.get("AB", "?") + "\n" for hit in hits]
    search_results_text = " " + "".join(chunks)
    handle.close()
    return search_results_text
Пример #18
0
def pubmed():
    """Report PubMed statistics for "orchid" and the papers by "Liu ZJ".

    Prints the PubMed hit count for the term, the ids of the first 100 hits,
    and title, authors and source of every fetched record whose author list
    contains "Liu ZJ".
    """
    Entrez.email = "*****@*****.**"     # Always tell NCBI who you are

    # Hit count for "orchid" in the pubmed database only.
    counts = Entrez.read(Entrez.egquery(term="orchid"))
    for row in counts["eGQueryResult"]:
        if row["DbName"] != "pubmed":
            continue
        print("The count of papers about orchid in database pubmed: %s"
              % (row["Count"],))

    # Ids of the matching papers.
    found = Entrez.read(Entrez.esearch(db="pubmed", term="orchid", retmax=100))
    idlist = found["IdList"]
    print("The id list of papers about orchid in database pubmed: %s"
          % (idlist,))
    print("")

    # Papers among those hits that list the wanted author.
    handle = Entrez.efetch(db="pubmed", id=idlist, rettype="medline",
                           retmode="text")
    search_author = "Liu ZJ"
    for entry in Medline.parse(handle):
        if "AU" in entry and search_author in entry["AU"]:
            print("Author %s found." % search_author)
            print("title: %s" % (entry.get("TI", "?"),))
            print("authors: %s" % (entry.get("AU", "?"),))
            print("source: %s" % (entry.get("SO", "?"),))
            print("")
Пример #19
0
def table():
    """Render "articles.html" with the PubMed hits for the submitted form.

    Reads ``query``, ``date1`` and ``date2`` from the request form, searches
    PubMed for the query restricted to that publication-date range and builds
    one table row per record: title, other terms (OT), authors, publication
    date and a link to the article.
    """
    query = request.form["query"]
    date1 = request.form["date1"]
    date2 = request.form["date2"]

    term = query + " AND " + str(date1) + ":" + str(date2) + " [PDAT]"

    search_results = Entrez.read(
        Entrez.esearch(db="pubmed",
                       term=term,
                       datetype="pdat",
                       usehistory="y",
                       RetMax=100000))

    ids = search_results['IdList']

    row_template = (
        '<tr><td><div class="comment more">{0}</div></td>'
        '<td><div class="comment more">{1}</div></td>'
        '<td><div class="comment more">{2}</div></td>'
        '<td>{3}</td>'
        '<td><a href=https://www.ncbi.nlm.nih.gov/pubmed/?term={4}>{4}</a></td></tr>')

    rows = []
    if ids:
        # Skip efetch entirely when the search has no hits.
        h = Entrez.efetch(db='pubmed', id=ids, rettype='medline', retmode='text')
        try:
            for record in Medline.parse(h):
                # Link each row to its own PMID. The old code indexed ``ids``
                # with a counter that never advanced, so every row pointed at
                # the first hit.
                rows.append(row_template.format(
                    record.get('TI', '-'),
                    ', '.join(record.get('OT', '-')),
                    ', '.join(record.get('AU', '-')),
                    record.get('DP', '-'),
                    str(record.get('PMID', '-'))))
        finally:
            h.close()

    return render_template("articles.html", table="".join(rows))
Пример #20
0
def fetch(query_key, webenv, database, results_number):
    """
    Download the MEDLINE records of a search stored on the NCBI history server.

    :param query_key: query key returned by the preceding search
    :param webenv: web environment string returned by the preceding search
    :param database: name of the database to fetch from
    :param results_number: maximum number of records to download
    :return: list with one parsed MEDLINE record per search result
    """
    # Fetching through (webenv, query_key) reuses the ids kept on the history
    # server by a search run with usehistory='y', instead of sending the id
    # list again as efetch(db=..., id=idlist, ...) would.
    handle = Entrez.efetch(
        db=database,
        webenv=webenv,
        query_key=query_key,
        retmax=results_number,
        rettype="medline",  # Medline flat-file format
        retmode="text",
    )
    parsed = [entry for entry in Medline.parse(handle)]
    handle.close()
    return parsed
Пример #21
0
def _parse_medline(text: str) -> List[dict]:
    """Convert a ``rettype=medline`` response into a list of records.

    See https://www.nlm.nih.gov/bsd/mms/medlineelements.html

    The parser output is materialised so the function really returns the
    list its annotation promises, and callers can index it or take ``len()``.
    """
    return list(Medline.parse(io.StringIO(text)))
Пример #22
0
def get_ids():
    """Collect the PubMed records for "disability" and hand them on.

    Searches PubMed (at most 100 hits), prints the id list, builds one row
    ``[title, abstract, database, date, authors, LID, PMID]`` per record and
    passes the rows to ``sort_records``. Records missing any of these fields
    are skipped.
    """
    records_list = []
    term = "disability"
    database = "Pubmed"
    Entrez.email = '*****@*****.**'
    id_handle = Entrez.esearch(db=database, retmax=100,
                               term=term)  # search the terms on pubmed
    id_result = Entrez.read(id_handle)
    ids = id_result['IdList']  # ids of the articles matching the term
    print(ids)
    if ids:
        text_handle = Entrez.efetch(db=database,
                                    id=ids,
                                    rettype='medline',
                                    retmode='text')
        for record in Medline.parse(text_handle):
            try:
                # Take the id from the record itself; the old ``ids[counter]``
                # never advanced and always yielded the first id.
                result = [
                    record['TI'], record['AB'], database, record['DP'],
                    record['AU'], record['LID'], record['PMID']
                ]
            except KeyError:
                # Skip only this record; one incomplete record used to end
                # the whole loop.
                continue
            records_list.append(result)
        # The old extra append after the loop is gone: it duplicated the last
        # row, or raised NameError when no row had been built.
    sort_records(records_list)
Пример #23
0
    def get_pubmed(doc_id='', query=''):
        """Return the parsed first PubMed hit, or None.

        Searches by ``doc_id`` (as article id or PMID) when it is given,
        otherwise by ``query``. The first fetched record is handed to
        ``PubmedPaperParser.parse`` if it has a PMID. An IOError from the
        service propagates to the caller.
        """
        parser = PubmedPaperParser()
        Entrez.email = settings.CONSUMER_PUBMED_EMAIL

        if doc_id:
            term = '{doc_id}[AID] OR {doc_id}[PMID]'.format(doc_id=doc_id)
        else:
            term = query
        handle = Entrez.esearch(db='pubmed', term=term)
        found = Entrez.read(handle)
        handle.close()

        handle = Entrez.efetch(db="pubmed",
                               id=list(found["IdList"]),
                               rettype="medline",
                               retmode="text")
        entries = list(Medline.parse(handle))
        if not entries or not entries[0].get('PMID'):
            return None
        return parser.parse(entries[0])
Пример #24
0
def getCancerData(searchTerm , filename, email) :
    """Write the PubMed records matching ``searchTerm`` to ``filename``.

    Prints the total PubMed hit count, then fetches at most 200 records and
    writes PMID, title, authors, source and abstract of each, one field per
    line, with a blank line between records.

    Args:
        searchTerm: PubMed search term.
        filename: Path of the text file to (over)write.
        email: Contact address sent to NCBI.
    """
    Entrez.email = email     # Always tell NCBI who you are
    handle = Entrez.egquery(term=searchTerm)
    record = Entrez.read(handle)
    handle.close()
    for row in record["eGQueryResult"]:
        if row["DbName"] == "pubmed":
            print(row["Count"])         # prints number of articles

    # The total count is only reported; the download is capped at 200.
    retmax = 200

    handle = Entrez.esearch(db="pubmed", term=searchTerm, retmax=retmax)
    record = Entrez.read(handle)
    handle.close()
    idlist = record["IdList"]

    handle = Entrez.efetch(db="pubmed", id=idlist, rettype="medline",
                           retmode="text")
    try:
        records = list(Medline.parse(handle))
    finally:
        handle.close()

    # The old loop incremented an undefined ``count`` and therefore crashed
    # on the first record; that unused counter is removed.
    with open(filename, "w") as f:
        for record in records:
            authors = ", ".join(record.get("AU", "?"))
            f.write("PMID: " + record.get("PMID", "?") + "\n")
            f.write("Title: " + record.get("TI", "?") + "\n")
            f.write("Authors: " + authors + "\n")
            f.write("Source: " + record.get("SO", "?") + "\n")
            f.write("Abstract: " + record.get("AB", "?") + "\n\n")
Пример #25
0
def pub_med_parser(drug, side_effect):
	"""Tell whether PubMed has a title naming both the drug and the side effect.

	``drug`` and ``side_effect`` are first converted with ``Drugs.drugs`` and
	``Sideeffect.sideEffect``. Returns True when at least one fetched title
	contains both the first converted drug name and the converted side
	effect, otherwise False.
	"""
	drug_eng = Drugs.drugs(drug)
	side_effect = Sideeffect.sideEffect(side_effect)
	Entrez.email = "*****@*****.**"
	terms = "((" + drug_eng[0] + "[Title]) AND " + side_effect + "[Title/Abstract])"
	handle = Entrez.esearch(db="pubmed", term=terms, rettype="medline", retmode="text")
	found = Entrez.read(handle)
	handle.close()

	handle2 = Entrez.efetch(db="pubmed", id=found["IdList"], rettype="medline", retmode="text")
	titres = [entry.get("TI", "?") for entry in list(Medline.parse(handle2))]

	# Every title is inspected, exactly as the counting loop did.
	matching = [titre for titre in titres
				if drug_eng[0] in titre and side_effect in titre]
	return len(matching) != 0
Пример #26
0
def collect_example():
    """Write one CSV of circadian-clock articles per year, 2015 to 2018.

    For each year the MeSH query is run against PubMed with the history
    server enabled, the first 10000 records are fetched, and every record
    that has a PMID, a title and an abstract is written to
    ``./example_<year>.csv`` with an empty ``label`` column.
    """
    header = ['PMID', 'title', 'abstract', 'label']
    query = ('CLOCK Proteins/metabolism[MESH] OR Circadian Rhythm/*physiology[MESH] '
             'OR Circadian Clocks/physiology*[MESH]')
    for date in ['2015', '2016', '2017', '2018']:
        # ``with`` flushes and closes the CSV; the file was never closed.
        with open('./example_{}.csv'.format(date), 'w') as f:
            writer = csv.writer(f)
            writer.writerow(header)
            search_handler = Entrez.esearch(
                db='pubmed',
                term=query,
                mindate='{}/01/01'.format(date),
                maxdate='{}/12/31'.format(date),
                retmax=100000,
                usehistory='y')
            search = Entrez.read(search_handler)
            search_handler.close()
            fetch_handler = Entrez.efetch(db='pubmed',
                                          rettype='medline',
                                          retmode='text',
                                          retstart=0,
                                          retmax=10000,
                                          webenv=search['WebEnv'],
                                          query_key=search['QueryKey'])
            try:
                for record in Medline.parse(fetch_handler):
                    pmid = record.get('PMID')
                    title = record.get('TI')
                    abstract = record.get('AB')
                    if pmid is not None and title is not None and abstract is not None:
                        writer.writerow([pmid, title, abstract, ''])
            finally:
                fetch_handler.close()
Пример #27
0
def fetch_pubmed_data(pmid):
    """Return the MEDLINE record for ``pmid``, or None when unavailable.

    Sets ``response.flash`` when PubMed returns no usable record for the id,
    and ``session.flash`` when the remote service cannot be reached.

    Raises:
        Exception: when ``settings.author_email`` is not configured.
    """
    from Bio import Medline,Entrez

    # One lookup is enough; the old code retried the same attribute in a
    # nested try block.
    try:
        ncbiemail = settings.author_email
    except Exception:
        raise Exception('Please set an email to use ncbi services')

    Entrez.email = ncbiemail
    Entrez.tool = 'mybiodb'

    try:
        handle = Entrez.efetch(db="pubmed", id=pmid, rettype="medline", retmode="text")
        # next(..., None) works on Python 2 and 3, and an empty response
        # becomes an "ID error" instead of an uncaught StopIteration.
        entrez_response = next(Medline.parse(handle), None)
        if entrez_response is None or 'PMID' not in entrez_response:
            response.flash = 'pubmed ID error'
        else:
            return entrez_response
    except IOError:
        session.flash = 'Remote service not available, please try again.'

    return None
Пример #28
0
def search_pubmed(term):
    """Search ``term`` on PubMed and return the transformed records.

    Returns an empty ``BibDatabase`` when PubMed reports no hits, the result
    of ``transform_pubmed`` otherwise, and an empty list when anything fails.
    """
    print("Searching for", term)
    try:
        # First query: how many PubMed entries match?
        overview = Entrez.read(Entrez.egquery(term=term))
        nb_entries = 0
        for row in overview["eGQueryResult"]:
            if row["DbName"] != "pubmed":
                continue
            nb_entries = row["Count"]
            print(row["Count"], 'results found.')
        total = int(nb_entries)
        if total == 0:
            return BibDatabase()

        # Ids of the hits, capped at MAX_RESULTS.
        id_search = Entrez.read(
            Entrez.esearch(db="pubmed", term=term,
                           retmax=min(total, MAX_RESULTS)))

        # Full descriptions of those hits.
        handle = Entrez.efetch(db="pubmed", id=id_search["IdList"],
                               rettype="medline", retmode="text")
        return transform_pubmed(list(Medline.parse(handle)))

    except Exception as e:
        print('The search failed.')
        print(e)
        return []
Пример #29
0
def get_information(ids):
    """Fetch the MEDLINE records for ``ids`` and print the column headers.

    Prints the number of ids, collects title, abstract, database, MHDA date
    and authors of each record until the first record that lacks one of
    these fields, then prints the header names one per line.

    NOTE(review): the collected rows and the header list are not returned or
    stored anywhere; confirm whether callers are meant to receive them.
    """
    database = "pubmed"
    text_handle = Entrez.efetch(db=database,
                                id=ids,
                                rettype='medline',
                                retmode='text')
    records = Medline.parse(text_handle)
    print("Aantal publicaties: ", len(ids))

    headers = ["Title", "Abstract", "Database", "Date", "Author"]
    columns_list = [headers]
    records_list = []
    try:
        for entry in records:
            records_list.append([
                entry['TI'], entry['AB'], database, entry['MHDA'],
                entry['AU']
            ])
    except KeyError:
        # A record without one of the fields ends the collection.
        pass

    for name in headers:
        print(name)
Пример #30
0
    def fetch_articles(id_list):
        """
        This function retrieves the articles corresponding to the given id's
        and subsequently parses them using the Medline parser.

        Ids that cannot be fetched or parsed are reported and skipped. After
        every 500 ids the function sleeps for 30 seconds to be nice to NCBI.

        :param id_list: A list of id's.
        :return: A list of parsed articles corresponding to the given id's,
            or None when ``id_list`` is empty.
        """
        if not id_list:
            return None

        articles = []
        for count, ID in enumerate(id_list, 1):
            try:
                handle = Entrez.efetch(db="pubmed",
                                       id=ID,
                                       rettype="medline",
                                       retmode="text")
                try:
                    articles.append(list(Medline.parse(handle))[0])
                finally:
                    handle.close()
            except Exception as error:
                # Best effort: one bad id must not stop the whole download.
                print("could not fetch article %s: %s" % (ID, error))
            # Pause after EVERY 500 ids; ``count == 500`` paused only once.
            if count % 500 == 0:
                print("pause for 30 seconds")
                time.sleep(30)
        return articles
Пример #31
0
def retrieve(idlist):
    """Fetch the MEDLINE records for ``idlist`` and keep those from 2010 on.

    Every record is turned into a flat list:

        [0] label "Artikel: <n>" (n counts the kept articles, from 1)
        [1] PMID ("?" when missing)
        [2] title ("Unknown" when missing)
        [3] authors ("Unknown" when missing)
        [4] abstract ("Unknown" when missing)
        [5] publication year, ``int(determine__year(EDAT))``
        [6] PubMed URL built from the PMID

    :param idlist: list of PMIDs of the articles to retrieve.
    :return: 2d list holding one list per article whose year is >= 2010.
    """
    medline_handle = Entrez.efetch(db="pubmed",
                                   id=idlist,
                                   rettype="medline",
                                   retmode="text")
    parsed_records = list(Medline.parse(medline_handle))

    articles = []
    for entry in parsed_records:
        row = [
            # The running number only advances for articles that are kept.
            "Artikel: " + str(len(articles) + 1),
            entry.get("PMID", "?"),
            entry.get("TI", "Unknown"),
            entry.get("AU", "Unknown"),
            entry.get("AB", "Unknown"),
            int(determine__year(entry.get("EDAT", "0"))),
            "https://www.ncbi.nlm.nih.gov/pubmed/" + entry.get("PMID", "?"),
        ]
        if row[5] >= 2010:
            articles.append(row)

    return articles
Пример #32
0
    def _retrieve_record_batch(self, batch_start, batch_size):
        """Retrieves a PubMed article record batch.

        Retrieval is based on the info recovered by '_search_for_records()'.
        The batch size is limited by the 'batch_start' and 'batch_size'
        parameters. Returns a string containing the article info, if execution
        was successful and returns None otherwise.

        Args:
            batch_start (int): Specifies the starting index of this record
                batch.
            batch_size (int): Specifies the size of this records batch.

        Returns:
            list: A list of dictionaries that hold the data for each record.
        """
        if None in [self.search_record_web_env, self.search_record_query_key]:
            raise ValueError(  # Perform a search first!
                'No WebEnv or QueryKey data in this PubMed class instance.')

        fetch_handle = Entrez.efetch(db='pubmed',
                                     rettype='medline',
                                     retmode='text',
                                     retstart=batch_start,
                                     retmax=batch_size,
                                     webenv=self.search_record_web_env,
                                     query_key=self.search_record_query_key)

        data = Medline.parse(fetch_handle)
        records = [record for record in data]
        fetch_handle.close()

        return records
Пример #33
0
 def test_pubmed_16381885_and_19850725(self):
     """Bio.TogoWS.entry("pubmed", "16381885,19850725")

     Request two PubMed entries in a single call and check the parsed
     title and author list of each record, in the order requested.
     """
     expected = [
         (
             "From genomics to chemical genomics: new developments in KEGG.",
             [
                 "Kanehisa M",
                 "Goto S",
                 "Hattori M",
                 "Aoki-Kinoshita KF",
                 "Itoh M",
                 "Kawashima S",
                 "Katayama T",
                 "Araki M",
                 "Hirakawa M",
             ],
         ),
         (
             "DDBJ launches a new archive database with analytical tools "
             "for next-generation sequence data.",
             [
                 "Kaminuma E",
                 "Mashima J",
                 "Kodama Y",
                 "Gojobori T",
                 "Ogasawara O",
                 "Okubo K",
                 "Takagi T",
                 "Nakamura Y",
             ],
         ),
     ]

     handle = TogoWS.entry("pubmed", "16381885,19850725")
     records = list(Medline.parse(handle))
     handle.close()

     self.assertEqual(len(records), 2)
     for record, (title, authors) in zip(records, expected):
         self.assertEqual(record["TI"], title)
         self.assertEqual(record["AU"], authors)
Пример #34
0
def gather_pubmed_journal_article_titles(journal, mindate, maxdate):
    """Write the titles of a journal's PubMed articles to a text file.

    Searches PubMed for ``<journal>[Journal]`` within the given date range,
    fetches the matching MEDLINE records and writes one title per line to
    ``<journal>_article_titles.txt`` in the current working directory. The
    punctuation characters ``.,()?;:`` are stripped from a title unless they
    are directly followed by a digit. Records without a title are skipped.

    Args:
        journal: Journal name used in the search term and the file name.
        mindate: Lower bound of the date range passed to esearch.
        maxdate: Upper bound of the date range passed to esearch.
    """
    # https://dataguide.nlm.nih.gov/eutilities/utilities.html
    handle = Entrez.esearch(db='pubmed',
                            term='{}[Journal]'.format(journal),
                            retmax=100000,
                            retmode='text',
                            mindate='{}'.format(mindate),
                            maxdate='{}'.format(maxdate))
    id_list = Entrez.read(handle)['IdList']

    fetch_handle = None
    records = []
    if id_list:
        # Only hit efetch when the search found something; an empty id list
        # is not a meaningful request.
        fetch_handle = Entrez.efetch(db='pubmed',
                                     id=id_list,
                                     rettype='medline',
                                     retmode='text')
        records = Medline.parse(fetch_handle)

    punctuation = re.compile(r'(?!\d)[.,()?;:](?!\d)')
    try:
        with open('{}_article_titles.txt'.format(journal), 'w') as out_file:
            for record in records:
                title = record.get('TI')
                if title is None:
                    # Some records carry no title; previously this crashed
                    # inside re.sub with a TypeError.
                    continue
                out_file.write(punctuation.sub('', title) + '\n')
    finally:
        if fetch_handle is not None:
            fetch_handle.close()
Пример #35
0
def pubsearch(jids):
    """Search PubMed for "peptide" articles in the given journals (2011-2014).

    The search term is ``peptide AND (<jid1>[JID] OR <jid2>[JID] ...)``. The
    previous term ended in a dangling "or", an unbalanced parenthesis and a
    trailing "and". When ``jids`` is empty the journal filter is left out and
    the term is just ``peptide``.

    Args:
        jids: Iterable of journal ID strings; each one is tagged ``[JID]``.

    Returns:
        The iterator produced by ``Medline.parse`` over the fetched records
        (at most 2500). It reads from the efetch handle, which is therefore
        left open for the caller to consume.
    """
    Entrez.email = "*****@*****.**"
    # always let Entrez know who is calling

    term = "peptide"
    journal_filter = " OR ".join(jid + "[JID]" for jid in jids)
    if journal_filter:
        term += " AND (" + journal_filter + ")"

    # for documentation on esearch, see
    # http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
    # max number for retmax is 100k. Use retstart to get more than this.
    # mindate and maxdate must be used together to specify a date range.
    # The general date format is YYYY/MM/DD; YYYY and YYYY/MM are also allowed.
    IDhandle = Entrez.esearch(
        db="pubmed", term=term, mindate="2011", maxdate="2014", retmax=2500
    )

    # the search result is a dictionary-like record; "IdList" holds the ids
    idlist = Entrez.read(IDhandle)["IdList"]

    # fetch the MEDLINE records for the ids that were found
    recordHandle = Entrez.efetch(db="pubmed", id=idlist, rettype="medline", retmode="text")

    return Medline.parse(recordHandle)
Пример #36
0
def top_papers_dict(papers, outpath=None, delim="\t", top=None):
    """Fetch title, year and journal for the most-annotated PMIDs in 'papers'.

    Can be used with SP & GOA data.

    Args:
        papers: Mapping of PMID (str) to a sized collection of annotations.
        outpath: Unused; kept so existing callers keep working.
        delim: Unused; kept so existing callers keep working.
        top: Number of papers to keep, chosen by highest annotation count.
            None keeps all papers; a value <= 0 keeps none (previously 0
            selected every paper because ``[-0:]`` is the whole list).

    Returns:
        dict: ``{PMID: [number of annotations, title, year, journal]}``.
        Empty when nothing is selected; no request is sent in that case.
    """
    papers_annots = sorted((len(papers[p]), p) for p in papers)
    if top is None:
        selected = papers_annots
    elif top > 0:
        selected = papers_annots[-top:]
    else:
        selected = []

    papers_annots2_dict = {}
    if not selected:
        return papers_annots2_dict

    Entrez.email = MY_EMAIL
    h = Entrez.efetch(db="pubmed", id=",".join(pmid for _, pmid in selected),
                      rettype="medline", retmode="text")
    try:
        medrecs = list(Medline.parse(h))
    finally:
        h.close()

    # NOTE(review): records are matched to PMIDs by position, which assumes
    # efetch returns one record per id in request order - confirm.
    for (_, pmid), medrec in zip(selected, medrecs):
        year_fields = medrec.get("DP", "?").split()
        papers_annots2_dict[pmid] = [
            len(papers[pmid]),
            medrec.get("TI", "?"),
            # DP is e.g. "2011 Jan"; keep the first token, "?" when blank.
            year_fields[0] if year_fields else "?",
            medrec.get("JT", "?"),
        ]
    return papers_annots2_dict
def main(Substance, Organism, Gene):
    """Search PubMed for the hard-coded terms and print the matching PMIDs.

    The search term is built from the two local constants ("Cocaine" and
    "Elegans"); the parameters are accepted but not used by this function.
    At most 50 ids are requested. The result is printed as
    ``{term: set_of_pmids}``.

    Args:
        Substance: Unused.
        Organism: Unused.
        Gene: Unused.

    Returns:
        str: Always "Jay".
    """
    zoekterm1 = "Cocaine"
    zoekterm2 = "Elegans"
    MAX_COUNT = 50

    if zoekterm1 == "":
        print("vul een zoekterm in")
        sys.exit()
    if zoekterm2 == "":
        TERM = zoekterm1
    else:
        TERM = zoekterm1 + " and " + zoekterm2
    print(TERM)

    handle = Entrez.esearch(db="pubmed", term=TERM, retmax=MAX_COUNT)
    record = Entrez.read(handle)
    idlist = record["IdList"]
    handle = Entrez.efetch(db="pubmed", id=idlist, rettype="medline",
                           retmode="text")
    records = list(Medline.parse(handle))

    # Build the set once, after the loop: the old code assigned it inside
    # the loop and hit an unbound variable when no record came back.
    pubSet = {record.get("PMID", "?") for record in records}
    dic = {TERM: pubSet}
    print(dic)
    return "Jay"
Пример #38
0
def get_pubmed_article(pubmed_id):
    """Return basic citation data for the first record fetched for a PMID.

    The returned dict has the keys "pubmedid", "title", "authors",
    "journal", "year" and "doi". The DOI is taken from the LID field when it
    is tagged "[doi]", otherwise from the first AID entry tagged "[doi]",
    otherwise it is "". An empty dict is returned when no record is parsed.

    :param pubmed_id: PMID as a string; it is stripped for the request but
        stored unchanged under "pubmedid".
    :return: dict with the citation data, or {} when nothing was found.
    """
    # http://biopython.org/DIST/docs/tutorial/Tutorial.html#htoc126
    Entrez.email = "*****@*****.**"
    medline_handle = Entrez.efetch(db="pubmed", id=pubmed_id.strip(),
                                   rettype="medline", retmode="text")

    # Only the first parsed record is used.
    first = next(iter(Medline.parse(medline_handle)), None)
    if first is None:
        return {}

    response = {
        "pubmedid": pubmed_id,
        "title": first.get("TI", ""),
        "authors": first.get("AU", ""),
        "journal": first.get("TA", ""),
        "year": first.get("EDAT", "").split("/")[0],
    }

    lid = first.get("LID", "")
    doi = lid.split(" ")[0] if "[doi]" in lid else ""
    if not doi:
        # Fall back to the article identifiers.
        for aid in first.get("AID", ""):
            log.debug("AID:" + aid)
            if "[doi]" in aid:
                doi = aid.split(" ")[0]
                break
    response["doi"] = doi
    return response
Пример #39
0
def download_abstracts(dataset, path='.', email=None, out_file=None):
    """Download the abstracts for a dataset/list of pmids.

    Args:
        dataset: Either a ``Dataset`` (its ``image_table.ids`` are used as
            PMIDs) or a list of PMIDs.
        path: Directory that ``out_file`` is written to.
        email: Email address reported to NCBI; required.
        out_file: Optional CSV file name. When given, the result is saved to
            ``path/out_file`` without the index.

    Returns:
        pandas.DataFrame with the columns 'pmid' and 'abstract', holding one
        row per fetched record that has a non-empty abstract.

    Raises:
        Exception: If no email is given or the dataset type is not supported.
    """
    if email is None:
        raise Exception('No email address provided.')
    Entrez.email = email

    if isinstance(dataset, Dataset):
        pmids = dataset.image_table.ids.astype(str).tolist()
    elif isinstance(dataset, list):
        pmids = [str(pmid) for pmid in dataset]
    else:
        raise Exception('Dataset type not recognized: {0}'.format(
            type(dataset)))

    records = []
    # PubMed only allows you to search ~1000 at a time. I chose 900 to be safe.
    chunk_size = 900
    # ``range`` instead of the Python-2-only ``xrange`` so this also runs on
    # Python 3.
    for start in range(0, len(pmids), chunk_size):
        h = Entrez.efetch(db='pubmed',
                          id=pmids[start:start + chunk_size],
                          rettype='medline',
                          retmode='text')
        try:
            records.extend(Medline.parse(h))
        finally:
            h.close()

    # Pull data for studies with abstracts
    data = [[study['PMID'], study['AB']] for study in records
            if study.get('AB', None)]
    df = pd.DataFrame(columns=['pmid', 'abstract'], data=data)
    if out_file is not None:
        df.to_csv(os.path.join(os.path.abspath(path), out_file), index=False)
    return df
Пример #40
0
def medline(zoek):
    """Count keyword occurrences per abstract and dump them to Output.json.

    The ids returned by ``search(zoek)`` are fetched as MEDLINE records. For
    every record the upper-cased abstract is split on whitespace and each
    entry of the module-level ``Keywords`` is counted as a whole word. The
    per-record result is ``{PMID: [(keyword, count), ...]}``, ordered by
    count and then keyword, both descending. The list of these dicts is
    written to 'Output.json'.

    :param zoek: search input passed on to ``search``.
    """
    fetch_handle = Entrez.efetch(db='pubmed',
                                 id=search(zoek),
                                 rettype='medline',
                                 retmode='text')
    entries = list(Medline.parse(fetch_handle))

    output = []
    for entry in entries:
        pmid = entry.get('PMID', '?')
        # Split the upper-cased abstract into a list of words.
        words = entry.get('AB', '?').upper().split()

        counts = {keyword: words.count(keyword) for keyword in Keywords}
        ranked = sorted(((hits, keyword) for keyword, hits in counts.items()),
                        reverse=True)
        output.append({pmid: [(keyword, hits) for hits, keyword in ranked]})

    with open('Output.json', 'w') as f:
        json.dump(output, f)
Пример #41
0
def get_medline_records(pmids):
    """Fetch the MEDLINE records for ``pmids`` from PubMed.

    :param pmids: PMID or collection of PMIDs passed to ``Entrez.efetch``.
    :return: list with one parsed record per fetched article.
    """
    handle = Entrez.efetch(db="pubmed",
                           id=pmids,
                           rettype="medline",
                           retmode="text")
    try:
        # Read everything before the handle is closed.
        return list(Medline.parse(handle))
    finally:
        handle.close()
Пример #42
0
def retrieve_pubmed_articles(abstracts_file_name, pmids_file_name, seed=4):
    """Collect up to 10000 abstracts of randomly sampled PubMed ids.

    20000 ids are sampled from 1..33499999 with a ``random.Random(seed)``
    generator and requested in two efetch calls (``retstart`` 0 and 10000).
    Abstracts are written one per line to ``abstracts_file_name`` until
    10000 have been collected; the PMIDs of the records used are written,
    newline separated, to ``pmids_file_name``.

    :param abstracts_file_name: output file for the abstracts (UTF-8).
    :param pmids_file_name: output file for the used PMIDs (UTF-8).
    :param seed: seed of the random sample.
    :return: list of the PMIDs whose abstract was written.
    """
    sampled_ids = random.Random(seed).sample(range(1, 33500000), 20000)
    id_string = ', '.join(str(pmid) for pmid in sampled_ids)
    kept_pmids = []
    n_abstracts = 0

    with open(abstracts_file_name, "w", encoding="utf-8") as abstracts_out:
        offsets = range(0, len(sampled_ids), 10000)
        for batch_no, offset in enumerate(offsets, start=1):
            print("batch", batch_no)
            fetch_handle = Entrez.efetch(db='pubmed',
                                         id=id_string,
                                         rettype='medline',
                                         retmode='text',
                                         retstart=offset)
            parsed = Medline.parse(fetch_handle)

            for entry in tqdm(parsed, total=10000, desc="Progress"):
                if n_abstracts >= 10000:
                    # Quota reached: stop reading this batch.
                    break
                if "AB" in entry:
                    abstracts_out.write(entry["AB"] + "\n")
                    n_abstracts += 1
                    kept_pmids.append(entry["PMID"])

            fetch_handle.close()
            print("collected abstracts:", n_abstracts)

    with open(pmids_file_name, "w", encoding="utf-8") as pmids_out:
        pmids_out.write("\n".join(kept_pmids))
    return kept_pmids
Пример #43
0
def searchids(eiwit, jaartal):
    """Search PubMed and emit the hits as HTML table rows and graph nodes.

    Searches PubMed for ``eiwit`` restricted to publication dates from
    ``jaartal`` up to next year, fetches the matching MEDLINE records and,
    for every record:

    * appends an HTML ``<tr>`` row (id link, title, authors, date, keywords,
      abstract) to the returned string;
    * writes a JSON-like node line to ``bestand``.

    Afterwards the node list is terminated and a fixed "links" section is
    written to ``bestand``.

    ``bestand`` is not defined in this function; it is presumably an already
    open, module-level output file - verify against the caller.

    :param eiwit: search term (converted with ``str``).
    :param jaartal: first publication year of the date range.
    :return: the concatenated HTML rows ("" when nothing was found).
    """
    Entrez.email = "*****@*****.**"
    # Upper bound of the date range: the current year plus one, as a string.
    date2 = str(int(str(datetime.datetime.today())[0:4]) + 1)
    readhandle = Entrez.read(
        Entrez.esearch(db="pubmed",
                       retmax=100000,
                       term=str(eiwit) +
                       " AND {0}:{1} [PDAT]".format(jaartal, date2),
                       datetype="pdat",
                       usehistory="y"))
    ids = readhandle.get('IdList')
    # NOTE(review): the fetch is issued before the emptiness check below, so
    # it also runs when the search returned no ids.
    closedArtikels = Entrez.efetch(db="pubmed",
                                   id=ids,
                                   rettype="medline",
                                   retmode="text")
    openArtikels = Medline.parse(closedArtikels)
    newRow = ""
    if len(ids) > 0:
        # ``i`` is the 1-based node number, ``count`` the 0-based index into
        # ``ids``; records are paired with ids by position.
        i = 0
        count = -1
        for artikel in openArtikels:
            i += 1
            count += 1
            abstract = artikel.get("AB", "-")
            author = artikel.get("AU", "-")
            dateOfPublish = artikel.get("DP", "-")
            publicationType = artikel.get("PT", "-")
            pmid = artikel.get("PMID", "-")
            keywords = artikel.get("KYWD", "-")
            title = artikel.get("TI", "-")
            # NOTE(review): the href value is emitted without quotes and the
            # <a> element is never closed with </a> - confirm this is what
            # the consuming page expects.
            newRow += "<tr><td><a href=" "https://www.ncbi.nlm.nih.gov/pubmed?term=" + str(
                ids[count]) + ">" + str(
                    ids[count]) + "</td><td>" + title + "</td><td>" + ",".join(
                        author
                    ) + "</td><td>" + dateOfPublish + "</td><td>" + "\n".join(
                        keywords) + "</td><td>" + "".join(
                            abstract) + "</td></tr>"
            nodeID = str('m-' + str(i))
            dataid = str('"id":' + '"' + nodeID + '"')
            datapub = str('"name":' + pmid)
            # NOTE(review): this concatenation only works when ``keywords``
            # is a str (as the "-" default is); a list value would raise
            # TypeError - confirm what the parser returns for "KYWD".
            datawords = str('"Keywords":' + '"' + keywords + '"')
            dataloaded = str('"loaded":' + 'true')
            # NOTE(review): the single-id branch writes a trailing comma,
            # exactly like the "more entries follow" branch; only the last
            # entry of a multi-id result is written without one - confirm
            # the single-id case is intended.
            if len(ids) == 1:
                bestand.write('{' + dataid + ', ' + datawords + ', ' +
                              datapub + ', ' + dataloaded + '},' + '\n')
            elif count + 1 < len(ids):
                bestand.write('{' + dataid + ', ' + datawords + ', ' +
                              datapub + ', ' + dataloaded + '},' + '\n')
            elif count + 1 >= len(ids):
                bestand.write('{' + dataid + ', ' + datawords + ', ' +
                              datapub + ', ' + dataloaded + '}' + '\n')

    # Close the node array and write a fixed links section that connects
    # "m-0" -> "m-1" and "m-1" -> "m-2", independent of the search result.
    bestand.write('],' + '\n')
    bestand.write('"links":[' + '\n')
    bestand.write('{"id":' + '"101", ' + '"from":' + '"m-0", ' + '"to":' +
                  '"m-1", ' + '"type":' + '100},' + '\n')
    bestand.write('{"id":' + '"101", ' + '"from":' + '"m-1", ' + '"to":' +
                  '"m-2", ' + '"type":' + '100}')
    bestand.write('\n' + ']' + '\n' + '}')
    return newRow
Пример #44
0
def top_papers(papers, outpath=None, delim="\t", top=20):
    """Fetch PubMed info for the most-annotated PMIDs in 'papers'.

    Can be used with SP & GOA data. The result is returned as a list and,
    when 'outpath' is given, also written to that file.

    Args:
        papers: Mapping of PMID (str) to a sized collection of annotations.
        outpath: Optional path of a delimited text file to write.
        delim: Column delimiter of the output file (tab by default).
        top: Number of papers to keep, chosen by highest annotation count.
            None keeps all; a value <= 0 keeps none (previously 0 selected
            every paper because ``[-0:]`` is the whole list).

    Returns:
        list of tuples ``(# all annotations, PMID, Title, Year, Journal)``
        in ascending order of annotation count. No request is sent when
        nothing is selected.
    """
    papers_annots = sorted((len(papers[p]), p) for p in papers)
    if top is None:
        selected = papers_annots
    elif top > 0:
        selected = papers_annots[-top:]
    else:
        selected = []

    papers_annots2 = []
    if selected:
        Entrez.email = "*****@*****.**"
        h = Entrez.efetch(db="pubmed",
                          id=",".join(pmid for _, pmid in selected),
                          rettype="medline", retmode="text")
        try:
            medrecs = list(Medline.parse(h))
        finally:
            h.close()
        # NOTE(review): records are matched to PMIDs by position, which
        # assumes efetch returns one record per id in request order - confirm.
        for (n_annots, pmid), medrec in zip(selected, medrecs):
            year_fields = medrec.get("DP", "?").split()
            papers_annots2.append((
                n_annots,
                pmid,
                medrec.get("TI", "?"),
                # DP is e.g. "2011 Jan"; keep the first token, "?" when blank.
                year_fields[0] if year_fields else "?",
                medrec.get("JT", "?"),
            ))

    if outpath:
        # file.write works on Python 2 and 3; ``print >> fout`` is Python 2
        # only. ``with`` closes the file even when a write fails.
        with open(outpath, "w") as fout:
            header = ["num proteins", "pubmed ID", "Title", "Year", "Journal"]
            fout.write(delim.join(header) + "\n")
            for row in papers_annots2:
                fields = ["%d" % row[0]] + ["%s" % field for field in row[1:]]
                fout.write(delim.join(fields) + "\n")
    return papers_annots2
Пример #45
0
def get_citations(rec_paper_list, logger):
    """Log a numbered PubMed reference for every paper id on the list.

    The ids are posted to the Entrez history (epost) and then fetched as
    MEDLINE records. Each record is reported through ``logger.info`` as
    "<n>. <title> <authors>. <journal>. <date>. (<PMID>)", with "?" for
    missing fields.

    Arguments:
    rec_paper_list - list of strs; paper ID numbers
    logger - object whose ``info`` method receives each reference

    Returns:
    None
    """
    joined_ids = ",".join(rec_paper_list)
    posted = Entrez.read(Entrez.epost("pubmed", id=joined_ids))
    query_key = posted["QueryKey"]
    webenv = posted["WebEnv"]
    fetch_handle = Entrez.efetch(db="pubmed",
                                 id=joined_ids,
                                 rettype='medline',
                                 retmode='text',
                                 webenv=webenv,
                                 query_key=query_key)

    template = "{}. {} {}. {}. {}. ({})"
    position = 0
    for record in Medline.parse(fetch_handle):
        position += 1
        fields = [record.get(tag, "?")
                  for tag in ("TI", "AU", "JT", "DP", "PMID")]
        logger.info(template.format(position, *fields))
Пример #46
0
def pubsearch(jids):
    """Search PubMed for "peptide" articles in the given journals (2011-2014).

    The search term is ``peptide AND (<jid1>[JID] OR <jid2>[JID] ...)``. The
    previous term ended in a dangling "or", an unbalanced parenthesis and a
    trailing "and". When ``jids`` is empty the journal filter is left out and
    the term is just ``peptide``.

    Args:
        jids: Iterable of journal ID strings; each one is tagged ``[JID]``.

    Returns:
        The iterator produced by ``Medline.parse`` over the fetched records
        (at most 5). It reads from the efetch handle, which is therefore left
        open for the caller to consume.
    """
    Entrez.email = "*****@*****.**"
    #always let Entrez know who is calling

    term = "peptide"
    journal_filter = " OR ".join(jid + "[JID]" for jid in jids)
    if journal_filter:
        term += " AND (" + journal_filter + ")"

    #for documentation on esearch, see
    #http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
    #max number for retmax is 100k. Use retstart to get more than this.
    #mindate and maxdate must be used together to specify a date range.
    #The general date format is YYYY/MM/DD; YYYY and YYYY/MM are also allowed.
    IDhandle = Entrez.esearch(db="pubmed", term=term, mindate="2011", maxdate="2014", retmax=5)

    #the search result is a dictionary-like record; "IdList" holds the ids
    idlist = Entrez.read(IDhandle)["IdList"]

    #fetch the MEDLINE records for the ids that were found
    recordHandle = Entrez.efetch(db="pubmed", id=idlist, rettype="medline", retmode="text")

    return Medline.parse(recordHandle)
def fetch_details(id_list, email, api_key=None):
    '''Retrieve the MEDLINE details for a list of PMIDs.

    The ids are requested in batches of 10,000 (efetch returns at most that
    many results per request) and progress is printed per batch.

    :param id_list: list of PMID strings.
    :param email: address reported to NCBI via ``Entrez.email``.
    :param api_key: optional NCBI API key; set on ``Entrez`` when truthy.
    :return: list with the parsed MEDLINE records of all batches.
    '''
    total = len(id_list)
    Entrez.email = email
    if api_key:
        Entrez.api_key = api_key

    # see https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch for information on parameters
    # see https://www.nlm.nih.gov/bsd/mms/medlineelements.html for the medline elements
    collected = []
    offset = 0  # number of ids covered by the batches handled so far
    print(f"  Retrieving details for {total} PMIDs ...")
    for batch in chunks(id_list, 10000):
        upper = offset + len(batch)
        print(f"    Retrieving details for PMIDs {offset} to {upper} ...")
        handle = Entrez.efetch(db="pubmed",
                               id=','.join(batch),
                               rettype="medline",
                               retmode="text")
        collected += list(Medline.parse(handle))
        offset = upper
    print("    Done.")
    return collected
Пример #48
0
def getCancerData(searchTerm, filename, email):
    """Download all PubMed records for a search term into batch files.

    The number of PubMed hits is read from an EGQuery, all ids are collected
    with esearch, and the MEDLINE records are fetched in batches of 2000.
    Each batch is written to ``<filename><start>.txt`` with one
    tab-separated line per record (PMID, title, authors, source, abstract;
    "?" for missing fields).

    Args:
        searchTerm: PubMed search term.
        filename: Prefix of the output files; the batch start index and
            ".txt" are appended.
        email: Address reported to NCBI via ``Entrez.email``.
    """
    Entrez.email = email  # Always tell NCBI who you are
    handle = Entrez.egquery(term=searchTerm)
    record = Entrez.read(handle)

    # Take the count from the pubmed row itself. The old code read it from
    # whichever row happened to be last in the result.
    retmax = 0
    for row in record["eGQueryResult"]:
        if row["DbName"] == "pubmed":
            print(row["Count"])  # prints number of articles
            retmax = int(row["Count"])
            break

    handle = Entrez.esearch(db="pubmed", term=searchTerm, retmax=retmax)
    record = Entrez.read(handle)
    idlistAll = record["IdList"]

    ### loop through each batch. There is a limit to efetch
    batchSize = 2000
    for start in range(0, len(idlistAll), batchSize):
        idlist = idlistAll[start:start + batchSize]

        handle = Entrez.efetch(db="pubmed",
                               id=idlist,
                               rettype="medline",
                               retmode="text")
        try:
            records = list(Medline.parse(handle))
        finally:
            handle.close()

        # One file per batch; ``with`` closes every file (the old ``f.close``
        # lacked its parentheses and never ran).
        with open(filename + str(start) + ".txt", "w") as f:
            for record in records:
                authors = ", ".join(record.get("AU", "?"))
                f.write("PMID: " + record.get("PMID", "?") + "\t")
                f.write("Title: " + record.get("TI", "?") + "\t")
                f.write("Authors: " + authors + "\t")
                f.write("Source: " + record.get("SO", "?") + "\t")
                f.write("Abstract: " + record.get("AB", "?") + "\n")

        print("Batch starting at " + str(start) + " is complete")
Пример #49
0
def search_ncbi(query,
                autocorrect=False,
                db='pubmed',
                reldate=365,
                max_results=500):
    '''
    Searches the ncbi databank for the specific query.
    Note that autocorrect=True uses another request to the Entrez webserver.

    The parameters which can be used for the Entrez methods can be found on
    https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
    The meaning of the keys in the results can be found on
    https://www.nlm.nih.gov/bsd/mms/medlineelements.html

    A batch whose download fails with an HTTPError is skipped; a warning is
    issued for it instead of dropping it silently.

    :param Str query: Search term.
    :param Bool autocorrect: Run the query through ``_preprocess_query`` first.
    :param Str db: Database to be searched
    https://www.ncbi.nlm.nih.gov/books/NBK25497/table/chapter2.T._entrez_unique_identifiers_ui/?report=objectonly
    :param Int reldate: The search returns only those items that are no older than reldate days.
    :param Int max_results: Upper bound on the number of records downloaded.
    :return: List of search results, containing (among other informations) the title and abstract of the respective
    papers.
    :rtype List[Dict]:
    '''
    import warnings  # only needed to report skipped batches

    if autocorrect:
        query = _preprocess_query(query, db)

    handler = Entrez.esearch(db=db,
                             term=query,
                             reldate=reldate,
                             datetype="pdat",
                             sort='Best match',
                             usehistory="y")

    search_results = Entrez.read(handler)
    results_as_text = []

    # Download in batches, since url may break if max_results is large.
    batch_size = 25
    count = min(max_results, int(search_results["Count"]))

    for start in range(0, count, batch_size):
        # The last batch is trimmed so that exactly ``count`` records are
        # requested in total.
        size = min(batch_size, count - start)
        try:
            fetch_handle = Entrez.efetch(db=db,
                                         rettype="medline",
                                         retmode="text",
                                         retstart=start,
                                         retmax=size,
                                         webenv=search_results["WebEnv"],
                                         query_key=search_results["QueryKey"])
            try:
                results_as_text += fetch_handle.read().split('\n')
            finally:
                fetch_handle.close()
        except HTTPError as err:
            # Best effort: keep going with the other batches, but say so.
            warnings.warn("Skipping records %d-%d: %s"
                          % (start, start + size - 1, err))

    results = [dict(result) for result in Medline.parse(results_as_text)]

    results = _postprocess_results(results)
    return results
Пример #50
0
def get_pubmed_data(idlist):
	"""Return a (title, authors, PMID) tuple for every fetched record.

	Takes a list of pubmed ids. A record that lacks the "TI", "AU" or
	"PMID" field raises KeyError.
	"""
	medline_handle = Entrez.efetch(db='pubmed', id=idlist, rettype='medline', retmode='text')
	return [
		(entry["TI"], entry["AU"], entry["PMID"])
		for entry in Medline.parse(medline_handle)
	]
Пример #51
0
def pubmedsearch(TERM, MAX_COUNT=10000):
    """Search PubMed for *TERM* and return the matching MEDLINE records.

    :param TERM: PubMed search term.
    :param MAX_COUNT: maximum number of ids requested from esearch.
    :return: the iterator produced by ``Medline.parse`` over the fetched
        records.
    """
    Entrez.email = '*****@*****.**'
    search_handle = Entrez.esearch(db='pubmed', retmax=MAX_COUNT, term=TERM)
    matching_ids = Entrez.read(search_handle)['IdList']
    fetch_handle = Entrez.efetch(db='pubmed', id=matching_ids,
                                 rettype='medline', retmode='text')
    return Medline.parse(fetch_handle)
def Pubmedsearch(PMID):
    """Return a tab-separated summary line for one PubMed record.

    The line holds the PMID, title (TI), full author names (FAU), authors
    (AU) and affiliation (AD) of the first record fetched for ``PMID``; a
    missing field is written as "?".

    :param PMID: PubMed id to fetch.
    :return: the summary string, or None when no record was returned.
    """
    # Indentation is normalized to spaces: the original mixed tabs and
    # spaces, which Python 3 rejects with a TabError.
    handle = Entrez.efetch(db="pubmed", id=PMID, rettype="medline", retmode="text")
    try:
        records = list(Medline.parse(handle))
    finally:
        handle.close()

    if not records:
        return None

    record = records[0]
    fields = (
        PMID,
        record.get("TI", "?"),
        record.get("FAU", "?"),
        record.get("AU", "?"),
        record.get("AD", "?"),
    )
    return "\t".join(str(field) for field in fields)
Пример #53
0
def get_record_from_pmid(pmid):
    """Return the parsed MEDLINE record of a single PMID.

    The fetch and parse APIs operate on collections, so the single expected
    record is taken from the front of the parsed list. IndexError is raised
    when nothing was parsed.

    :param pmid: PubMed id of the citation.
    :return: the first parsed record.
    """
    medline_handle = Entrez.efetch(db="pubmed", id=pmid,
                                   rettype="medline", retmode="text")
    parsed = list(Medline.parse(medline_handle))
    return parsed[0]
Пример #54
0
def get_articles(term):
    """Fetch the MEDLINE records of all articles matching ``term``.

    The ids come from ``get_article_ids(term)`` and are fetched in chunks of
    100. When more than one chunk is needed, a '#' is printed after each
    chunk as a progress marker. An empty id list returns [] without sending
    a request.

    :param term: search term passed to ``get_article_ids``.
    :return: list of parsed MEDLINE records.
    """
    idlist = get_article_ids(term)
    chunks = [idlist[i:i + 100] for i in range(0, len(idlist), 100)]
    show_progress = len(chunks) > 1

    articles = []
    for chunk in chunks:
        handle = Entrez.efetch(db="pubmed", id=chunk, rettype="medline", retmode="text")
        try:
            articles.extend(Medline.parse(handle))
        finally:
            handle.close()
        if show_progress:
            # Function-call form works on both Python 2 and Python 3.
            print('#')

    return articles
Пример #55
0
def store_abstract_with_pmid(pmid,queryTag=None):
	"""Populate the PG databases with the MEDLINE entries having these pmid.

	Every fetched record is handed to ``store_medline_entry`` together with
	``queryTag``.

	:param pmid: a single pmid or a list of pmid.
	:param queryTag: tag stored with each entry; defaults to "PMID".
	"""
	if queryTag is None:
		queryTag="PMID"
	Entrez.email="*****@*****.**"
	handle=Entrez.efetch(db="pubmed",rettype="medline",retmode="text",id=pmid)
	try:
		for r in Medline.parse(handle):
			store_medline_entry(r,queryTag)
	finally:
		# Release the connection even when storing an entry fails.
		handle.close()
Пример #56
0
def pubmedsearch(TERM, MAX_COUNT=10000):
    """Search PubMed for *TERM* and return the matching MEDLINE records.

    Identifies itself to NCBI with the tool name 'pm_impacts'.

    :param TERM: PubMed search term.
    :param MAX_COUNT: maximum number of ids requested from esearch.
    :return: the iterator produced by ``Medline.parse`` over the fetched
        records.
    """
    Entrez.email = '*****@*****.**'
    Entrez.tool = 'pm_impacts'
    search_handle = Entrez.esearch(db='pubmed', retmax=MAX_COUNT, term=TERM)
    matching_ids = Entrez.read(search_handle)['IdList']
    fetch_handle = Entrez.efetch(db='pubmed', id=matching_ids,
                                 rettype='medline', retmode='text')
    return Medline.parse(fetch_handle)
Пример #57
0
	def fetch_abstract(self,Titulo):
		"""Print the abstract of the first PubMed hit for a title.

		Searches PubMed for ``Titulo`` (at most one id is requested), fetches
		the MEDLINE record of the hit and prints its "AB" field. Nothing is
		printed when the search finds no article.

		:param Titulo: title (search term) of the article.
		:raises KeyError: when the fetched record has no abstract.
		"""
		Entrez.email = "*****@*****.**"
		handle = Entrez.esearch(db = "pubmed", term = Titulo, retmax = 1)
		try:
			result = Entrez.read(handle)
		finally:
			# The original ``handle.close`` lacked the call parentheses and
			# therefore never closed the handle.
			handle.close()

		idlist = result["IdList"]
		if not idlist:
			# No hit: do not send an efetch request with an empty id list.
			return

		handle2 = Entrez.efetch(db="pubmed", id=idlist, rettype="medline", retmode="text")
		try:
			for record in Medline.parse(handle2):
				print(record["AB"])
		finally:
			handle2.close()
Пример #58
0
 def extractRecords(self):
     """
     Download PubMed records for the query given on the command line.

     ``sys.argv[1]`` is the query string (ideally PDZ domain) and
     ``sys.argv[2]`` the maximum number of records. The parsed MEDLINE
     records are stored as a list in ``self.Records``.
     """
     query, limit = sys.argv[1], int(sys.argv[2])
     search_result = Entrez.read(Entrez.esearch("pubmed", query, retmax=limit))
     fetch_handle = Entrez.efetch(db="pubmed", id=search_result["IdList"],
                                  rettype="medline", retmode="text")
     self.Records = list(Medline.parse(fetch_handle))
Пример #59
0
def medline_download_entries(pmids):
	"""Download the MEDLINE entries for ``pmids`` via the Entrez history.

	The ids are uploaded with epost and the resulting WebEnv / QueryKey pair
	is used to fetch all records in one efetch call.

	:param pmids: iterable of PMIDs (each converted with ``str``).
	:return: list of parsed MEDLINE records.
	"""
	Entrez.email="*****@*****.**"
	id_string = ",".join(str(pmid) for pmid in pmids)
	posted = Entrez.read(Entrez.epost("pubmed",id=id_string))
	fetch_handle = Entrez.efetch(db="pubmed",rettype="medline",retmode="text",
		webenv=posted["WebEnv"], query_key=posted["QueryKey"])
	return list(Medline.parse(fetch_handle))