def doi_article(ref, user=None):
    article_array = []
    fetch = PubMedFetcher()
    for doi in Reference.doi(ref):
        article = fetch.article_by_doi(doi)
        article_array.append(article)
    return article_array
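# Usage sketch for the DOI lookup above. A minimal example assuming the
# PubMedFetcher import used throughout this file and network access to NCBI;
# the DOI literal is illustrative only.
def demo_article_by_doi(doi='10.1371/journal.pone.0058655'):
    fetch = PubMedFetcher()
    article = fetch.article_by_doi(doi)  # raises MetaPubError if the DOI cannot be resolved
    return article.title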
def processPMID(self, description, document, text):
    """Strip [PMID]/[PMCID] tags from a description, write its paragraphs into
    the document, then append an italicized citation for each referenced article."""
    pmid = re.compile(r'PMID *(\d+)')
    list_pmid = pmid.findall(description)
    description = re.sub(r'\[PMID *\d+\]', '', description)
    pmcid = re.compile(r'PMCID *(\d+)')
    list_pmcid = pmcid.findall(description)
    description = re.sub(r'\[PMCID *\d+\]', '', description)
    para = description.split(r'\n')  # the source text uses literal "\n" separators
    for para_str in para:
        p = document.add_paragraph(' ')
        p.add_run(para_str)
    # "By testing your genetic loci and using internationally recognized
    # reference systems such as PubMed, we conclude that <text>."
    std_str = u"我们通过检测您的基因位点,使用PUBMED等国际公认参考系统,我们认为" + text + u"。"
    p = document.add_paragraph(' ')
    p.add_run(std_str)
    fetch = PubMedFetcher()
    for pmid in list_pmid:
        # e.g. http://www.ncbi.nlm.nih.gov/pubmed/26471457
        pm = fetch.article_by_pmid(pmid)
        title = re.sub(r'\.', '', pm.title)
        citation = '. '.join([title, pm.journal])
        p = document.add_paragraph()
        p.add_run(citation).italic = True
    for pmcid in list_pmcid:
        pm = fetch.article_by_pmcid(pmcid)
        title = re.sub(r'\.', '', pm.title)
        citation = '. '.join([title, pm.journal])
        p = document.add_paragraph()
        p.add_run(citation).italic = True
def pmcid_article(ref, user=None):
    article_array = []
    fetch = PubMedFetcher()
    for pmcid in Reference.pmcid(ref):
        article = fetch.article_by_pmcid(pmcid)
        article_array.append(article)
    return article_array
def get_info_by_DOI(DOI: str) -> Dict:
    '''This function takes a DOI str, requests information about the
    corresponding article via metapub or crossref and checks if all necessary
    information has been retrieved.'''
    article_dict = {}
    fetch = PubMedFetcher()
    try:
        article = fetch.article_by_doi(DOI)
        # Save every public attribute of the article in the dict
        for info in dir(article):
            if not info.startswith('_'):
                article_dict[info] = getattr(article, info)
        # Add data retrieval info to the dict
        article_dict = add_retrieval_information(article_dict, 'MetaPub', 'DOI', DOI)
    except MetaPubError:
        # If it does not work via MetaPub, fall back to the Crossref API.
        # If there is a timeout, try again (5 times).
        for _ in range(5):
            try:
                works = Works()
                article_dict = works.doi(DOI)
                break
            except Exception:
                pass
        # article_dict = normalize_crossref_dict(article_dict)
        # Add data retrieval info to the dict
        # if contains_minimal_information(article_dict):
        article_dict = add_retrieval_information(article_dict, 'Crossref', 'DOI', DOI)
    return article_dict
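# Usage sketch for get_info_by_DOI. Assumes the module-level imports this
# function relies on (PubMedFetcher, Works, add_retrieval_information); the
# DOI literal and demo name are illustrative only.
def demo_get_info_by_doi():
    info = get_info_by_DOI('10.1371/journal.pone.0058655')
    return info.get('title')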
def consultametapub():
    fetch = PubMedFetcher()
    if not request.json:
        abort(400)
    pmid = request.json['id']
    article = fetch.article_by_pmid(pmid)
    return jsonify(output=article.title)
def test_configurable_cachedir(self):
    """ Test that `cachedir` keyword argument is fully supported in modes:

        cachedir == 'default'   <-- assumed working since other tests use this.
        cachedir is None
        cachedir is 'some/path'
        cachedir is '~/path'
    """
    cachedir = TEST_CACHEDIR

    # start with cachedir==None; test that no cachedir is created.
    fetch = PubMedFetcher(cachedir=None)
    assert not os.path.exists(cachedir)

    fetch = PubMedFetcher(cachedir=cachedir)
    assert os.path.exists(cachedir)
    os.unlink(fetch._cache_path)
    os.rmdir(cachedir)

    fetch = PubMedFetcher(cachedir='~/testcachedir')
    assert os.path.exists(os.path.expanduser('~/testcachedir'))
    os.unlink(fetch._cache_path)
    os.rmdir(os.path.expanduser('~/testcachedir'))
def pmids_from_citation(author_last_name=None, year=None, volume=None,
                        first_page=None, journal_title=None):
    fetch = PubMedFetcher()
    return fetch.pmids_from_citation(aulast=author_last_name, year=year,
                                     volume=volume, first_page=first_page,
                                     jtitle=journal_title)
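# Usage sketch for pmids_from_citation. All citation fields below are
# illustrative only; an empty list comes back when nothing matches.
def demo_pmids_from_citation():
    return pmids_from_citation(author_last_name='McMurry', year=2013,
                               volume='8', first_page='e58655',
                               journal_title='PLoS One')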
def crawl_chem_abstract(self, keyword, retmax=300):
    fetch = PubMedFetcher()
    self.progress_bar_value.emit(self.count)
    pmids = fetch.pmids_for_query(keyword, retmax=retmax)
    self.textBrowser_value.emit("Scanning Iteration : " + str(retmax))
    self.textBrowser_value.emit("Expected Running Time : " + str(retmax * 2) + " seconds.")
    self.textBrowser_value.emit("PMID Scan Done!")
    json_dicts = []
    self.textBrowser_value.emit("Crawling Paper Info..")
    for i, pmid in enumerate(pmids):
        try:
            if int(i / len(pmids) * 100) > self.count:
                self.count = int(i / len(pmids) * 100)
                self.progress_bar_value.emit(self.count)
            try:
                article = fetch.article_by_pmid(pmid)
            except Exception:
                self.textBrowser_value.emit("Error reading " + str(pmid))
                continue
            chemical = article.chemicals
            if not chemical:
                continue
            # Guard against a missing abstract before transforming it
            if not article.abstract:
                continue
            abstract = article.abstract.replace(",", "*")
            abstract = abstract.replace("\t", " ").replace("\n", " ")
            title = article.title
            if not title:
                continue
            title = title.replace("\t", " ").replace("\n", " ")
            chemical["title"] = title
            chemical["abstract"] = abstract
            json_dicts.append(chemical)
        except Exception:
            continue
    self.textBrowser_value.emit("Progress Done!")
    return json_dicts
def keyword_query(keywords=sys.argv[1], savepath=sys.argv[2],
                  start_date=None, end_date=None, num_of_articles=1000):
    """
    keyword_query takes in a keyword string or list of keywords, and outputs a
    dataframe with article metadata that matches the keyword query.

    **NOTE**: Long queries (~1000+ articles) will take > 5 minutes. Thus, it is
    advisable to add additional keywords and filters to constrain the search space.

    :param keywords: A string or a list of keywords to query.
    :param savepath: A string denoting the full path to save the file in.
    :param start_date: A string denoting the start date.
    :param end_date: A string denoting the end date.
    :param num_of_articles: An integer denoting the maximum number of articles.
    :return df: A pandas dataframe of the query.
    """
    fetch = PubMedFetcher()

    # Get PMIDs using query
    pmids = fetch.pmids_for_query(query=keywords, since=start_date,
                                  until=end_date, retmax=num_of_articles)
    print("Number of PMIDs with search query: " + str(len(pmids)))

    # Get abstracts based on keyword search.
    # The query saves to a dictionary, using the PMID as the key.
    abstracts = {}
    for id in pmids:
        article = fetch.article_by_pmid(id)
        abstracts[id] = [article.title, article.abstract, article.journal,
                         article.year, article.authors]

    # Save the dictionary as a dataframe
    df = pd.DataFrame.from_dict(
        abstracts, orient='index',
        columns=['Title', 'Abstract', 'Journal', 'Year', 'Authors'])

    # Save the dataframe
    df.index.name = 'PMID'
    df.to_csv(savepath)
    return df
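# Usage sketch for keyword_query. All arguments are illustrative; keeping
# num_of_articles small avoids long per-PMID fetch loops.
def demo_keyword_query():
    return keyword_query(keywords='brugada syndrome', savepath='results.csv',
                         start_date='2020/01/01', end_date='2020/12/31',
                         num_of_articles=25)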
def measure_all_from_query(query):
    fetch = PubMedFetcher()
    pm_ids = fetch.pmids_for_query(query)
    if len(pm_ids) > 8:
        pm_ids = pm_ids[:8]
    scores = []
    nlp = spacy.load('en_core_sci_lg')
    for id in pm_ids:
        scores.append((id, measure_similarity_abstracts(nlp, id)))
    return scores
class TestPubmedFetcher(unittest.TestCase):

    def setUp(self):
        self.fetch = PubMedFetcher()

    def tearDown(self):
        pass

    def test_pmids_for_query(self):
        params = {'journal': 'PLoS One', 'year': 2013, 'author': 'McMurry AJ'}
        pmids = self.fetch.pmids_for_query(**params)
        assert len(pmids) == 1
        assert pmids[0] == '23533569'

        # this pubmed ID was deleted
        params = {'TA': 'Journal of Neural Transmission',
                  'pdat': 2014,
                  'vol': 121,
                  'aulast': 'Freitag'}
        pmids = self.fetch.pmids_for_query(**params)
        assert len(pmids) == 0

    def test_medical_genetics_query(self):
        # we presume that the results for a fixed year prior to this one will not change.
        results = self.fetch.pmids_for_medical_genetics_query(
            'Brugada Syndrome', 'diagnosis', debug=True, year=2013)
        assert '24775617' in results

    def test_clinical_query(self):
        # we presume that the results for a fixed year prior to this one will not change.
        results = self.fetch.pmids_for_clinical_query(
            'Global developmental delay', 'etiology', 'narrow', debug=True, year=2013)
        assert results[0] == '24257216'
        assert results[1] == '24123848'
        assert results[2] == '24089199'

    def test_specified_return_slice(self):
        pmids = self.fetch.pmids_for_query(since='2015/3/1', retmax=1000)
        assert len(pmids) == 1000

        pmids = self.fetch.pmids_for_query(since='2015/3/1', retstart=200, retmax=500)
        assert len(pmids) == 500

    def test_pmc_only(self):
        params = {'mesh': 'breast neoplasm'}
        stuff = self.fetch.pmids_for_query(since='2015/1/1', until='2015/3/1',
                                           pmc_only=True, **params)
        print(stuff)

    def test_ncbi_book_id_to_pubmed(self):
        for eg in NCBI_BOOKS:
            result = self.fetch.pmids_for_query(eg['book_id'])
            if len(result) > 0:
                assert result[0] == eg['pmid']
            else:
                assert eg['pmid'] is None
            print(eg, result)
class TestPubMedArticle(unittest.TestCase):

    def setUp(self):
        self.fetch = PubMedFetcher()

    def tearDown(self):
        pass

    def test_random_efetch(self):
        pmid = str(random.randint(22222222, 23333333))
        try:
            article = self.fetch.article_by_pmid(pmid)
            if article is not None:
                assert article.pmid == pmid
                assert article.title is not None
        except InvalidPMID:
            # The random PMID returned an InvalidPMID response (which is
            # totally OK). Run the test again with a new random PMID.
            self.test_random_efetch()

    def test_init1(self):
        """ Test on the xml returned by eutils """
        article = PubMedArticle(xml_str1)
        assert str(article.pmid) == '4'

    def test_init2(self):
        """ Test on the xml downloaded from medline """
        article = PubMedArticle(xml_str2)
        assert str(article.pmid) == '23697015'

    def test_to_dict(self):
        article = PubMedArticle(xml_str1)
        self.assertTrue(isinstance(article.to_dict(), dict))
def pmid_article(ref, user=None):
    article_array = []
    if user and user.email is not None:
        # With a user email available, resolve each PMID via PubMedLookup.
        for pmid in Reference.pmid(ref):
            url = "http://www.ncbi.nlm.nih.gov/pubmed/" + str(pmid)
            lookup = PubMedLookup(url, user.email)
            publication = Publication(lookup)
            article_array.append(publication)
    else:
        # Otherwise fall back to metapub.
        fetch = PubMedFetcher()
        for pmid in Reference.pmid(ref):
            article = fetch.article_by_pmid(pmid)
            article_array.append(article)
    return article_array
def _pubmed_pmid_to_article(pmid):
    """
    Use NCBI eutils to fetch pubmed article information.
    :param pmid: int or str
    :return: PubMedArticle
    """
    return PubMedFetcher().article_by_pmid(str(pmid))
def _pubmed_pmid_to_article(pmid):
    """
    Use eutils to fetch pubmed article information.
    TODO: integration with eutils.
    :param pmid: int or str
    :return: PubMedArticle
    """
    return PubMedFetcher('eutils').article_by_pmid(str(pmid))
def __init__(self, pmid):
    self.pmid = pmid
    fetch = PubMedFetcher(email='*****@*****.**')
    article = fetch.article_by_pmid(pmid)
    self.title = article.title
    self.journal = article.journal
    self.authors = article.authors
    # pm_cited - which papers cited the current paper
    try:
        self.pm_cited = fetch.related_pmids(pmid)['citedin']
    except Exception:
        self.pm_cited = None
    self.h_index = self.get_H_index() + 1
    # self.h_index = 1
    # pm_cite - which papers are cited by the current paper
    self.pm_cite = []
    print("create paper with pmid " + pmid)
def fetch_pubmed(pub_id, id_type="pmid"):
    """
        Fetches and formats pub data from pubmed
    """
    pm = PubMedFetcher()
    if id_type == 'doi':
        try:
            result = pm.article_by_doi(pub_id)
        except (AttributeError, MetaPubError, EutilsNCBIError):
            return None
    elif id_type == "pmid":
        try:
            result = pm.article_by_pmid(pub_id)
        except (AttributeError, InvalidPMID, EutilsNCBIError):
            return None
    elif id_type == "pmc":
        try:
            result = pm.article_by_pmcid('PMC' + str(pub_id))
        except (AttributeError, MetaPubError, EutilsNCBIError):
            return None
    result = result.to_dict()

    # Set link using DOI
    if result.get('doi'):
        result['url'] = "http://dx.doi.org/" + result.get('doi')
    else:
        result['url'] = result.get('url')

    # Provide PDF if possible
    if result.get('pmc'):
        result['pdf_url'] = f"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC{result['pmc']}/pdf"

    out = {"pub_title": result.get('title'),
           "pub_authors": result.get('authors'),
           "pub_abstract": result.get('abstract'),
           "pub_doi": result.get('doi'),
           "pub_pmid": result.get('pmid'),
           "pub_pmc": pub_id if id_type == 'pmc' else None,
           "pub_url": result.get('url'),
           "pub_pdf_url": result.get('pdf_url') or 'searching',
           "pub_journal": result.get('journal'),
           "pub_date": result['history'].get('pubmed')}
    return out
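# Usage sketch for fetch_pubmed. The identifiers are illustrative and not
# claimed to refer to the same article; a None return means the lookup failed.
def demo_fetch_pubmed():
    by_pmid = fetch_pubmed('23533569', id_type='pmid')
    by_doi = fetch_pubmed('10.1371/journal.pone.0058655', id_type='doi')
    return by_pmid, by_doi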
def get_info_by_PMID(PMID: str) -> Dict:
    '''This function takes a PMID str, requests information about the
    corresponding article via metapub and checks if all necessary information
    has been retrieved.'''
    article_dict = {}
    fetch = PubMedFetcher()
    try:
        article = fetch.article_by_pmid(PMID)
        # Save every public attribute of the article in the dict
        for info in dir(article):
            if not info.startswith('_'):
                article_dict[info] = getattr(article, info)
    except MetaPubError:
        pass
    # if contains_minimal_information(article_dict):
    # Add data retrieval info to the dict and return it
    article_dict = add_retrieval_information(article_dict, 'MetaPub', 'PMID', PMID)
    return article_dict
def _pubmed_central_pmcid_to_article(pmcid):
    """
    Specific to PMC PubMed Central.
    Use eutils to fetch pubmed article information.
    TODO: integration with eutils.
    :param pmcid:
    :return: PubMedArticle
    """
    return PubMedFetcher('eutils').article_by_pmcid(str(pmcid))
def _pubmed_central_pmcid_to_article(pmcid):
    """
    Specific to PMC PubMed Central.
    Use NCBI eutils to fetch pubmed article information.
    :param pmcid:
    :return: PubMedArticle
    """
    return PubMedFetcher().article_by_pmcid(str(pmcid))
def paper2dict(pmid, doi):
    """Sync paper's information into prophet database."""
    # For a single paper
    from metapub import PubMedFetcher
    fetch = PubMedFetcher()
    if not (pmid or doi):
        print(bcolors.FAIL)
        print("You need to specify -p/-d.", bcolors.ENDC)
        sys.exit(1)
    else:
        article = fetch_paper(pmid=pmid, doi=doi)
        print(obj2dict(article))
def downloadAbstract(self, keywords, file_name, max_return=1000000):
    fetcher = PubMedFetcher(cachedir=self.cache_dir, api_key=self.api_key)
    pmids = fetcher.pmids_for_query(keywords, retmax=max_return)
    corpus = ET.Element('corpus')
    keywords_item = ET.SubElement(corpus, 'keywords')
    keywords_item.text = keywords
    for pmid in pmids:
        print(pmid)
        doc = fetcher.article_by_pmid(pmid)
        title_str = self.removeHtmlTags(doc.title)
        abstract_str = self.removeHtmlTags(doc.abstract)
        if abstract_str == '':
            continue
        doc_item = ET.SubElement(corpus, 'article')
        doc_item.set('id', pmid)
        title_item = ET.SubElement(doc_item, 'title')
        title_item.text = title_str
        abstract_item = ET.SubElement(doc_item, 'abstract')
        abstract_item.text = abstract_str
    corpus_in_string = ET.tostring(corpus)
    with open(file_name, 'wb') as xml_file:
        xml_file.write(corpus_in_string)
def get_reference_from_pmid_by_metapub(pmid: str) -> dict:
    fetch = PubMedFetcher(cachedir=cache)
    reference = None
    try:
        time.sleep(0.34)  # stay under NCBI's rate limit of 3 requests/second
        article = fetch.article_by_pmid(pmid)
        reference = {'journal': article.journal,
                     'authors': article.authors,
                     'issue': article.issue,
                     'first_page': article.first_page,
                     'last_page': article.last_page,
                     'volume': article.volume,
                     'year': str(article.year),
                     'abstract': replace_characters(article.abstract),
                     'title': replace_characters(article.title),
                     'doi': article.doi,
                     'pmid': article.pmid}
    except Exception:
        print('*** Bad PMID:', pmid)
    return reference
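# Usage sketch; the PMID is illustrative, and the module-level `cache`
# variable above must point at a valid cache directory.
def demo_get_reference():
    ref = get_reference_from_pmid_by_metapub('23533569')
    return ref['title'] if ref else None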
def filter_results(results, words_in_title, limit):
    fetch = PubMedFetcher(email='*****@*****.**')
    filtered_results = []
    counter = 0
    for paper in results:
        pmid = paper.split('/')[-1].split('\n')[0]
        article = fetch.article_by_pmid(pmid)
        # Require at least one word from every word group to appear in the title.
        for words in words_in_title:
            include = False
            for word in words:
                if word.strip().lower() in article.title.lower():
                    include = True
                    break
            if not include:
                break
        if include:
            filtered_results.append(paper)
            counter += 1
            if counter == limit:
                return filtered_results
    return filtered_results
def crawl_chem_json(self, keyword, retmax=300):
    fetch = PubMedFetcher()
    pmids = fetch.pmids_for_query(keyword, retmax=retmax)
    self.textBrowser_value.emit("Scanning Iteration : " + str(retmax))
    self.textBrowser_value.emit("Expected Running Time : " + str(retmax * 2) + " seconds.")
    self.textBrowser_value.emit("PMID Scan Done!")
    self.progress_bar_value.emit(self.count)
    json_dicts = []
    self.textBrowser_value.emit("Crawling Paper Info..")
    for i, pmid in enumerate(pmids):
        try:
            if int(i / len(pmids) * 100) > self.count:
                self.count = int(i / len(pmids) * 100)
                self.progress_bar_value.emit(self.count)
            try:
                article = fetch.article_by_pmid(pmid)
            except Exception:
                self.textBrowser_value.emit("Error reading " + str(pmid))
                continue
            chemical = article.chemicals
            if not chemical:
                continue
            json_dicts.append(chemical)
        except Exception:
            continue
    self.textBrowser_value.emit("Progress Done!")
    return json_dicts
def search(request):
    ctx = {
        'query_saved': None,
        'saved_pmids': [],
        'total_saved_queries': SearchStash.objects.filter(user=request.user).count(),
    }
    f = PubMedFetcher()
    initial = {}
    query_saved = None
    try:
        query_saved = SearchStash.objects.get(search_used=request.GET.get('q'))
    except SearchStash.DoesNotExist:
        pass
    else:
        ctx['saved_pmids'] = [pub.pmid for pub in query_saved.pmids.all()]
        ctx['query_saved'] = query_saved
    if not ctx['query_saved'] and request.GET.get('q'):
        messages.add_message(request, messages.INFO,
                             '<strong>Note:</strong> You must click "Save Query" above '
                             'to start capturing publications for this query.')
    if request.GET.get('q', None):
        keywords = request.GET.get('q', None)
        initial['q'] = request.GET.get('q')
        pmids = f.pmids_for_query(query=keywords, retmax=100)
        pmid_list = []
        for pmid in pmids:
            new_pmid = Publication.objects.get_or_create(pmid=pmid)[0]
            row = {'pmid': new_pmid.pmid}
            pmid_list.append(row)
        ctx['keywords'] = keywords
        ctx['pmids'] = pmids
        ctx['pmid_list'] = pmid_list
        ctx['result_count'] = len(pmids)
    form = PubMedForm(initial=initial)
    ctx['form'] = form
    return render(request, 'lum/search.html', ctx)
def measure_similarity_abstracts(nlp, pmid):
    def scrape_related_abstracts(pm_id):
        related_ids = scrape_related_ids(pm_id)
        if len(related_ids) > 8:
            related_ids = related_ids[:8]
        abstracts = []
        for related in related_ids:
            starter = 'https://pubmed.ncbi.nlm.nih.gov/'
            link = starter + related
            data = requests.get(link).text
            soup = BeautifulSoup(data, 'html.parser')
            abstract_header = soup.find('div', {'id': 'en-abstract'})
            try:
                abstract = str(abstract_header.p.string).strip()
                abstracts.append(abstract)
            except AttributeError:
                # No abstract section on the page; skip this article.
                pass
        return abstracts

    fetch = PubMedFetcher()
    exemplary = fetch.article_by_pmid(pmid).abstract
    doc1 = nlp(exemplary)
    scores = []
    for abstract in scrape_related_abstracts(pmid):
        doc2 = nlp(abstract)
        scores.append(doc1.similarity(doc2))
    return mean(scores)
def crawl_chem_json(keyword, retmax=1000):
    fetch = PubMedFetcher()
    pmids = fetch.pmids_for_query(keyword, retmax=retmax)
    print("PMID scan Done!")
    json_dicts = []
    print("Crawling Paper Info..")
    for pmid in tqdm(pmids):
        try:
            article = fetch.article_by_pmid(pmid)
        except Exception:
            print("Error reading " + str(pmid))
            continue
        chemical = article.chemicals
        if not chemical:
            continue
        json_dicts.append(chemical)
    print("Process Done!")
    return json_dicts
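# Usage sketch for crawl_chem_json; the query string is illustrative. Each
# returned entry is a metapub `chemicals` dict for one article.
def demo_crawl_chem_json():
    return crawl_chem_json('aspirin pharmacokinetics', retmax=20)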
def test_article_by_pmid(self):
    pmid = '4'
    fetch = PubMedFetcher()
    article = fetch.article_by_pmid(pmid)
    assert str(article.pmid) == pmid

    pmid = '25763451'
    fetch = PubMedFetcher()
    article = fetch.article_by_pmid(pmid)
    assert str(article.pmid) == pmid
def search(entry):
    fetch = PubMedFetcher()
    # Try each identifier in turn: PMID, then PMCID, then DOI, then a
    # citation-based lookup.
    try:
        article = fetch.article_by_pmid(entry['pmid'])
    except Exception:
        try:
            article = fetch.article_by_pmcid(entry['pmcid'])
        except Exception:
            try:
                article = fetch.article_by_doi(entry['doi'])
            except Exception:
                try:
                    pmids = fetch.pmids_for_citation(authors=entry['author'],
                                                     journal=entry['journal'],
                                                     year=entry['year'],
                                                     volume=entry['volume'])
                    # pmids2 = fetch.pmids_for_query(entry['title'])
                    article = fetch.article_by_pmid(pmids[0])
                except Exception:
                    return None
    return article
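# Usage sketch for the fallback chain above. The dict keys match those read by
# search(); every value is illustrative, and identifiers that are unknown can
# simply be left as None so the chain falls through to the next lookup.
def demo_search_entry():
    entry = {'pmid': None, 'pmcid': None,
             'doi': '10.1371/journal.pone.0058655',
             'author': None, 'journal': None, 'year': None, 'volume': None,
             'title': None}
    return search(entry)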
def search(source = "PubMed", level = "basic", db = "PubMed", query = None, unlabeled_string = None, affiliation = None, article_identifier = None, all_fields = None, author = None, author_identifier = None, book = None, corporate_author = None, create_date = None, completion_date = None, conflict_of_interest = None, ec_rn_number = None, editor = None, entrez_date = None, filter_citations = None, first_author_name = None, full_author_name = None, full_investigator_name = None, grant_number = None, investigator = None, isbn = None, issue = None, journal = None, language = None, last_author = None, location_id = None, mesh_date = None, mesh_major_topic = None, mesh_subheadings = None, mesh_terms = None, modification_date = None, nlm_unique_id = None, other_term = None, owner = None, pagination = None, personal_name_as_subject = None, pharmacological_action = None, place_of_publication = None, pmid = None, publisher = None, publication_date = None, publication_type = None, retmax = None, retmode = None, secondary_source_id = None, sort = None, subset = None, supplementary_concept = None, text_words = None, title = None, title_abstract = None, transliterated_title = None, uid = None, volume = None, raw = False, exact = False, user = None): if source.lower() in ["pubmed"] and level.lower() == "complex": return eutils_search(db = db, retmode = retmode, retmax = retmax, sort = sort, unlabeled_string = unlabeled_string, affiliation = affiliation, article_identifier = article_identifier, all_fields = all_fields, author = author, author_identifier = author_identifier, book = book, corporate_author = corporate_author, create_date = create_date, completion_date = completion_date, conflict_of_interest = conflict_of_interest, ec_rn_number = ec_rn_number, editor = editor, entrez_date = entrez_date, filter_citations = filter_citations, first_author_name = first_author_name, full_author_name = full_author_name, full_investigator_name = full_investigator_name, grant_number = grant_number, investigator = investigator, isbn = isbn, issue = issue, journal = journal, language = language, last_author = last_author, location_id = location_id, mesh_date = mesh_date, mesh_major_topic = mesh_major_topic, mesh_subheadings = mesh_subheadings, mesh_terms = mesh_terms, modification_date = modification_date, nlm_unique_id = nlm_unique_id, other_term = other_term, owner = owner, pagination = pagination, personal_name_as_subject = personal_name_as_subject, pharmacological_action = pharmacological_action, place_of_publication = place_of_publication, pmid = pmid, publisher = publisher, publication_date = publication_date, publication_type = publication_type, secondary_source_id = secondary_source_id, subset = subset, supplementary_concept = supplementary_concept, text_words = text_words, title = title, title_abstract = title_abstract, transliterated_title = transliterated_title, uid = uid, volume = volume, raw = raw, exact = exact) elif source.lower() in ["pubmed"] and level.lower() == "basic": # Use 'unlabeled_string' or 'query' here. # This function already takes completed # PubMed queries as strings (with # various connectors and constructors). if unlabeled_string: fetch = PubMedFetcher() pubmed_id_list = fetch.pmids_for_query(unlabeled_string) ref_list = [] for pubmed_id in pubmed_id_list: article = fetch.article_by_pmid(pubmed_id) # Need a faster way to get titles... 
temp_ref = Reference(identifier = str(pubmed_id), identifier_type = "PubMed ID", source = "PubMed", name = article.title) ref_list.append(temp_ref) return ref_list elif query: # This is where the basic reference # search redirects for now, but it # is relatively slow. fetch = PubMedFetcher() pubmed_id_list = fetch.pmids_for_query(query) ref_list = [] for pubmed_id in pubmed_id_list: try: article = fetch.article_by_pmid(pubmed_id) # Need a faster way to get titles... temp_ref = Reference(identifier = str(pubmed_id), identifier_type = "PubMed ID", source = "PubMed", name = article.title) ref_list.append(temp_ref) except metapub.exceptions.InvalidPMID: print("An invalid PMID error occurred.") temp_ref = Reference(identifier = str(pubmed_id), identifier_type = "PubMed ID", source = "PubMed") ref_list.append(temp_ref) else: temp_ref = Reference(identifier = str(pubmed_id), identifier_type = "PubMed ID", source = "PubMed") ref_list.append(temp_ref) return ref_list elif source.lower() in ["google", "google scholar"]: return google_scholar_search(unlabeled_string) elif source.lower() in ["openlibrary"]: return openlibrary_search(unlabeled_string)
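# Usage sketch for the dispatcher above. A basic PubMed search only needs a
# completed query string; the query text is illustrative only.
def demo_basic_pubmed_search():
    return search(source="PubMed", level="basic",
                  unlabeled_string="brugada syndrome[Title] AND 2013[PDAT]")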
def __init__(self, cache_file_name, hIndex_filename, no_index_filename,
             label_filename, label_ratio_thresh, output_filename, workers, rsp):
    self.paper_info = {}
    self.workers = workers
    self.label_filename = label_filename
    if workers:
        self.label_data_with_workers(label_filename)
    else:
        self.label_data(label_filename)
    self.paper_cache = PaperCache(cache_file_name)
    self.hIndex = HIndex(hIndex_filename)
    self.label_ratio_thresh = label_ratio_thresh
    self.output_filename = output_filename
    self.fetcher = PubMedFetcher(email='*****@*****.**')
    paper_builder = PaperBuilder(self.hIndex, self.paper_cache, self.fetcher,
                                 no_index_filename)
    self.papers_network = P_N(list(self.paper_info.keys()), paper_builder)
    for pmid, p in self.papers_network.csv_papers_dict.items():
        self.paper_info[pmid]['Journal hIndex'] = p.h_index
        self.paper_info[pmid]['year'] = p.year
    self.rsp = rsp
def handle(self, *args, **options):
    csvfile = "pmids.csv"
    with open(csvfile, 'r') as f:
        reader = csv.reader(f)
        count = 0
        for row in reader:
            pmid = self.get_pmid(row)
            if count == 0:
                count += 1
                continue  # skip first header row
            if Publication.objects.filter(pmid=pmid).count() > 0:
                count += 1
                continue
            if count == 3000:
                break
            ref_id = row[0]
            ref_type = row[1]
            year = row[3]
            article_title = row[4]
            secondary_author = row[5]
            journal_title = row[6]
            place_published = row[7]
            publisher = row[8]
            volume = row[9]
            issue = row[10]
            pages = row[11]
            date = row[12]  # bad data in csv, don't use...
            alt_journal = row[13]
            doi = self.get_doi(row)
            print(doi)
            # pmid_from_ref = row[15]
            # pmid_from_updates = row[16]
            abstract = row[17]
            url = row[18]
            file_attachments = row[19]
            author_address_from_pubmed = row[20]  # empty column
            figure = row[21]
            cis_acc = row[22]
            access_date = row[23]
            luminex_product = row[24]
            db_name = row[25]
            db_provider = row[26]
            language = row[27]
            reprint_author_name = row[28]
            blank = row[29]
            reprint_author_email = row[30]
            ecopy = row[39]
            paper_type = row[40]
            species = row[41]
            assay = row[42]
            sample_type = row[43]  # this is the article title...
            whos_kit = row[44]
            misc = row[45]
            application = row[46]
            market_segment = row[47]
            subsidiary_author = row[48]
            custom_6 = row[49]
            issn = row[51]
            pub = Publication(
                title=sample_type,
                pmid=pmid,
                doi=doi,
                abstract=abstract,
            )
            fresh_data = None
            if len(pmid) < 1:
                fetch = PubMedFetcher()
                try:
                    fresh_data = fetch.article_by_doi(doi)
                    fresh_data = fresh_data.to_dict()
                except Exception:
                    pass
                else:
                    pub.pmid = fresh_data['pmid']
            pub.save()
            self.add_cis_tags(pub, row)
            authors = self.get_authors(row)
            for author in authors:
                pub.authors.add(author)
            lab = self.get_lab(row)
            if lab:
                for author in authors:
                    author.labs.add(lab)
                pub.labs.add(lab)
            count += 1
def psearch(pmid):
    fetch = PubMedFetcher()
    ret = fetch.article_by_pmid(pmid)
    print(ret.to_dict())