Exemplo n.º 1
0
 def update(item):
     """Fetch recent PubMed papers for ``item.keyword`` and link the new ones.

     Up to 10 papers are requested from ``PubMedFetcher`` (sorted by
     ``"pub+date"``). Each fetched paper is passed to ``store_paper``; if the
     stored document's id is not yet among ``item.papers``, the paper is
     pushed onto ``item.papers`` and ``item`` is pushed onto the paper's
     ``subscriptions``.

     Args:
         item: Object exposing ``keyword``, ``papers`` (iterable of objects
             with an ``id``) and an ``update(push__papers=...)`` method.
             NOTE(review): looks like a MongoEngine document -- confirm.

     Returns:
         None. The function works purely through side effects on ``item``
         and on the stored paper documents.
     """
     keyword = item.keyword
     # A set gives O(1) membership tests; ids are assumed hashable
     # (TODO confirm for the id type used by the paper documents).
     known_ids = {existing.id for existing in item.papers}
     fetcher = PubMedFetcher(keyword, num_of_documents=10, sort="pub+date")
     for paper in fetcher.papers.values():
         stored = store_paper(paper)
         if stored.id in known_ids:
             continue
         item.update(push__papers=stored)
         stored.update(push__subscriptions=item)
         # Remember the id so the same paper appearing twice in a single
         # fetch is not linked (and subscribed) a second time.
         known_ids.add(stored.id)
Exemplo n.º 2
0
 def process_one(self, item):
     """Parse one textual PubMed record, store it, and return its fields.

     The record text is searched for the ``pmid``, ``year``, ``month``,
     ``day``, title ``name``, ``authors`` block, journal ``name`` and
     ``abstract`` fields, in that order. Author names are taken from every
     ``name ml "..."`` entry inside the authors block.

     Args:
         item: Raw text of a single record. Texts shorter than 30
             characters are ignored.

     Returns:
         A dict with the keys ``Source``, ``PMID``, ``Title``, ``Author``
         (list of names), ``Journal``, ``Year``, ``Date``, ``Abstract`` and
         ``URL``, plus ``DBID`` when ``store_paper`` succeeded. ``None`` is
         returned when the text is too short, does not match the expected
         layout, or contains no ``name ml`` author entries.

     Raises:
         ValueError: If the parsed year/month/day do not form a valid date.
     """
     if len(item) < 30:
         return None
     # Raw string: the pattern contains regex escapes such as \d and \{
     # which are invalid escape sequences in a normal string literal.
     m = re.search(
         r'pmid (\d+).+?year (\d+).+?month (\d+).+?day (\d+).+?title.+?name "(.+?)".+?authors \{(.+?)\},\s*from journal.+?name "(.+?)".+?abstract "(.+?)"',
         item,
         re.DOTALL,
     )
     if not (m and m.group(6)):
         # The counter was local to a single call, so the logged number is
         # always 1; the increment that used to follow the log was dead code.
         logging.warning("Parse error. #%d", 1)
         return None

     m_author = re.findall(r'name ml "(.+?)"', m.group(6))
     if not m_author:
         return None

     pmid = m.group(1)
     year = int(m.group(2))
     month = int(m.group(3))
     day = int(m.group(4))
     h = {
         "Source": "PubMed",
         "PMID": pmid,
         # Titles and abstracts may be wrapped across lines in the record.
         "Title": m.group(5).replace("\n", "").strip(),
         "Author": m_author,
         "Journal": m.group(7),
         "Year": year,
         "Date": datetime(year, month, day),
         "Abstract": m.group(8).replace("\n", "").strip(),
     }
     h["URL"] = "http://www.ncbi.nlm.nih.gov/pubmed/" + pmid
     try:
         paper_mongo = store_paper(h)
         h["DBID"] = str(paper_mongo.id)
     except Exception:
         # Storing is best-effort: the parsed record is still returned, but
         # the failure is logged with its traceback instead of being hidden.
         logging.exception("Store paper failed: %s", pmid)
     return h