def _first_file_in(directory):
    """Return the path of the first entry in *directory*, or None.

    None is returned when *directory* does not exist, is not a directory,
    or is empty. "First" means the first name reported by ``os.listdir``,
    whose order is arbitrary.
    """
    if not os.path.isdir(directory):
        return None
    entries = os.listdir(directory)
    return os.path.join(directory, entries[0]) if entries else None


def _get_pdf(k, title):
    """Return the path of a PDF for *title*, downloading it if necessary.

    The output directory is ``out_dir(k)``. If it already holds a file, that
    file is returned and no lookup is made. Otherwise the title is
    normalised through ``scholarly.search_pubs``, resolved to a BibTeX
    record with ``get_bib_from_title``, and, when the record is an
    ``article`` carrying a DOI, fetched with ``SciHub``.

    Args:
        k: Key passed to ``out_dir`` to obtain the output directory.
        title: Free-text title used for the search.

    Returns:
        Path of the first file found in the output directory, or None when
        the search yields nothing, the DOI lookup or download fails, the
        record has no usable DOI, or no file is present after downloading.
    """
    # Resolve the directory once instead of on every filesystem check.
    target_dir = out_dir(k)

    # Skip if exists
    existing = _first_file_in(target_dir)
    if existing is not None:
        return existing

    # Normalize title
    # A default for next() avoids an uncaught StopIteration when the search
    # has no hits; like the other failure paths, report it and return None.
    google_result = next(scholarly.search_pubs(title), None)
    if google_result is None:
        print("No search result for", title)
        return None
    print(title)
    title = google_result['bib']['title'] + ' ' + ' '.join(
        google_result['bib']['author'])
    print(title)

    # Get DOI
    try:
        found, bib_string = get_bib_from_title(title)
    except Exception as e:
        print("Error while getting DOI", e)
        return None
    if not found:
        return None

    # Download
    bib = bibtexparser.loads(bib_string).entries
    if not (bib and "doi" in bib[0] and bib[0]['ENTRYTYPE'] == 'article'):
        # Also reached for non-article entries, not only for a missing DOI.
        print(bib)
        print("\tAbsent DOI")
        return None

    doi = bib[0]["doi"]
    try:
        # NOTE(review): index 3 is a hard-coded choice among the downloader's
        # mirror URLs - confirm it is still the intended one.
        SciHub(doi, target_dir).download(choose_scihub_url_index=3)
    except Exception as e:
        print("Error while downloading", e)
        return None

    # The downloader may not have created the directory; treat that the same
    # as an empty directory instead of raising from os.listdir.
    return _first_file_in(target_dir)
def update_bib(bib, get_first=True):
    """Fill in a BibTeX entry that lacks a DOI by looking it up by title.

    When *bib* has a ``"title"`` but no ``"doi"``, the title is passed to
    ``get_bib_from_title``. If that lookup succeeds and the returned BibTeX
    parses to at least one entry, the first parsed entry replaces *bib*,
    keeping the original ``"ID"``. In every other case *bib* is returned
    unchanged.

    Args:
        bib: Entry dictionary; must contain the key ``"ID"``.
        get_first: Forwarded unchanged to ``get_bib_from_title``.

    Returns:
        The replacement entry, or the original *bib* object when no lookup
        was needed or the lookup produced nothing usable.

    Raises:
        KeyError: If *bib* has no ``"ID"`` key.
    """
    bib_id = bib["ID"]
    if "doi" in bib or "title" not in bib:
        return bib

    found, bib_string = get_bib_from_title(bib["title"], get_first)
    if not found:
        return bib

    entries = bibtexparser.loads(bib_string).entries
    if not entries:
        # The lookup reported success but the BibTeX parsed to nothing;
        # keep the caller's record rather than raising IndexError.
        return bib

    updated = entries[0]
    updated["ID"] = bib_id
    return updated
def download_from_title(title, location="", use_libgen=False):
    """Resolve *title* to a BibTeX record and download the paper by its DOI.

    The title is looked up with ``get_bib_from_title``; nothing happens when
    the lookup reports failure. The first parsed entry must carry a
    ``"doi"``, otherwise a message is printed and the function returns.

    The target file name is the DOI with every ``/`` replaced by ``_`` plus
    ``.pdf``, appended to *location* by plain string concatenation, so
    *location* needs its own trailing separator when it names a directory.

    Args:
        title: Title to look up.
        location: Prefix prepended verbatim to the generated file name.
        use_libgen: When true, call ``download_from_libgen``; otherwise
            call ``download_from_scihub``.

    Returns:
        None.
    """
    found, bib_string = get_bib_from_title(title)
    if not found:
        return

    bib = bibtexparser.loads(bib_string).entries[0]
    if "doi" not in bib:
        print("\tAbsent DOI")
        return

    doi = bib["doi"]
    bib["pdf_file"] = location + "{}.pdf".format(doi.replace("/", "_"))

    if use_libgen:
        download_from_libgen(doi, bib["pdf_file"])
        return

    # The returned pair is unpacked but not used afterwards.
    found, bib = download_from_scihub(doi, bib["pdf_file"])
def search(self, get_first=False):
    """
    Look up the DOI for ``self.title`` and store it on the instance.

    The title is resolved with ``get_bib_from_title`` (a Crossref lookup,
    according to the original docstring). When the lookup reports failure
    the instance is left untouched.

    On success the returned BibTeX is parsed. If it yields entries, the
    first one supplies ``self.title`` and then ``self.doi``. If it yields
    none, the DOI is extracted from the raw string with a regular
    expression and only ``self.doi`` is set.

    Args:
        get_first: Forwarded unchanged to ``get_bib_from_title``.

    Returns:
        None.

    Raises:
        IndexError: If the BibTeX cannot be parsed, a field is missing, or
            the fallback pattern does not match. The underlying error is
            attached as the cause. ``self.title`` may already have been
            updated when the failure is a missing ``doi`` field.
    """
    found, bib_string = get_bib_from_title(self.title, get_first=get_first)
    if not found:
        return

    try:
        # Parse once and reuse the result for both the check and the lookup.
        entries = bibtexparser.loads(bib_string).entries
        if entries:
            bib = entries[0]
            self.title = bib['title']
            self.doi = bib['doi']
        else:
            # The parser produced no entries: fall back to pulling the DOI
            # out of the raw text.
            self.doi = re.search(r'doi = {(.*?)}', bib_string, re.S).group(1)
    except Exception as err:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are no longer turned into IndexError.
        raise IndexError("no doi find") from err