示例#1
0
def _get_pdf(k, title):
    """Return the path of a PDF for ``title``, downloading it if needed.

    The output directory is ``out_dir(k)``. If it already contains a file,
    that file is returned without any network access. Otherwise the title
    is normalised through a ``scholarly`` publication search, a BibTeX
    record is looked up with ``get_bib_from_title`` and, when the record is
    an ``article`` with a DOI, the paper is fetched with ``SciHub``.

    Args:
        k: Key passed to ``out_dir`` to select the output directory.
        title: Free-text title of the paper to look for.

    Returns:
        Path of the first file listed in the output directory, or ``None``
        when the search is empty, the lookup or download fails, the record
        has no usable DOI, or nothing was written to the directory.
    """
    target_dir = out_dir(k)

    # Skip if exists
    if os.path.isdir(target_dir):
        existing = os.listdir(target_dir)
        if existing:
            return os.path.join(target_dir, existing[0])

    # Normalize title. An empty search would make a bare next() raise
    # StopIteration; treat it like every other failure and return None.
    google_result = next(scholarly.search_pubs(title), None)
    if google_result is None:
        print("\tNo search result")
        return None
    print(title)
    title = google_result['bib']['title'] + ' ' + (' '.join(
        google_result['bib']['author']))
    print(title)

    # Get DOI
    try:
        found, bib_string = get_bib_from_title(title)
    except Exception as e:
        print("Error while getting DOI", e)
        return None
    if not found:
        return None

    # Download
    bib = bibtexparser.loads(bib_string).entries
    if not (bib and ("doi" in bib[0])
            and (bib[0]['ENTRYTYPE'] == 'article')):
        print(bib)
        print("\tAbsent DOI")
        return None

    doi = bib[0]["doi"]
    try:
        SciHub(doi, target_dir).download(choose_scihub_url_index=3)
    except Exception as e:
        print("Error while downloading", e)
        return None

    # The download may complete without writing anything (or without ever
    # creating the directory), so check before listing it.
    if not os.path.isdir(target_dir):
        return None
    downloaded = os.listdir(target_dir)
    return os.path.join(target_dir, downloaded[0]) if downloaded else None
示例#2
0
def update_bib(bib, get_first=True):
    """Fill in a BibTeX entry that lacks a DOI by looking up its title.

    When ``bib`` has a ``"title"`` but no ``"doi"``, the title is passed to
    ``get_bib_from_title``; if a record is found and parses into at least
    one entry, the first parsed entry replaces ``bib``. The original
    ``"ID"`` is always kept so existing citation keys stay valid.

    Args:
        bib: Entry dict; must contain ``"ID"``.
        get_first: Forwarded to ``get_bib_from_title``.

    Returns:
        The replacement entry when the lookup succeeded, otherwise the
        original ``bib``; in both cases carrying the original ``"ID"``.

    Raises:
        KeyError: If ``bib`` has no ``"ID"`` key.
    """
    bib_id = bib["ID"]
    if "doi" not in bib and "title" in bib:
        found, bib_string = get_bib_from_title(bib["title"], get_first)
        if found:
            # A "found" record can still parse to zero entries; keep the
            # original entry in that case instead of raising IndexError.
            entries = bibtexparser.loads(bib_string).entries
            if entries:
                bib = entries[0]
    bib["ID"] = bib_id
    return bib
示例#3
0
def download_from_title(title, location="", use_libgen=False):
    """Look up a paper by title and download its PDF.

    The title is resolved with ``get_bib_from_title``. If the first parsed
    BibTeX entry has a DOI, the PDF is fetched with
    ``download_from_libgen`` or ``download_from_scihub`` and saved as
    ``location + "<doi with '/' replaced by '_'>.pdf"``.

    Args:
        title: Title of the paper to look up.
        location: Prefix prepended to the file name by plain string
            concatenation, so a directory must end with a path separator.
        use_libgen: Download through ``download_from_libgen`` when true,
            otherwise through ``download_from_scihub``.

    Returns:
        None. Nothing is downloaded when no record is found; a record
        without a DOI only prints ``"\tAbsent DOI"``.
    """
    found, bib_string = get_bib_from_title(title)
    if not found:
        return

    # A "found" record can still parse to zero entries; treat that the same
    # as an entry without a DOI rather than failing with IndexError.
    entries = bibtexparser.loads(bib_string).entries
    bib = entries[0] if entries else {}
    if "doi" not in bib:
        print("\tAbsent DOI")
        return

    doi = bib["doi"]
    pdf_file = location + "{}.pdf".format(doi.replace("/", "_"))
    if use_libgen:
        download_from_libgen(doi, pdf_file)
    else:
        download_from_scihub(doi, pdf_file)
示例#4
0
    def search(self, get_first=False):
        """Look up this paper's DOI (and canonical title) from its title.

        ``self.title`` is passed to ``get_bib_from_title`` (per the original
        note, this is a Crossref lookup -- confirm against that helper).
        When a record is found and parses into at least one BibTeX entry,
        ``self.title`` and ``self.doi`` are overwritten from the first
        entry. When it parses into no entries, the DOI is pulled from the
        raw string with a regular expression and only ``self.doi`` is set.
        Nothing is changed when no record is found.

        Args:
            get_first: Forwarded to ``get_bib_from_title``.

        Raises:
            IndexError: If a record was found but no DOI could be extracted
                from it; the underlying error is chained as the cause.
        """
        found, bib_string = get_bib_from_title(self.title, get_first=get_first)
        if not found:
            return
        try:
            entries = bibtexparser.loads(bib_string).entries
            if entries:
                bib = entries[0]
                self.title = bib['title']
                self.doi = bib['doi']
            else:
                # The parser produced nothing usable; fall back to
                # scraping the DOI field from the raw BibTeX text.
                self.doi = re.search(r'doi = {(.*?)}', bib_string,
                                     re.S).group(1)
        except Exception as err:
            # Callers still get IndexError, but KeyboardInterrupt and
            # SystemExit are no longer swallowed by a bare except.
            raise IndexError("no doi find") from err