示例#1
0
def check_arxiv_published(value,
                          field="id",
                          get_first=True,
                          keep_eprint=False):
    """Look up an arXiv entry and return a BibTeX string for it.

    Parameters
    ----------
        value: str
            Search value. Every ``arxiv:`` tag (case-insensitive) is
            removed before the lookup.
        field: str
            Field name passed on to ``get_arxiv_info``.
        get_first: bool
            When exactly ``False``, the search is by title
            (``field == "ti"``) and more than one item came back,
            ``ask_which_is`` chooses the item. In every other case the
            first item is used.
        keep_eprint: bool
            When true and the item has an ``"arxiv_doi"``, the arXiv
            number is added to the bib through ``add_eprint_to_bib``.

    Returns
    -------
        found:
            Lookup flag from ``get_arxiv_info`` (or from
            ``ask_which_is`` when it was consulted).
        published:
            Flag returned by ``get_bib_from_doi`` for the item's DOI;
            ``False`` when the item has no ``"arxiv_doi"``.
        bib: str
            BibTeX text, or ``""`` when nothing was found.
    """
    # Raw string: the previous "arxiv\:" relied on an invalid escape
    # sequence, which newer Python versions warn about.
    value = re.sub(r"arxiv:", "", value, flags=re.I)
    found, items = get_arxiv_info(value, field)
    if found:
        if get_first is False and field == "ti" and len(items) > 1:
            found, item = ask_which_is(value, items)
        else:
            item = items[0]

    if not found:
        print("\t\nArxiv not found.")
        return found, False, ""

    if "arxiv_doi" not in item:
        # No DOI on the arXiv record: build the entry from the arXiv data.
        return found, False, generate_bib_from_arxiv(item, value, field)

    published, bib = get_bib_from_doi(item["arxiv_doi"])
    if keep_eprint:
        # Second-to-last piece of the id after splitting on "/" or "v".
        # NOTE(review): presumably the arXiv number without its version
        # suffix -- confirm this holds for old-style ids ("hep-th/...").
        eprint = re.split('/|v', item["id"])[-2]
        bib = add_eprint_to_bib(bib, eprint)
    return found, published, bib
示例#2
0
def update_bib(bib):
    """Refresh a bib entry from its DOI while keeping its citation key.

    When the entry has a ``"doi"`` and ``get_bib_from_doi`` reports
    success, the entry is replaced by the first entry parsed from the
    fetched BibTeX string. The original ``"ID"`` is written back onto
    whichever entry is returned.

    :param bib: entry mapping; must contain ``"ID"``
    :return: the refreshed entry, or the same ``bib`` object when no
        refresh took place
    """
    original_key = bib["ID"]
    refreshed = bib

    if "doi" in bib:
        ok, fetched = get_bib_from_doi(bib["doi"])
        if ok:
            parsed = bibtexparser.loads(fetched)
            refreshed = parsed.entries[0]

    # Keep the caller's citation key regardless of what was fetched.
    refreshed["ID"] = original_key
    return refreshed
示例#3
0
def get_bib_from_title(title, get_first=False, abbrev_journal=False):
    """Search for a paper by title and return its BibTeX entry.

    :param title: title passed to ``get_from_title``
    :param get_first: forwarded to ``get_from_title``
    :param abbrev_journal: forwarded to ``get_bib_from_doi`` for every
        DOI lookup, on both the Crossref and the arXiv path
    :return: ``(found, bib)``. ``bib`` is ``""`` when nothing was
        fetched. For a Crossref item with a DOI, ``found`` is the flag
        returned by the DOI lookup; otherwise it is the flag returned
        by the title search.
    """
    bib = ""
    found, item = get_from_title(title, get_first)
    if not found:
        return found, bib

    if item["is_crossref"]:
        if "DOI" in item:
            # abbrev_journal used to be dropped on this path, so the
            # caller's choice only took effect for arXiv results.
            found, bib = get_bib_from_doi(item["DOI"], abbrev_journal)
    elif "arxiv_doi" in item:
        # The DOI lookup flag is ignored here, so `found` still
        # reflects the title search.
        _, bib = get_bib_from_doi(item["arxiv_doi"], abbrev_journal)
    else:
        bib = generate_bib_from_arxiv(item, title, field="ti")

    # TODO: item["short-container-title"][0] may carry an abbreviated
    # journal name; the original (truncated) note suggested using it.
    return found, bib
示例#4
0
def pdf2bib(pdf_file):
    """
    Extract a DOI from a PDF's text and fetch the matching BibTeX entry.

    Each pattern in ``all_doi_res`` is tried in order; the first one
    whose match leads to a successful ``get_bib_from_doi`` lookup wins.

    :param pdf_file: the path to the PDF file
    :return: The bibtex entry as a string
    :raises DoiNotFoundError: if none of the patterns matched the text
    :raises BibRetrievalError: if a DOI matched but no non-empty bib
        string could be retrieved
    """
    text = get_pdf_page_text(pdf_file)

    matched_any = False
    result = ''

    for pattern in all_doi_res:
        hit = pattern.search(text)
        if hit is None:
            # This pattern found nothing; move on to the next one.
            continue

        # Remember that at least one pattern matched: it decides which
        # error is raised if every lookup fails.
        matched_any = True
        raw_doi = hit.group(0)

        # Cut the DOI at the first non-ASCII character (for now the
        # suffix is assumed to be ASCII only). This fixes cases such as
        # doi:10.5194/acp-11-8543-2011, where the (c) symbol directly
        # follows the DOI in the PDF and ends up inside the match.
        cut = next(
            (pos for pos, ch in enumerate(raw_doi) if ord(ch) > 127),
            len(raw_doi),
        )
        ascii_doi = raw_doi[:cut]

        root_logger.debug('Looking up DOI "{}"'.format(ascii_doi))

        # A successful lookup ends the search; otherwise `result` stays
        # empty and the next pattern gets its chance.
        ok, fetched = get_bib_from_doi(ascii_doi)
        if ok:
            result = fetched
            break

    if not matched_any:
        raise DoiNotFoundError('DOI search failed on {}'.format(pdf_file))
    if len(result) == 0:
        raise BibRetrievalError(
            'Bib string lookup failed on {}'.format(pdf_file))

    return result
示例#5
0
def main(section):
    """Write BibTeX entries for the cached DOIs and return the LaTeX
    references section.

    Reads ``dois.txt`` from ``reportinator.cache``, takes the first
    whitespace-separated token of every non-blank line as a DOI, and
    writes each entry that ``get_bib_from_doi`` finds to ``output.bib``
    in the same directory.

    :param section: not used by this function
    :return: LaTeX snippet that prints the bibliography
    """
    bib_path = reportinator.cache + "/output.bib"
    doi_path = reportinator.cache + "/dois.txt"

    # Context managers make sure both files are closed (and the output
    # flushed) even if a lookup raises. The output file is opened first,
    # as before, so it is created/truncated even when dois.txt is missing.
    with open(bib_path, "w") as bib_file, open(doi_path, "r") as doi_file:
        for line in doi_file:
            tokens = line.split()
            if not tokens:
                # Blank line: nothing to look up.
                continue
            # split() without arguments also drops the trailing newline,
            # which used to stay attached to a DOI alone on its line.
            doi = tokens[0]
            found, bib = get_bib_from_doi(doi)
            if found:
                bib_file.write(bib + "\n")

    return (
        "\n\\section{References}\n"
        "\\nocite{*}\n"
        "\\printbibliography[heading=none]\n\n"
    )
示例#6
0
def _to_bibtex(doi, template, idx):
    try:
        from doi2bib.crossref import get_bib_from_doi
    except ImportError:
        print(
            "Cannot generate BibTeX citation, missing doi2bib dependency",
            file=sys.stderr,
        )
        return doi

    if "doi.org" not in doi:
        return doi
    bib = get_bib_from_doi(doi)[1]
    # replace identifier with template name
    m = re.search(r"([A-Z])\w+", bib)
    return bib.replace(m.group(), "%s%s" % (template.lower(), idx))
示例#7
0
def check_arxiv_published(value, field="id", get_first=True):
    """Look up an arXiv entry and return a BibTeX string for it.

    :param value: search value; every ``arxiv:`` tag (case-insensitive)
        is removed before the lookup
    :param field: field name passed on to ``get_arxiv_info``
    :param get_first: when exactly ``False``, the search is by title
        (``field == "ti"``) and several items came back,
        ``ask_which_is`` chooses the item; otherwise the first is used
    :return: ``(found, published, bib)``. ``published`` is the flag
        returned by ``get_bib_from_doi`` and stays ``False`` when the
        item's ``"doi"`` is ``None``; ``bib`` is ``""`` when nothing
        was found.
    """
    # Raw string: the previous "arxiv\:" relied on an invalid escape
    # sequence, which newer Python versions warn about.
    value = re.sub(r"arxiv:", "", value, flags=re.I)
    found, items = get_arxiv_info(value, field)
    if found:
        if get_first is False and field == "ti" and len(items) > 1:
            found, item = ask_which_is(value, items)
        else:
            item = items[0]

    if not found:
        print("\t\nArxiv not found.")
        return found, False, ""

    doi = item["doi"]
    if doi is not None:
        published, bib = get_bib_from_doi(doi)
        return found, published, bib

    # No DOI recorded on the item: build the entry from the arXiv data.
    return found, False, generate_bib_from_arxiv(item, value, field)
示例#8
0
    # Collect every DOI reference found in file `f`, then write a .bib
    # file with an entry for each collected DOI that is not in `labels`.
    # NOTE(review): `f`, `args` and `labels` are defined outside this
    # fragment; `labels` is presumably the set of keys already present
    # -- confirm against the enclosing function.
    dois = set()
    with open(f) as h:
        for line in h:
            # "{doi:a, b}" style references: split the comma-separated
            # list, drop spaces and make sure each item has the "doi:"
            # prefix.
            for x in re.findall('{doi:([^}]*)}',line):
                for y in x.split(','):
                    y = re.sub(' ','',y)
                    if not y.startswith('doi:'):
                        y = 'doi:'+y
                    dois.add(y)
            # Same treatment for "{https://doi.org/a, b}" references,
            # using the URL form as the prefix.
            for x in re.findall('{https://doi.org/([^}]*)}',line):
                for y in x.split(','):
                    y = re.sub(' ','',y)
                    if not y.startswith('https://doi.org/'):
                        y = 'https://doi.org/'+y
                    dois.add(y)
    with open(args.bib, 'w') as upv:
        # When a base file is given via args.b, copy it through first.
        if args.b is not None:
            with open(args.b) as h:
                for line in h:
                    upv.write(line)
        for y in dois - labels:
            out = get_bib_from_doi(y)
            # out[0] is treated as the success flag, out[1] as the entry.
            if out[0]:
                x = out[1].split(',')
                # In the first comma-separated piece, replace everything
                # from "{" onwards with "{" + y, i.e. the DOI string
                # becomes the text after the opening brace (presumably
                # the citation key -- confirm).
                x[0] = re.sub('{[^,]*','{'+y,x[0])
                bib = ','.join(x)
                try:
                    # NOTE(review): on Python 3, encode() returns bytes
                    # and bytes + str raises TypeError, so this line
                    # looks Python-2 specific -- confirm the target
                    # interpreter.
                    upv.write(bib.encode('UTF-8')+'\n')
                except:
                    # NOTE(review): the bare except drops into the
                    # debugger on any failure; looks like leftover
                    # debugging code.
                    import pdb; pdb.set_trace()
示例#9
0
def check_arxiv_published(value,
                          field="id",
                          get_first=True,
                          keep_eprint=False):
    """Look up an arXiv entry and return a BibTeX string for it.

    Parameters
    ----------
        value: str
            value of the field; every ``arxiv:`` tag (case-insensitive)
            is removed before the lookup
        field: str
            field used for the arxiv search API
        get_first: bool
            When exactly ``False``, the search is by title
            (``field == "ti"``) and several candidates remain,
            ``ask_which_is`` chooses the item. Otherwise the first
            candidate is used.
        keep_eprint: bool
            If True keep the arxiv number if the paper
            has already been published

    Returns
    -------
        found: bool
            True if found the arxiv item
        published: bool
            Flag returned by ``get_bib_from_doi`` for the item's DOI;
            False when the item has no ``"arxiv_doi"``
        bib: str
            bibtex string, ``""`` when nothing was found
    """
    # Raw string: the previous "arxiv\:" relied on an invalid escape
    # sequence, which newer Python versions warn about.
    value = re.sub(r"arxiv:", "", value, flags=re.I)
    found, items = get_arxiv_info(value, field)
    if found:
        if field == "ti":
            # Prefer an exact title match, compared case-insensitively
            # with spaces (and, on the arXiv side, newlines) removed.
            wanted = value.lower().replace(" ", "")
            for candidate in items:
                candidate_title = (candidate["title"].lower()
                                   .replace(" ", "")
                                   .replace("\n", ""))
                if candidate_title == wanted:
                    items = [candidate]
                    break

        if get_first is False and field == "ti" and len(items) > 1:
            found, item = ask_which_is(value, items)
        else:
            item = items[0]

    if not found:
        print("\t\nArxiv not found.")
        return found, False, ""

    if "arxiv_doi" not in item:
        # No DOI on the arXiv record: build the entry from the arXiv data.
        return found, False, generate_bib_from_arxiv(item, value, field)

    published, bib = get_bib_from_doi(item["arxiv_doi"])
    if keep_eprint:
        # Second-to-last piece of the id after splitting on "/" or "v".
        # NOTE(review): presumably the arXiv number without its version
        # suffix -- confirm this holds for old-style ids ("hep-th/...").
        eprint = re.split('/|v', item["id"])[-2]
        bib = add_eprint_to_bib(bib, eprint)
    return found, published, bib