示例#1
0
文件: bib.py 项目: quicklizard99/cv
def load_bib(bib, titles):
    """Load a BibTeX file and index its records by BibTeX ID.

    Args:
        bib: path of the BibTeX file holding the entries.
        titles: path of a BibTeX file whose ``strings`` table is used to
            replace journal values (looked up by their lower-cased text).

    Returns:
        dict of the form ``{'BibTeX ID': {record}}``, keyed by each
        entry's stripped ``'id'`` value.
    """
    with open(bib) as entries_file:
        entries_parser = BibTexParser()
        entries_parser.customization = convert_to_unicode
        # Find the url field of a misc entry
        # https://github.com/sciunto-org/python-bibtexparser/issues/93
        entries_parser.homogenise_fields = False
        database = bibtexparser.load(entries_file, parser=entries_parser)

    with open(titles) as strings_file:
        strings_parser = BibTexParser()
        strings_parser.customization = convert_to_unicode
        journal_titles = bibtexparser.load(strings_file, parser=strings_parser)

    by_id = {}
    for record in database.entries:
        if 'journal' in record:
            journal_key = record['journal'].lower()
            if journal_key in journal_titles.strings:
                record['journal'] = journal_titles.strings[journal_key]
        if 'author' in record:
            # Replace the brace-wrapped accented "i" sequence with a plain
            # accented character.
            record['author'] = record['author'].replace('{́i}', 'í')
        by_id[record['id'].strip()] = record
    return by_id
示例#2
0
def main(argv=None):
    """Parse a BibTeX file and print the summary, journal and conference reports.

    Args:
        argv: argument vector; ``sys.argv`` is used when it is None.
            ``argv[1]``, when present, names the BibTeX file to read;
            otherwise ``example.bib`` is read.

    Returns:
        Always 0.
    """
    args = sys.argv if argv is None else argv

    parser = BibTexParser()
    parser.customization = customizations

    filename = args[1] if len(args) > 1 else "example.bib"

    with open(filename) as handle:
        contents = handle.read()

    database = bibtexparser.loads(contents, parser=parser)

    print_summary(database.entries)
    print_journals(database.entries)
    print_conferences(database.entries)

    return 0
def parseEntry(s):
    """Parse the BibTeX string ``s`` and return its first entry.

    The string is parsed with ``convert_to_unicode`` as the parser
    customization, which normalizes the unicode of the entry.  Indexing
    the first entry fails if ``s`` yields no entries.
    """
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    return bibtexparser.loads(s, parser=unicode_parser).entries[0]
示例#4
0
def read_bib_file(filename, homogenize=False):
    """
    Load a bibtex file into a database.

    A missing file is not an error: blank text is parsed instead.  A single
    space is always appended to the text before parsing.

    Args:
        filename (str): path of the bibtex file.
        homogenize (bool): whether to homogenize the entries upon reading.

    Returns:
        A BibDatabase object.
    """
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as handle:
            contents = handle.read()
    else:
        contents = " "
    contents = contents + " "

    # Only a homogenizing read needs a customized parser; otherwise the
    # library default is used.
    if homogenize:
        parser = BibTexParser(common_strings=True)
        parser.customization = nomenclature.homogenize_latex_encoding
    else:
        parser = None

    return bibtexparser.loads(contents, parser=parser)
示例#5
0
文件: commands.py 项目: iblg/regolith
def _ingest_citations(rc):
    """Load the BibTeX file ``rc.filename`` and upsert each entry.

    Every entry is stored through ``rc.client.update_one`` under the
    ``_id`` taken from its BibTeX ``ID``; ``ENTRYTYPE`` is renamed to
    ``entrytype``, author names are split and stripped, and whitespace in
    the title is collapsed with ``RE_SPACE``.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import getnames

    def customizations(record):
        """Replace the author/editor strings by ``getnames`` name lists."""
        for field in ("author", "editor"):
            if field not in record:
                continue
            pieces = [
                name.strip()
                for chunk in record[field].replace("\n", " ").split(", ")
                for name in chunk.split(" and ")
            ]
            record[field] = getnames(pieces)
        return record

    parser = BibTexParser()
    parser.ignore_nonstandard_types = False
    parser.customization = customizations

    with open(rc.filename, "r", encoding='utf-8') as bibfile:
        database = bibtexparser.load(bibfile, parser=parser)

    # The looked-up collection is not used below; the lookup itself is kept.
    _ = rc.client[rc.db][rc.coll]

    for entry in database.entries:
        key = entry.pop("ID")
        entry["entrytype"] = entry.pop("ENTRYTYPE")
        if "author" in entry:
            authors = []
            for listed in entry["author"]:
                for name in RE_AND.split(listed):
                    authors.append(name.strip())
            entry["author"] = authors
        if "title" in entry:
            entry["title"] = RE_SPACE.sub(" ", entry["title"])
        rc.client.update_one(rc.db, rc.coll, {"_id": key}, entry, upsert=True)
    def run(self):
        """Render BibTeX publications through a template into a raw HTML node.

        Options read from ``self.options``: ``sort`` (default ``'date'``),
        ``template`` (path of a template file; otherwise the
        ``publications`` template of ``pelican_generator`` is used) and
        ``entries`` (keys of the entries to keep; all entries otherwise).
        The BibTeX source is the file named by the first argument, or the
        directive content when no argument is given.

        Returns:
            A one-element list holding the ``nodes.raw`` HTML node.
        """
        sort_type = self.options.get('sort', 'date')

        if 'template' in self.options:
            # An explicitly given template path wins over the theme's.
            directory, name = os.path.split(self.options['template'])
            environment = Environment(loader=FileSystemLoader(directory))
            template = environment.get_template(name)
        else:
            template = pelican_generator.get_template('publications')

        parser = BibTexParser(common_strings=True)
        parser.customization = customize

        if self.arguments:
            with open(self.arguments[0].strip(), 'r') as source:
                database = bibtexparser.load(source, parser=parser)
        else:
            database = bibtexparser.loads('\n'.join(self.content), parser=parser)

        wanted = self.options.get('entries', [])
        if wanted:
            by_key = database.entries_dict
            selected = [by_key[key] for key in wanted]
        else:
            selected = database.entries

        html = template.render(publications=sort_entries(selected, sort_type))
        return [nodes.raw('', html, format='html')]
示例#7
0
文件: utils.py 项目: jdumas/autobib
def read_bib_file(filename, homogenize=False):
    """
    Load a bibtex file into a database.

    A missing file is not an error: a single space is parsed instead.

    Args:
        filename (str): path of the bibtex file.
        homogenize (bool): whether to homogenize the entries upon reading.

    Returns:
        A BibDatabase object.
    """
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as handle:
            contents = handle.read()
    else:
        contents = " "

    # Only a homogenizing read needs a customized parser; otherwise the
    # library default is used.
    if homogenize:
        parser = BibTexParser()
        parser.customization = nomenclature.homogenize_latex_encoding
    else:
        parser = None

    return bibtexparser.loads(contents, parser=parser)
示例#8
0
文件: zbl.py 项目: siudej/Cite
 def _processResults(self, data):
     """Fetch the BibTeX records referenced in a zbMATH result page.

     ``data`` is searched for ``bibtex/....bib`` links; each one is
     downloaded from zbmath.org and parsed.  When ``self.otherID`` is set
     and the MRef lookup succeeds for a record, the MRef entry is used
     instead, with the zbl number (and the doi, when MRef lacks one)
     copied over from the zbMATH entry.  The dumped records are joined
     with newlines and stored in ``self.refs``.

     NOTE(review): ``urllib.urlopen`` only exists on Python 2.
     """
     # Raw string: the same pattern, without invalid-escape warnings.
     bibs = re.findall(r"(?si)bibtex/.*?\d{3,}\.bib", data)
     records = []
     import bibtexparser
     from bibtexparser.bparser import BibTexParser
     if self.otherID:
         # setup for MRef fetching
         from msn import MRef
         mr = MRef()
     for bib in bibs:
         bibtext = urllib.urlopen("https://zbmath.org/" + bib).read()
         # A BibTexParser keeps adding to one database across parses, so a
         # shared parser would make entries[0] always the first record
         # ever fetched.  Build a fresh parser for every record.
         parser = BibTexParser()
         parser.customization = customizations
         zbl = bibtexparser.loads(bibtext, parser=parser)
         if self.otherID and mr.fetch(bibtext):
             # found MRef match for zbMATH record
             msn = bibtexparser.loads(mr.refs)
             # use MSN bibtex entry with zbl number added
             # and doi transferred if missing
             msn.entries[0]['zbl'] = zbl.entries[0]['zbl']
             if 'doi' not in msn.entries[0] and 'doi' in zbl.entries[0]:
                 msn.entries[0]['doi'] = zbl.entries[0]['doi']
             zbl = msn
         records.append(bibtexparser.dumps(zbl))
     self.refs = "\n".join(records)
示例#9
0
    def _FindBibEntriesParser(self):
        """Collect completion data for the entries of every .bib file.

        Walks ``self._main_directory`` for ``.bib`` files.  Files that
        ``self._CacheDataAndSkip`` reports as skippable contribute the
        cached completions it returns; the others are parsed and their
        completions are stored in ``self._cached_data``.

        Returns:
            list of the objects built by ``responses.BuildCompletionData``,
            one per parsed entry that has an ``ID``, plus cached ones.
        """
        ret = []
        for filename in self._Walk(self._main_directory, ".bib"):
            skip, cache = self._CacheDataAndSkip(filename)
            if skip:
                ret.extend(cache)
                continue

            # Use a fresh parser per file: a BibTexParser accumulates the
            # entries of every parse into one database, so sharing it would
            # repeat earlier files' entries in each later file's results
            # (and in that file's cache).
            parser = BibTexParser()
            parser.customization = bib_customizations
            with open(filename) as bibtex_file:
                bib_database = bibtexparser.load(bibtex_file, parser=parser)

            resp = [
                responses.BuildCompletionData(
                    entry['ID'],
                    "%s (%s)" % (entry['title'], entry['author']))
                for entry in bib_database.entries
                if 'ID' in entry
            ]

            ret.extend(resp)
            self._cached_data[filename] = resp
        return ret
def parse_bibtex(filename):
    """Parse a BibTeX file into a list of simplified article dicts.

    Each returned dict has the keys ``title``, ``year`` and ``author``.
    Titles and authors have double spaces, line breaks and braces removed;
    authors are additionally reduced to their ASCII characters.  Entries
    without an author are left out.  The entry count and the resulting
    list are printed.
    """
    def _flatten(text):
        # Collapse double spaces and line breaks, then drop braces.
        return text.replace("  ", " ").replace("\n", " ").replace(
            "\r", "").replace("{", "").replace("}", "")

    with open(filename) as source:
        unicode_parser = BibTexParser()
        unicode_parser.customization = convert_to_unicode
        database = bibtexparser.load(source, parser=unicode_parser)
    print("Parsed the bibtex file, there are {} entries\n".format(
        len(database.entries)))

    articles = []
    for record in database.entries:
        # Title and year are read before the author check, so they are
        # required even for entries that end up skipped.
        title = _flatten("{}".format(record['title']))
        year = "{}".format(record['year'])
        if 'author' not in record:
            continue
        ascii_author = record['author'].encode('ascii', 'ignore').decode('ascii')
        articles.append({
            'title': title,
            'year': year,
            'author': _flatten(ascii_author),
        })
    print(articles)
    return articles
def rename_bib_file(citeDir, filename):
    """Register the first entry of a bucket-hosted BibTeX file.

    The object ``filename`` is read from the bucket returned by
    ``aws_bucket_info()`` and parsed.  If the ID of its first entry is not
    yet in the citation table (``read_bib_df``), the entry's title is
    searched through ``scholarly`` and the entry is stored together with
    the url and abstract of the first search result.

    Args:
        citeDir: prefix prepended to the returned file name.
        filename: key of the BibTeX object in the bucket.

    Returns:
        ``citeDir + ID + "_slrm.bib"`` when the entry was newly stored,
        otherwise the empty string.
    """
    my_bucket, _ = aws_bucket_info()
    bibtex_text = my_bucket.Object(filename).get()['Body'].read().decode(
        'utf-8')
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    entry = bibtexparser.loads(bibtex_text, parser=parser).entries[0]

    df = read_bib_df(citation_pickle_file=citation_pickle_file)
    if entry['ID'] in df['ID'].values:
        return ""

    res = next(scholarly.search_pubs_query(entry['title']))
    looked_up = {}
    for field in ('url', 'abstract'):
        try:
            looked_up[field] = res.bib[field]
        except Exception:
            # Best effort: a field missing from the search result is
            # stored as an empty string.  (Was a bare ``except:``, which
            # also swallowed KeyboardInterrupt/SystemExit.)
            looked_up[field] = ""

    store_bib_in_df(
        {
            'ID': entry['ID'],
            'numLikes': 0,
            'Title': entry['title'],
            'Authors': entry['author'],
            'url': looked_up['url'],
            'Abstract': looked_up['abstract'],
        },
        citation_pickle_file=citation_pickle_file)
    return citeDir + entry['ID'] + "_slrm.bib"
示例#12
0
def read_bibtex(filename):
    """Parse the BibTeX file ``filename`` and return its list of entries.

    Each record is converted to unicode, and its author and editor fields
    are split into name lists while parsing.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser

    def editor_split(record):
        """
        custom transformation
        - split the editor field (names separated by "and") into a list
          produced by ``getnames``; an empty editor field is removed
        :record: dict -- the record
        :returns: dict -- the modified record
        """
        if "editor" not in record:
            return record
        if not record["editor"]:
            del record["editor"]
            return record
        names = record["editor"].replace('\n', ' ').split(" and ")
        record["editor"] = getnames([name.strip() for name in names])
        return record

    def customizations(record):
        """
        custom transformation applied during parsing
        """
        record = bibtexparser.customization.convert_to_unicode(record)
        # Split the author field (names separated by 'and') into a list.
        record = bibtexparser.customization.author(record)
        # Do the same for the editor field.
        return editor_split(record)

    with open(filename) as source:
        parser = BibTexParser()
        parser.customization = customizations
        database = bibtexparser.load(source, parser=parser)
        return database.entries
    def _FindBibEntriesParser(self):
        """Build completion data from the .bib files under the main directory.

        For each ``.bib`` file found by ``self._Walk``, either the cached
        completions returned by ``self._CacheDataAndSkip`` are reused, or
        the file is parsed and the completions of its entries are recorded
        in ``self._cached_data``.

        Returns:
            list of ``responses.BuildCompletionData`` results for all
            files; entries without an ``ID`` are left out.
        """
        ret = []
        for filename in self._Walk(self._main_directory, ".bib"):
            skip, cache = self._CacheDataAndSkip(filename)
            if skip:
                ret.extend(cache)
                continue

            # One parser per file.  A BibTexParser appends every parse to
            # the same database, so a parser shared across files would
            # return the earlier files' entries again and store them in
            # this file's cache.
            parser = BibTexParser()
            parser.customization = bib_customizations

            resp = []
            with open(filename) as bibtex_file:
                bib_database = bibtexparser.load(bibtex_file, parser=parser)
                for entry in bib_database.entries:
                    if 'ID' not in entry:
                        continue
                    label = "%s (%s)" % (entry['title'], entry['author'])
                    resp.append(
                        responses.BuildCompletionData(entry['ID'], label))

            ret.extend(resp)
            self._cached_data[filename] = resp
        return ret
示例#14
0
def import_bibtex(
    bibtex,
    pub_dir="publication",
    featured=False,
    overwrite=False,
    normalize=False,
    dry_run=False,
):
    """Import publications from BibTeX file.

    Every entry of the file is handed to ``parse_bibtex_entry`` together
    with the ``pub_dir``, ``featured``, ``overwrite``, ``normalize`` and
    ``dry_run`` settings.

    Raises:
        AcademicError: if ``bibtex`` is not an existing file.
    """
    from academic.cli import AcademicError, log

    bibtex_path = Path(bibtex)
    if not bibtex_path.is_file():
        err = "Please check the path to your BibTeX file and re-run"
        log.error(err)
        raise AcademicError(err)

    entry_options = {
        "pub_dir": pub_dir,
        "featured": featured,
        "overwrite": overwrite,
        "normalize": normalize,
        "dry_run": dry_run,
    }

    with open(bibtex, "r", encoding="utf-8") as source:
        parser = BibTexParser(common_strings=True)
        parser.customization = convert_to_unicode
        # Keep entries whose type is not one of the standard BibTeX types.
        parser.ignore_nonstandard_types = False
        database = bibtexparser.load(source, parser=parser)
        for entry in database.entries:
            parse_bibtex_entry(entry, **entry_options)
示例#15
0
文件: fetch.py 项目: siudej/Cite
 def _bibtexQuery(self, query):
     """Turn a BibTeX query into a list of (field, value) pairs.

     Only the first record of ``query`` is used.  Its author field is
     reduced to last names, its title and journal are passed through
     ``self._citationQuery`` and its year is copied to ``date``.  The
     result keeps only the author, title, journal, mrnumber, date, arxiv
     and zbl fields.  An empty list is returned when nothing parses.
     """
     import bibtexparser
     from bibtexparser.bparser import BibTexParser
     latex_parser = BibTexParser()
     latex_parser.customization = homogeneize_latex_encoding
     parsed = bibtexparser.loads(query, parser=latex_parser)
     if not parsed.entries:
         return []

     record = parsed.entries[0]
     if "author" in record:
         # Drop everything after each comma, up to the next "and".
         record["author"] = re.sub(r',.*?(and\s*|$)', ' ',
                                   record['author'])
     for field in ("title", "journal"):
         if field in record:
             record[field] = self._citationQuery(record[field])[0][1]
     if "year" in record:
         record["date"] = record["year"]
     # only use a few fields
     # TODO add numbers
     wanted = {"author", "title", "journal", "mrnumber", "date",
               "arxiv", "zbl"}
     return [(key, value) for key, value in record.items()
             if key in wanted]
示例#16
0
 def __init__(self, bibfile):
     """Load the entries of the BibTeX file ``bibfile`` and map keywords.

     The parsed entries (converted to unicode) are stored in
     ``self.entries`` before ``self.map_keywords()`` is called.
     """
     unicode_parser = BibTexParser(common_strings=True)
     unicode_parser.customization = convert_to_unicode
     with open(bibfile) as source:
         database = bibtexparser.load(source, parser=unicode_parser)
     self.entries = database.entries
     self.map_keywords()
示例#17
0
def match_bibtex_to_work(bibtex_str):
    """Pair every entry of a BibTeX string with the work found for it.

    Braces are removed from each entry's title first.  A copy of every
    entry is then converted with ``bibtex_to_info`` and looked up through
    ``find_work_by_info``.

    Returns a list of matches: (entry, work)

    Doctest:

    .. doctest::

        >>> reload()
        >>> bibtex = ''' @inproceedings{murta2014a,
        ...   address = {Cologne, Germany},
        ...   author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
        ...   booktitle = {IPAW},
        ...   pages = {71--83},
        ...   publisher = {Springer},
        ...   title = {no{W}orkflow: capturing and analyzing provenance of scripts},
        ...   year = {2014}
        ... } '''
        >>> works = match_bibtex_to_work(bibtex)
        >>> murta2014a = work_by_varname("murta2014a")
        >>> works[0][1] is murta2014a
        True
    """
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    entries = bibtexparser.loads(bibtex_str, parser=unicode_parser).entries

    # First pass: clean all titles before any lookup happens.
    for entry in entries:
        entry['title'] = entry['title'].replace('{', '').replace('}', '')

    matches = []
    for entry in entries:
        info = bibtex_to_info(copy(entry))
        matches.append((entry, find_work_by_info(info)))
    return matches
示例#18
0
def simplify(inputfile, limitn=None, verbose=False):
    """Print a reduced version of every entry of a BibTeX file.

    For each entry the type, the ``unique-id`` (braces removed), the first
    author followed by "and others", the title and the year are printed,
    plus doi, month, volume, journal, number and booktitle when present.

    Args:
        inputfile: path of the BibTeX file to read.
        limitn: accepted for interface compatibility; not used.
        verbose: accepted for interface compatibility; not used.
    """
    import re

    with open(inputfile, 'r') as file:
        bibtex_str = file.read()

    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    for entry in bib_database.entries:
        # str.translate(None, "{}") only works on Python 2; replace()
        # removes the braces on both Python 2 and 3.
        key = str(entry['unique-id']).strip().replace("{", "").replace("}", "")
        print("@", str(entry['ENTRYTYPE']), "{", key + ",")

        # Split on the word "and" surrounded by whitespace; a bare
        # split('and') also cuts inside names such as "Anderson".
        first_author = re.split(
            r"\s+and\s+", str(entry['author']).strip(), maxsplit=1)[0]
        print("\t author = {", first_author + " and others},")
        print("\t title = {", str(entry['title']).strip(), "},")
        for field in ('doi', 'month'):
            if field in entry:
                print("\t " + field + " = {", str(entry[field]).strip(), "},")
        print("\t year = {", str(entry['year']).strip(), "},")
        for field in ('volume', 'journal', 'number', 'booktitle'):
            if field in entry:
                print("\t " + field + " = {", str(entry[field]).strip(), "},")
        print("}\n")
示例#19
0
文件: views.py 项目: dfucci/parsifal
def import_bibtex(request):
    """Import the articles of an uploaded BibTeX file into a review source.

    Reads ``review-id`` and ``source-id`` from the POST data and the
    upload from ``request.FILES['bibtex']``.  A file with a ``.bib`` or
    ``.bibtex`` extension, or with the ``application/x-bibtex`` content
    type, is parsed and its articles imported; any other file produces an
    error message.  Redirects to ``import_studies`` in both cases.
    """
    review_id = request.POST['review-id']
    source_id = request.POST['source-id']

    review = Review.objects.get(pk=review_id)
    source = Source.objects.get(pk=source_id)

    bibtex_file = request.FILES['bibtex']

    ext = os.path.splitext(bibtex_file.name)[1]
    valid_extensions = ['.bib', '.bibtex']

    if ext in valid_extensions or bibtex_file.content_type == 'application/x-bibtex':
        # Read and clean the upload only once its type has been accepted.
        list_bibtex_file = fix_bibtex_file(bibtex_file.readlines())
        str_bibtex_file = '\r\n'.join(list_bibtex_file)

        parser = BibTexParser()
        parser.customization = convert_to_unicode
        bib_database = bibtexparser.loads(str_bibtex_file, parser=parser)
        articles = bibtex_to_article_object(bib_database, review, source)
        # (A leftover pdb.set_trace() used to block the request here.)
        _import_articles(request, source, articles)
    else:
        messages.error(request, u'Invalid file type. Only .bib or .bibtex files are accepted.')

    return redirect(r('import_studies', args=(review.author.username, review.name)))
示例#20
0
def parse_bibtex(bib):
    '''Parses the BibTex returned by the DOI resolver

    Every key of ENTITIES found in the record is replaced by its mapped
    value before parsing.  After parsing, braces around a run of capital
    letters/underscores/spaces/hyphens or around a single character are
    removed from every field, page ranges written with "--" are rewritten
    with "-", and a trailing parenthesised part is cut from the publisher.

    Args:
        bib (str): a BibTex record

    Returns:
        Dict containing reference data

    Raises:
        Exception: if a field still contains "{" after the clean up.
    '''
    # .items() instead of the Python 2-only .iteritems().
    for entity, repl in ENTITIES.items():
        bib = bib.replace(entity, repl)
    # Parse BibTex using the bibtexparser module
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import convert_to_unicode
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    parsed = bibtexparser.loads(bib, parser=parser).entries[0]
    # Miscellaneous clean up
    braces = re.compile(u'\{([A-Z_ \-]+|[\u0020-\uD7FF])\}', re.U)
    # Iterate over a snapshot because values are reassigned in the loop.
    for key, val in list(parsed.items()):
        val = braces.sub(r'\1', val)
        if '{' in val:
            # NOTE(review): on Python 3 the encoded value is formatted as
            # a bytes repr - confirm whether that is wanted.
            raise Exception('Unhandled LaTeX: {}'.format(val.encode('cp1252')))
        parsed[key] = val
    parsed['pages'] = parsed.get('pages', '').replace('--', '-')
    if parsed.get('publisher', '').endswith(')'):
        parsed['publisher'] = parsed['publisher'].rsplit('(', 1)[0].rstrip()
    return parsed
示例#21
0
def import_bibtex(bibtex,
                  pub_dir='publication',
                  featured=False,
                  overwrite=False):
    """Import publications from BibTeX file.

    The publication data of a fixed Google Scholar author is first dumped
    to ``data/scholar_data.json``; every BibTeX entry is then handed to
    ``parse_bibtex_entry`` along with that author data.  Prints a message
    and returns without doing anything when ``bibtex`` is not a file.
    """
    bibtex_path = Path(bibtex)
    if not bibtex_path.is_file():
        print('Please check the path to your BibTeX file and re-run.')
        return

    # Import Google Scholar data of Julien Vanharen = R6OO3noAAAAJ.
    scholar_data = Author("R6OO3noAAAAJ")
    with open("data/scholar_data.json", "w") as fid:
        serialized = json.dumps(scholar_data.publication,
                                sort_keys=True,
                                indent=4,
                                separators=(',', ': '))
        fid.write(serialized)

    entry_options = {
        'pub_dir': pub_dir,
        'featured': featured,
        'overwrite': overwrite,
    }
    with open(bibtex, 'r', encoding='utf-8') as source:
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        database = bibtexparser.load(source, parser=parser)
        for entry in database.entries:
            parse_bibtex_entry(entry, scholar_data, **entry_options)
示例#22
0
def get_references(filepath):
    """Parse the BibTeX file at ``filepath`` and return the database.

    The module's ``customizations`` function is applied to every record
    while parsing.
    """
    with open(filepath) as source:
        reference_parser = BibTexParser()
        reference_parser.customization = customizations
        database = bibtexparser.load(source, parser=reference_parser)
    return database
示例#23
0
def parse_urlfile(url_file):
    """
    take a file of the form

    category: ads url

    and get the bibtex from the URL and return a list of Paper objects
    with the category stored as the subject

    Lines that are blank or start with "#" are skipped.  The returned list
    is sorted in reverse order.
    """

    papers = []

    with open(url_file) as f:
        for line in f:
            if line.startswith("#") or line.strip() == "":
                continue

            # split on the first ": " only, so the rest of the line stays
            # in one piece
            subject, url = line.split(": ", 1)

            # for the ADS bibtex URL, lop off the paper_id
            paper_id = url.strip().split("/")[-1]
            bibtex_url = "http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode={}&data_type=BIBTEX".format(paper_id)

            # urlopen gives us bytes, which are decoded line by line
            # before the text is processed.
            print(bibtex_url)
            with urllib.request.urlopen(bibtex_url) as response:
                bibtex_html = response.read()

            decoded_lines = [raw.decode("utf8")
                             for raw in bibtex_html.splitlines()]

            # strip off any header: keep everything from the first line
            # that starts with "@" (the original loop walked the text
            # character by character, so any "@" started the record)
            kept = []
            found_start = False
            for text in decoded_lines:
                if text.startswith("@"):
                    found_start = True
                if found_start:
                    kept.append(text)
            bibtex = "".join("{}\n".format(text) for text in kept)

            # A BibTexParser accumulates entries over successive parses,
            # so a fresh one is needed per URL to avoid returning earlier
            # papers again.
            parser = BibTexParser()
            parser.customization = customizations
            bib_database = bibtexparser.loads(bibtex, parser=parser)

            for e in bib_database.entries:
                p = extract_paper_info(e)
                # test the extracted paper, not the entry it came from
                if p is not None:
                    p.subject = subject
                    papers.append(p)

    papers.sort(reverse=True)
    return papers
示例#24
0
def getentries(filename):
    """Save an uploaded BibTeX file and return its parsed entries.

    The file is parsed as utf-8-sig first, then utf-16, then without an
    explicit encoding; the next encoding is tried only when the failure
    looks like a decoding problem.

    Raises:
        IOError: if ``save_import_file`` fails.
        ValueError: if the file cannot be parsed.
    """
    try:
        save_import_file(filename)
    except IOError as e:
        logg.error("bibtex import: save import file failed: {}".format(e))
        raise IOError("save import file failed")

    bibtex = None
    error = None
    # use utf-8-sig instead of utf-8 to get rid of BOM_UTF8, which confuses bibtex parser
    for encoding in ('utf-8-sig', 'utf-16', None):
        error = None
        try:
            # the context manager closes the file, also when parsing fails
            with codecs.open(filename, "r", encoding=encoding) as fi:
                parser = BibTexParser()
                # accept also non standard records like @SCIENCEREPORT
                parser.ignore_nonstandard_types = False
                parser.customization = _bibteximport_customize
                bibtex = bibtex_load(fi, parser=parser)
            # seems to be the correct encoding, don't try other encodings
            break
        except Exception as e:
            # check if there is a utf-encoding error, then try other encoding
            message = str(e).lower()
            wrong_encoding = (
                (encoding is not None and isinstance(e, UnicodeError)) or
                (encoding == 'utf-8-sig' and 'utf8' in message) or
                (encoding == 'utf-16' and 'utf-16' in message))
            if wrong_encoding:
                continue
            # keep the exception under another name: "e" is unbound once
            # the except block ends on Python 3
            error = e
            break

    if error is not None:
        logg.error("bibtex import: bibtexparser failed: {}".format(error))
        raise ValueError("bibtexparser failed")

    return bibtex.entries
示例#25
0
def import_bibtex(request):
    """Import the articles of an uploaded BibTeX file into a review source.

    Reads ``review-id`` and ``source-id`` from the POST data and the
    upload from ``request.FILES['bibtex']``.  A file with a ``.bib`` or
    ``.bibtex`` extension, or with the ``application/x-bibtex`` content
    type, is parsed and its articles imported; any other file produces an
    error message.  Redirects to ``import_studies`` in both cases.
    """
    review_id = request.POST['review-id']
    source_id = request.POST['source-id']
    review = Review.objects.get(pk=review_id)
    source = Source.objects.get(pk=source_id)

    upload = request.FILES['bibtex']
    extension = os.path.splitext(upload.name)[1]
    accepted = (extension in ['.bib', '.bibtex']
                or upload.content_type == 'application/x-bibtex')

    if not accepted:
        messages.error(
            request,
            u'Invalid file type. Only .bib or .bibtex files are accepted.')
    else:
        unicode_parser = BibTexParser()
        unicode_parser.customization = convert_to_unicode
        database = bibtexparser.load(upload, parser=unicode_parser)
        articles = bibtex_to_article_object(database, review, source)
        _import_articles(request, source, articles)

    target = r('import_studies', args=(review.author.username, review.name))
    return redirect(target)
示例#26
0
文件: cli.py 项目: jasonzou/MyPapers
def convert(inpath):
    '''
    Convert from bibtex to bibjson.
    One argument expected: path to bibtex file.

    Each parsed entry is numbered from 1 in its "_id" key and tagged with
    the collection "test01"; the entries are modified in place.

    Returns a dict whose "records" key holds the list of entries.
    '''
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    import json

    parser = BibTexParser()
    with open(inpath) as source:
        parser.customization = customizations
        database = bibtexparser.load(source, parser=parser)

    numbered = []
    for number, entry in enumerate(database.entries, start=1):
        entry["_id"] = number
        entry["collection"] = "test01"
        numbered.append(entry)
    return {"records": numbered}
示例#27
0
def add_bibtex_string(bibtex_str):
    """Load input bibtex string into database.

    Every entry of ``bibtex_str`` is turned into a ``BiblioEntry`` and
    committed on its own.  An entry that cannot be stored is reported on
    stdout and skipped, so the remaining entries are still processed.
    """
    # parse input biblio as unicode:
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    for bib in bib_database.entries:
        try:
            bib_entry = BiblioEntry(ID=bib.get("ID", ""),
                                    ENTRYTYPE=bib.get("ENTRYTYPE", ""),
                                    authors=bib.get("author", ""),
                                    title=bib.get("title", ""),
                                    year=bib.get("year", ""),
                                    month=bib.get("month", ""),
                                    publisher=bib.get("publisher", ""),
                                    journal=bib.get("journal", ""),
                                    school=bib.get("school", ""),
                                    pdf=bib.get("pdf", ""),
                                    url=bib.get("url", ""),
                                    tag=bib.get("tag", "undefined"),
                                    keywords=bib.get("keywords", ""))
            db.session.add(bib_entry)
            db.session.commit()
        except Exception:
            # Roll back so the session is usable for the next entry;
            # without it one failed commit makes every later one fail.
            # NOTE(review): assumes db.session is a SQLAlchemy session.
            db.session.rollback()
            print("Entry already in database: ", bib.get("title"))
def bib2jekyllcol(inputFile, outputDir):
    """Parse a BibTeX file and prepare the output directory of a collection.

    The BibTeX file ``inputFile`` is parsed (converted to unicode).  The
    directory ``outputDir`` is created when missing; otherwise every
    regular file directly inside it is deleted.  A file that cannot be
    deleted is reported on stdout and skipped.
    """
    # read and parse bib file
    with open(inputFile) as bibtex_file:
        bibtex_str = bibtex_file.read()

    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    # month abbreviations, in calendar order
    month_list = [
        "jan", "feb", "mar", "apr", "may", "june", "july", "aug", "sept",
        "oct", "nov", "dec"
    ]

    # type names:
    type_list = [
        "title", "author", "journal", "volume", "number", "year", "month",
        "doi", "pages", "publisher", "booktitle", "note"
    ]

    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    else:
        print("Deleting existing collection file...\n")
        for name in os.listdir(outputDir):
            file_path = os.path.join(outputDir, name)
            # The try block belongs inside the loop: outside of it only
            # the last listed file was ever deleted.
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                print(e)
示例#29
0
    def load(self, model):
        """
        Replace this object's attributes with those of one BibTeX reference.

        All attributes listed by ``self.asdict()`` are deleted, then every
        field of the parsed reference is set as an attribute.

        Parameters
        ----------
        model : str or DataModelDict
            Model content or file path to model content.  When it cannot
            be loaded as a data model, it is parsed as BibTeX directly.
        """
        # Use the model's 'bibtex' element when the input is a data model;
        # otherwise treat the input itself as the bibtex content.
        try:
            parsed_model = DM(model)
        except:
            bibtex = model
        else:
            bibtex = parsed_model.find('bibtex')

        for name in self.asdict():
            delattr(self, name)

        unicode_parser = BibTexParser()
        unicode_parser.customization = convert_to_unicode
        entries = bibtexparser.loads(bibtex, parser=unicode_parser).entries
        assert len(entries) == 1, 'bibtex must be for a single reference'

        for name, value in entries[0].items():
            setattr(self, name, value)
示例#30
0
文件: cli.py 项目: jasonzou/MyPapers
def convert(inpath):
    '''
    Convert from bibtex to bibjson.
    One argument expected: path to bibtex file.

    The parsed entries are modified in place: each gets a running "_id"
    starting at 1 and the "collection" value "test01".

    Returns a dict with the list of entries under the "records" key.
    '''
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    import json

    parser = BibTexParser()
    with open(inpath) as source:
        parser.customization = customizations
        entries = bibtexparser.load(source, parser=parser).entries

    tagged = []
    for position, entry in enumerate(entries):
        entry["_id"] = position + 1
        entry["collection"] = "test01"
        tagged.append(entry)

    result = {}
    result["records"] = tagged
    return result
示例#31
0
def get_bibtex(f):
    """Parse the open BibTeX file ``f`` and return the resulting database.

    The parser keeps non-standard entry types, homogenises field names,
    does not load the common strings and applies ``clean_tex`` to every
    record.
    """
    tex_parser = BibTexParser(common_strings=False)
    tex_parser.customization = clean_tex
    tex_parser.homogenise_fields = True
    tex_parser.ignore_nonstandard_types = False
    return bibtexparser.load(f, parser=tex_parser)
def bib2jekyllcol(inputFile, outputDir):
    """Parse a BibTeX file and prepare the output directory of a collection.

    The BibTeX file ``inputFile`` is parsed (converted to unicode).  The
    directory ``outputDir`` is created when missing; otherwise every
    regular file directly inside it is deleted.  A file that cannot be
    deleted is reported on stdout and skipped.
    """
    # read and parse bib file
    with open(inputFile) as bibtex_file:
        bibtex_str = bibtex_file.read()

    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    # month abbreviations, in calendar order
    month_list = ["jan", "feb", "mar", "apr", "may", "june", "july", "aug", "sept", "oct", "nov", "dec"]

    # type names:
    type_list = ["title", "author", "journal", "volume", "number",
                 "year", "month", "doi", "pages", "publisher", "booktitle", "note"]

    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    else:
        print("Deleting existing collection file...\n")
        for name in os.listdir(outputDir):
            file_path = os.path.join(outputDir, name)
            # Delete inside the loop; with the try block after the loop
            # only the last listed file was removed.
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                print(e)
示例#33
0
def _ingest_citations(rc):
    """Parse the BibTeX file named by ``rc.filename`` and upsert each entry.

    Every parsed entry is stored through ``rc.client.update_one`` with the
    entry's ``ID`` as ``_id``; ``ENTRYTYPE`` is renamed to ``entrytype``,
    author names are split on ``RE_AND`` and stripped, and the title has
    ``RE_SPACE`` matches replaced by a single space.

    :param rc: run-control object providing ``filename``, ``client``,
        ``db`` and ``coll``.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import getnames

    def customizations(record):
        # Split the author/editor fields on ', ' and on ' and ', strip each
        # piece and hand the flat list to getnames().
        for field in ['author', 'editor']:
            if field not in record:
                continue
            single_line = record[field].replace('\n', ' ')
            pieces = [
                part.strip()
                for chunk in single_line.split(', ')
                for part in chunk.split(" and ")
            ]
            record[field] = getnames(pieces)

        return record

    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    bib_parser.customization = customizations

    with open(rc.filename, 'r') as handle:
        database = bibtexparser.load(handle, parser=bib_parser)

    # The collection is looked up here but not used below; the lookup is
    # kept so the function touches the client exactly as before.
    coll = rc.client[rc.db][rc.coll]

    for entry in database.entries:
        entry_id = entry.pop('ID')
        entry['entrytype'] = entry.pop('ENTRYTYPE')
        if 'author' in entry:
            split_authors = []
            for raw_name in entry['author']:
                split_authors.extend(
                    name.strip() for name in RE_AND.split(raw_name))
            entry['author'] = split_authors
        if 'title' in entry:
            entry['title'] = RE_SPACE.sub(' ', entry['title'])
        rc.client.update_one(rc.db, rc.coll, {'_id': entry_id},
                             entry, upsert=True)
示例#34
0
def process_dir(bibfile, md_files, out_dir, build_dir):
    """Preprocess markdown files against a BibTeX file and write a reference page.

    Each file in *md_files* is run through ``preprocess_markdown_file`` and
    the result is written under the same name into *out_dir*. The
    accumulated references are then written to ``<bibfile stem>.md`` in
    *out_dir*.

    :param bibfile: path object of the BibTeX file (its ``stem`` is used).
    :param md_files: iterable of path objects of the markdown inputs.
    :param out_dir: path object of the output directory; created if missing.
    :param build_dir: not used by this function.
    :raises ValueError: if an input file would be overwritten by its output.
    """
    stem = bibfile.stem
    outlitfile = out_dir / Path('{}.md'.format(stem))
    # Link target of the reference page, relative to a sibling directory.
    outlitfilehtml = Path('..') / Path(out_dir.name) / Path(
        '{}.html'.format(stem))

    out_dir.mkdir(parents=True, exist_ok=True)

    with open(bibfile, 'r') as bibtex_file:
        bib_parser = BibTexParser()
        bib_parser.customization = convert_to_unicode
        bib_data = bibtexparser.load(bibtex_file, parser=bib_parser)

    # Running citation counter and accumulated reference text, threaded
    # through every call to preprocess_markdown_file.
    n, refs = 1, ''
    for fname in md_files:
        if fname.resolve() == (out_dir / fname.name).resolve():
            raise ValueError(
                "Script would overwrite the input. Choose different out_dir.")

        with open(fname, 'r') as source, \
                open(out_dir / fname.name, 'w') as sink:
            processed_lines, bib_data, refs, n = preprocess_markdown_file(
                source, bib_data, reffile=outlitfilehtml, n=n, refs=refs)

            for line in processed_lines:
                sink.write(line)

    with open(outlitfile, 'w') as outfile:
        for piece in ('title: References\n', '---', refs):
            outfile.write(piece)
示例#35
0
    def load_model(self, model, name=None):
        """
        Loads the object info from data model content
        
        Parameters
        ----------
        model : str or DataModelDict
            Model content or file path to model content.
        name : str, optional
            The name to use when saving the record.

        Raises
        ------
        AssertionError
            If the parsed bibtex content does not hold exactly one entry.
        """
        try:
            super().load_model(model, name=name)
        except:
            # The parent loader failed: treat `model` itself as the bibtex
            # content.
            # NOTE(review): the bare except also catches KeyboardInterrupt
            # and SystemExit - confirm that this is intended.
            bibtex = model
        else:
            # The parent loader succeeded: take the bibtex content from the
            # 'bibtex' element of the loaded model.
            bibtex = self.model.find('bibtex')

        # Parse and extract content
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        bib_database = bibtexparser.loads(bibtex, parser=parser)
        assert len(
            bib_database.entries) == 1, 'bibtex must be for a single reference'

        self.__bib = bib_database.entries[0]
        # Probe self.name; if reading it raises, fall back to self.doifname.
        try:
            self.name
        except:
            self.name = self.doifname

        # Probe self.model; if reading it raises, build the model.
        try:
            self.model
        except:
            self.build_model()
def parseEntry(s):
    """Parse the BibTeX string *s* and return its first entry.

    The string is parsed with ``convert_to_unicode`` as customization, which
    normalizes unicode by reparsing. Indexing the first entry fails if
    nothing was parsed.
    """
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    parsed = bibtexparser.loads(s, parser=unicode_parser)
    return parsed.entries[0]
示例#37
0
    def biblatex(self):
        """Return entry formatted in BibLateX style as dictionary or 'None'.

        The entry is taken from ``self.as_str("BibTeX")``; when that is
        falsy, ``None`` is returned. Otherwise each parsed entry has its
        'journal' field renamed to 'journaltitle' before the database is
        dumped and passed to ``_entry_from_str``.
        """
        entry_str = self.as_str("BibTeX")
        if not entry_str:
            return None

        unicode_parser = BibTexParser()
        unicode_parser.customization = convert_to_unicode
        database = bibtexparser.loads(entry_str, parser=unicode_parser)

        # convert 'journal' to 'journaltitle'
        for record in database.entries:
            if 'journal' in record:
                record['journaltitle'] = record.pop('journal')

        return _entry_from_str(bibtexparser.dumps(database))
示例#38
0
文件: bibtex.py 项目: saru0000/bgc-md
def printable_bibtex_entry(entry):
    """Convert a dictionary BibTeX entry to LaTeX format.

    The entry is serialized with ``bibtex_entry_str`` and parsed again with
    ``homogeneize_latex_encoding`` as customization; the first parsed entry
    is returned.
    """
    latex_parser = BibTexParser()
    latex_parser.customization = homogeneize_latex_encoding
    serialized = bibtex_entry_str(entry)
    reparsed = bibtexparser.loads(serialized, parser=latex_parser)
    return reparsed.entries[0]
示例#39
0
def bib_parse(path):
    """Return the entries parsed from the BibTeX file at *path*.

    Parsing uses ``custom_callback`` as the parser customization.
    """
    with open(path) as handle:
        callback_parser = BibTexParser()
        callback_parser.customization = custom_callback
        return bibtexparser.load(handle, parser=callback_parser).entries
示例#40
0
def load_bibtex_string(string):
    """Parse the BibTeX text *string* and return the resulting database.

    The parser is created with ``common_strings=True`` and
    ``ignore_nonstandard_types=True`` and uses ``customizations`` as its
    customization callback.
    """
    parser_options = {
        "common_strings": True,
        "ignore_nonstandard_types": True,
    }
    bib_parser = BibTexParser(**parser_options)
    bib_parser.customization = customizations
    return bibtexparser.loads(string, parser=bib_parser)
示例#41
0
def load_bib(filename):
    """Load the BibTeX file *filename* and return the parsed database.

    Before parsing, every ``{{`` in the file content is replaced by ``{``
    and every ``}}`` by ``}``. Parsing uses ``convert_to_unicode`` as the
    customization.
    """
    with open(filename) as bibtex_file:
        raw_text = bibtex_file.read()

    # Collapse doubled braces, opening ones first, then closing ones.
    single_braced = raw_text.replace("{{", "{").replace("}}", "}")

    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    return bibtexparser.loads(single_braced, parser=unicode_parser)
示例#42
0
def load_bibtex_file(filepath):
    """Parse the BibTeX file at *filepath* and return the resulting database.

    The parser is created with ``common_strings=True`` and
    ``ignore_nonstandard_types=True`` and uses ``customizations`` as its
    customization callback.
    """
    file_parser = BibTexParser(
        common_strings=True,
        ignore_nonstandard_types=True,
    )
    file_parser.customization = customizations

    with open(filepath, "r") as handle:
        return bibtexparser.load(handle, parser=file_parser)
示例#43
0
def _parser():
    """Build and return a configured bibtex parser.

    The parser has ``interpolate_strings`` set to ``False`` and uses
    ``cleanup_record`` as its customization callback.
    """
    configured = BibTexParser()
    for attribute, value in (("interpolate_strings", False),
                             ("customization", cleanup_record)):
        setattr(configured, attribute, value)
    return configured
示例#44
0
 def deserialize_publications(self, base_path):
     """Load publication lists from the BibTeX files found in *base_path*.

     ``sci_publications.bib`` fills ``self.scientificPubs`` and
     ``pop_publications.bib`` fills ``self.popularPubs``. A file that does
     not exist is skipped and its attribute is left untouched.
     """
     sources = (
         ('sci_publications.bib', 'scientificPubs'),  # Scientific publications
         ('pop_publications.bib', 'popularPubs'),     # Popular publications
     )
     for bib_name, attribute in sources:
         bib_path = os.path.join(base_path, bib_name)
         if not os.path.exists(bib_path):
             continue
         latex_parser = BibTexParser()
         latex_parser.customization = homogenize_latex_encoding
         with open(bib_path, encoding='utf-8') as bibtex_file:
             database = bibtexparser.load(bibtex_file, parser=latex_parser)
             setattr(self, attribute, database.entries)
示例#45
0
def render_hal_biblio(keywords):
    """Send a query to HAL API and display returned bibtex entries.

    The response text is parsed as BibTeX with unicode conversion, the
    entries are sorted by their 'year' field in descending order and then
    rendered with the "hal.html" template.

    :param keywords: value substituted into ``HAL_QUERY_API``.
    :return: the result of ``render_template``.
    """
    biblio = requests.get(HAL_QUERY_API.format(keywords)).text

    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(biblio, parser=parser)

    # An entry without a 'year' field must not break the whole page: it
    # sorts as the empty string, i.e. after all dated entries.
    bib_database.entries.sort(key=lambda x: x.get('year', ''), reverse=True)
    templateVars = format_bibdatabase(bib_database.entries)
    return render_template("hal.html", **templateVars)
示例#46
0
def strip_and_write(inputfile, outputfile):
    """Parse *inputfile* with ``customization`` applied and dump it to *outputfile*.

    Exits through ``sys.exit`` with a message when loading produced ``None``.
    """
    bibdata = None
    with open(inputfile, 'r') as source:
        strip_parser = BibTexParser()
        strip_parser.customization = customization
        bibdata = bibtexparser.load(source, parser=strip_parser)

    if bibdata is None:
        sys.exit("Could not load input file {}".format(inputfile))

    with open(outputfile, 'w') as sink:
        bibtexparser.dump(bibdata, sink)
示例#47
0
def parseFile(bib_file):
    """parse the bib file

    :param bib_file: bibtex file to be parsed
    :returns: -- a bibtex file object
    """
    with open(bib_file) as handle:
        bib_parser = BibTexParser()
        # NOTE(review): this sets an attribute literally named `homogenize`;
        # confirm that the parser reads it (the library's documented switch
        # appears to be `homogenise_fields`).
        bib_parser.homogenize = True
        bib_parser.customization = customizations
        return b.load(handle, parser=bib_parser)
def parseFile(bib_file):
    """parse the bib file

    :param bib_file: bibtex file to be parsed
    :returns: -- a bibtex file object
    """
    with open(bib_file) as source:
        configured = BibTexParser()
        # NOTE(review): `homogenize` may not be an attribute the parser
        # consults - verify against the bibtexparser version in use.
        configured.homogenize = True
        # Apply the customizations defined earlier in the module.
        configured.customization = customizations
        parsed = b.load(source, parser=configured)
    return parsed
示例#49
0
 def ref(self, id, published=False, standardised=False):
     """Download bibliographic data of document.

     :param id: document identifier, resolved to a report number through
         ``self.number(id=id)``.
     :param published: when true, also fetch the document page and store
         the text of the element with id ``published_line`` (or ``None``
         when absent) under the key ``'published'``.
     :param standardised: when true, return a dictionary with the keys
         ``NberID``, ``Abstract``, ``Month``, ``Year``, ``Title``,
         ``Authors`` (plus ``Published`` when available) instead of the
         raw parsed entry.
     :return: the first parsed BibTeX entry, or the standardised dictionary.
     """
     # Set up BibTeX parser.
     parser = BibTexParser()
     parser.customization = convert_to_unicode

     # Get correct document report number.
     number = self.number(id=id)

     # Get bibtex file and parse.
     request = requests.get('{}/papers/{}.bib'.format(self.url, number))
     # Non-breaking spaces are replaced by plain spaces before parsing.
     text = request.content.decode('utf8').replace(u'\xa0', u' ')
     bibtex = bibtexparser.loads(text, parser=parser).entries[0]

     # If 'published' keyword is true, fetch information on where document
     # was eventually published.
     if published:
         request = requests.get('{}/papers/{}'.format(self.url, number))
         soup = BeautifulSoup(request.text, 'html.parser')
         bibtex['published'] = soup.find(attrs={'id': 'published_line'})

         # Clean up returned text (get rid of extra spaces).
         if bibtex['published']:
             bibtex['published'] = ' '.join(bibtex['published'].text.strip().split())

     # If 'standardised' keyword is true, return dictionary with common keywords.
     if standardised:
         standard = {
             # The first four characters of the entry ID are dropped.
             'NberID': bibtex['ID'][4:],
             'Abstract': ' '.join(bibtex['abstract'].strip().split()),
             'Month': bibtex['month'].strip(),
             'Year': int(bibtex['year']),
             'Title': ' '.join(bibtex['title'].strip().split()),
             # All authors appear to be separated by 'and'.
             'Authors': [{'Name': author}
                         for author in bibtex['author'].split(' and ')],
         }

         # Sometimes, the 'month' keyword returns the year for documents released
         # in January.
         # Raw string: '\d' in a plain literal is an invalid escape sequence.
         if re.match(r'^\d{4}$', standard['Month']):
             standard['Month'] = 'January'

         if 'published' in bibtex:
             standard['Published'] = bibtex['published']

         bibtex = standard

     return bibtex
示例#50
0
文件: PDFdl.py 项目: LoicH/aps
def openBibLib(bibName):
    """Load a bibtex file and return its parsed content.

    The file is parsed with ``convert_to_unicode`` as customization.

    @param bibName: the path to the file
    @type bibName: string

    @return: the BibDatabase object with all the information
    @rtype: BibDatabase"""
    with open(bibName) as handle:
        unicode_parser = BibTexParser()
        unicode_parser.customization = convert_to_unicode
        return bibtexparser.load(handle, parser=unicode_parser)
示例#51
0
def collection_from_bibtex_str(bib_str, **kwargs):
    """
    Build a BibJSON collection from a Bibtex string (e.g. the content of a .bib-file).
    :param bib_str: input bibtex string
    :param kwargs: metadata for the BibJSON collection. "collection" parameter must be set.
    :return BibJSON collection dictionary
    """
    entry_parser = BibTexParser()
    entry_parser.customization = _parse_bib_entry
    # The flag reads as flipped; this seems to be an error in the library.
    entry_parser.ignore_nonstandard_types = False

    parsed = bibtexparser.loads(bib_str, parser=entry_parser)
    entries_by_key = parsed.entries_dict
    return collection_from_dict(entries_by_key, **kwargs)
示例#52
0
def import_bibtex_raw_content(request):
    """Import the articles contained in raw BibTeX text posted with *request*.

    Reads ``review-id``, ``source-id`` and ``bibtex_file`` from the POST
    data, converts the parsed BibTeX into article objects, imports them and
    redirects to the ``import_studies`` page of the review.
    """
    post = request.POST
    review_id = post.get('review-id')
    source_id = post.get('source-id')
    raw_bibtex = post.get('bibtex_file')

    review = Review.objects.get(pk=review_id)
    source = Source.objects.get(pk=source_id)

    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    database = bibtexparser.loads(raw_bibtex, parser=unicode_parser)

    _import_articles(
        request, source, bibtex_to_article_object(database, review, source))

    target = r('import_studies', args=(review.author.username, review.name))
    return redirect(target)
示例#53
0
文件: bibtex.py 项目: Phyks/dissemin
def parse_bibtex(bibtex):
    """
    Parse a single bibtex record represented as a string to a dict

    Raises ValueError when nothing was parsed. When several records were
    parsed, a warning is logged and the first one is returned.
    """
    record_parser = BibTexParser()
    record_parser.customization = customizations
    entries = bibtexparser.loads(bibtex, parser=record_parser).entries

    if not entries:
        raise ValueError('No bibtex item was parsed.')

    count = len(entries)
    if count > 1:
        logger.warning("%d bibtex items, defaulting to first one",
                       count)

    return entries[0]
示例#54
0
文件: fetch.py 项目: siudej/Cite
    def _cleanupBibTex(self, count):
        """ Clean up bibtex and ensure uniform look.

        ``self.refs`` is parsed with ``homogeneize_latex_encoding`` as
        customization and written back with a four-space indent, entries
        only, ordered by id. ``self.number`` is set to the number of parsed
        entries.

        :param count: not used by this method.
        """
        import bibtexparser
        from bibtexparser.bparser import BibTexParser
        parser = BibTexParser()
        parser.customization = homogeneize_latex_encoding
        bib = bibtexparser.loads(self.refs, parser=parser)

        # save results
        from bibtexparser.bwriter import BibTexWriter
        writer = BibTexWriter()
        writer.contents = ['entries']
        writer.indent = '    '
        # Must be a tuple of field names: ('id') without the trailing comma
        # is just the string 'id', which would be iterated character by
        # character instead of naming the sort field.
        writer.order_entries_by = ('id',)
        self.number = len(bib.entries)
        self.refs = bibtexparser.dumps(bib, writer)
示例#55
0
def get_all_entries(locations):
    """
    Retrieve all the .bib files and parse their content to a list of simple dictionaries

    Locations that do not end in ".bib" are skipped. A file that fails to
    parse is logged as an error and skipped; the remaining files are still
    processed.

    :param locations: locations of all the .bib files
    :return: return a list of dictionaries
    """
    result = []
    for file_location in locations:
        if not file_location.endswith(".bib"):
            continue
        # Use a fresh parser per file: a bibtexparser parser keeps its
        # database between parse calls (see `expect_multiple_parse`), so a
        # shared instance could repeat entries from earlier files.
        my_parser = BibTexParser()
        my_parser.customization = _customizations
        with open(file_location) as bib_file:
            try:
                result.extend(bibtexparser.load(bib_file, parser=my_parser).entries)
            except Exception:
                # Best effort: report the broken file and keep going, but
                # let KeyboardInterrupt/SystemExit propagate.
                logger.error(".bib parsing error: %s", file_location)
    return result
示例#56
0
    def handle(self, *args, **kwargs):
        """Render the configured BibTeX files to an HTML list (Python 2 code).

        Reads the input paths from ``settings.BIBTEX_FILES`` and the output
        path from ``settings.BIBTEX_OUTPUT``, merges the entries of all
        files (skipping titles already seen, compared case-insensitively),
        renders each entry with the template matching its ``ENTRYTYPE`` and,
        after asking on the console, writes the result as a ``<ul>``.
        """
        bibtex_files = settings.BIBTEX_FILES
        bibtex_output = settings.BIBTEX_OUTPUT

        # Merged entries of all input files, de-duplicated by title.
        db = []
        for bib in bibtex_files:
            print "Processing BibTeX file %s" % bib
            try:
                parser = BibTexParser()
                parser.customization = customizations
                with open(bib, 'r') as inf:
                    dbTemp = bibtexparser.load(inf, parser=parser).entries
                for entryTemp in dbTemp:
                    # Keep an entry only if no stored entry has the same
                    # lower-cased title.
                    if not any(entryTemp['title'].lower() == entry['title'].lower() for entry in db):
                        db.append(deepcopy(entryTemp))

            except:
                # Any failure for this file is printed; processing continues
                # with the next file.
                traceback.print_exc()
                print ('An error occured while processing [' + bib + ']. Its content will be ignored.')

        # Start rendering HTML
        try:
            for entry in db:
                print entry
                print 'Using template bib2html/ieee/' + entry['ENTRYTYPE'] + '.html'
                bibTemplate = get_template('bib2html/ieee/' + entry['ENTRYTYPE'] + '.html')
                entry['formatted'] = bibTemplate.render(entry)
        except:
            # NOTE(review): the message announces an exit, but no exit call
            # follows; entries without 'formatted' would then fail below.
            traceback.print_exc()
            print ('An error occured while processing the style files.'
                'The program will exit without completing the task.')

        # Reviewing is opt-in: only an answer starting with 'y'/'Y' prints
        # the formatted entries.
        if_review = raw_input('Do you want to review? [N] ')
        if len(if_review) > 0 and if_review.lower()[0] == 'y':
            for entry in db:
                # write to output
                print entry['formatted']

        # Writing is opt-out: only an answer starting with 'n'/'N' skips it.
        if_write = raw_input('Write to %s? [Y] ' % (bibtex_output,))
        if len(if_write) > 0 and if_write.lower()[0] == 'n':
            print 'Okay, won\'t write'
        else:
            # Create the output directory if it does not exist yet.
            if not os.path.exists(os.path.dirname(bibtex_output)):
                os.makedirs(os.path.dirname(bibtex_output))
            with open(bibtex_output, 'w+') as out:
                out.write('<ul>' + ''.join([x['formatted'] for x in db]) + '</ul>')
示例#57
0
def parse_bibfile(bibfile):
    """Parse a BibTeX file into a list of papers sorted in descending order.

    Each entry is converted with ``extract_paper_info``; entries for which
    that helper returns ``None`` are left out.

    :param bibfile: path of the BibTeX file.
    :return: list of extracted papers, sorted with ``reverse=True``.
    """
    with open(bibfile) as bibtex_file:
        parser = BibTexParser()
        parser.customization = customizations
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    # Filter on the extracted paper, not on the input entry: the entry is
    # never None, so testing it would let None papers into the list.
    papers = [
        p for p in map(extract_paper_info, bib_database.entries)
        if p is not None
    ]

    papers.sort(reverse=True)

    return papers
示例#58
0
def handle_bibtex(infile, user=None, verbose=False):
    """Store the entries of a BibTeX source as Article objects.

    Entries whose title already exists among the stored articles are
    skipped. Each new entry is reduced with ``clean_entry`` to a fixed list
    of fields before the article is created and saved.

    :param infile: object handed to ``bibtexparser.load``.
    :param user: owner of the created articles; when falsy, the user with
        the default username is looked up instead.
    :param verbose: when true, print each article before saving it.
    """
    owner = user if user else User.objects.get(username='******')

    # manage data
    # load bibtex file to in-memory db
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    database = bibtexparser.load(infile, parser=unicode_parser)

    kept_fields = ['title', 'author', 'journal', 'publisher', 'year', 'volume', 'pages']
    for art in database.entries:
        if len(Article.objects.filter(title=art['title'])) != 0:
            continue
        cleaned = clean_entry(art, list(kept_fields))
        article = Article(user=owner, hide=False, **cleaned)
        if verbose:
            print(article)
        article.save()
示例#59
0
文件: bibtex.py 项目: Lysxia/dissemin
def parse_bibtex(bibtex):
    """
    Parse a single bibtex record represented as a string to a dict

    Newlines are inserted with ``insert_newlines_in_bibtex`` before parsing.
    Raises ValueError when nothing was parsed; with several records a
    warning is printed and the first one is used. The 'author' field of the
    returned entry is replaced by the result of ``parse_authors_list``.
    """
    prepared = insert_newlines_in_bibtex(bibtex)

    # A unicode-converting parser is configured here but is not passed to
    # loads() below.
    unicode_parser = BibTexParser()
    unicode_parser.customization = convert_to_unicode
    entries = bibtexparser.loads(prepared).entries

    found = len(entries)
    if found == 0:
        raise ValueError('No bibtex item was parsed.')
    if found > 1:
        print("Warning: %d Bibtex items in parse_bibtex, defaulting to the first one" % len(entries))

    first = entries[0]
    first['author'] = parse_authors_list(first.get('author', ''))
    return first
示例#60
0
def fix_bibliography(bibtex_string):
    """
    Given a bibliography file, `fixes` it by removing URLs from articles,
    ASCIIifying all the fields and replacing dates with years.

    The per-entry work is delegated to ``fix_entry``; the result is the
    dumped bibliography encoded as UTF-8.
    """
    # Build a parser that will ASCIIify everything:
    # See: https://bibtexparser.readthedocs.io/en/v0.6.2/tutorial.html#accents-and-weird-characters
    ascii_parser = BibTexParser()
    ascii_parser.customization = homogeneize_latex_encoding

    database = bibtexparser.loads(bibtex_string, parser=ascii_parser)
    for record in database.entries:
        fix_entry(record)

    # TODO: if py3k, do not encode.
    dumped = bibtexparser.dumps(database)
    return dumped.encode("UTF-8")