Exemplo n.º 1
0
def format_paper_citation_dict(citation, indent='  '):
    """
    Format a citation dict for a paper or a list of papers into a BibTeX
    record string.

    Entries that share an ``ID`` are disambiguated: the second and later
    occurrences get ``_2``, ``_3``, ... appended to their ``ID``. The renaming
    is applied to shallow copies, so the dicts passed in by the caller are
    left untouched.

    :param citation: A ``Paper`` citation dict or list of such dicts.
    :param indent: Indentation to be used in BibTeX output.
    :returns: The text produced by the BibTeX writer, stripped of leading
        and trailing whitespace.
    """
    if isinstance(citation, dict):
        citation = [citation]

    # Handle conflicting ids for entries (on copies, never on the input).
    seen_ids = collections.Counter()
    entries = []
    for original in citation:
        entry = dict(original)
        entry_id = entry['ID']
        seen_ids[entry_id] += 1
        if seen_ids[entry_id] > 1:
            entry['ID'] = '%s_%s' % (entry_id, seen_ids[entry_id])
        entries.append(entry)

    writer = BibTexWriter()
    writer.indent = indent
    db = BibDatabase()
    db.entries = entries
    return writer.write(db).strip()
Exemplo n.º 2
0
    def load_and_replace(bibtex_file):
        """Load ``publications/<bibtex_file>`` and return its entries, newest year first.

        The file is parsed twice: once plainly (used to render each entry
        back to BibTeX text, stored under ``'BIB_ENTRY'``) and once with
        ``bc.author`` passed to the parser (these are the entries returned).
        Every field except ``'BIB_ENTRY'`` goes through
        ``context.make_replacements`` and the author field through
        ``_format_author_list``. Entries without a ``year`` are grouped under
        1970. Entries of the same year keep their parse order.
        """
        bib_path = os.path.join('publications', bibtex_file)
        with open(bib_path, 'r', encoding="utf-8") as handle:
            raw = handle.read()
            raw_by_id = BibTexParser(raw).get_entry_dict()
            customized = BibTexParser(raw, bc.author).get_entry_list()

        grouped = {}
        for pub in customized:
            # Render the untouched version of this entry as BibTeX text.
            single = BibDatabase()
            single.entries = [raw_by_id[pub['ID']]]
            bib_writer = BibTexWriter()
            bib_writer.indent = '\t'
            pub['BIB_ENTRY'] = bib_writer.write(single)

            # Only existing keys are reassigned, so the dict size is stable
            # while iterating.
            for field, value in pub.items():
                if field != 'BIB_ENTRY':
                    pub[field] = context.make_replacements(value)
            pub['author'] = _format_author_list(pub['author'])

            year = int(pub.get('year', 1970))
            grouped.setdefault(year, []).append(pub)

        ordered = []
        for year in sorted(grouped, reverse=True):
            ordered.extend(grouped[year])
        return ordered
Exemplo n.º 3
0
def work_to_bibtex(work, name=None, acronym=False, rules=None):
    """Convert work to bibtex text

    The work is first turned into a single entry by ``work_to_bibtex_entry``
    (``name``, ``acronym`` and ``rules`` are forwarded to it unchanged) and
    that entry is then rendered with a two-space indented ``BibTexWriter``.

    Doctest:

    .. doctest::

        >>> reload()
        >>> murta2014a = work_by_varname("murta2014a")
        >>> print(work_to_bibtex(murta2014a))
        @inproceedings{murta2014a,
          address = {Cologne, Germany},
          author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
          booktitle = {International Provenance and Annotation Workshop},
          pages = {71--83},
          publisher = {Springer},
          title = {no{W}orkflow: capturing and analyzing provenance of scripts},
          year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>

        Custom name:

        >>> reload()
        >>> murta2014a = work_by_varname("murta2014a")
        >>> print(work_to_bibtex(murta2014a, name="other"))
        @inproceedings{other,
          address = {Cologne, Germany},
          author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
          booktitle = {International Provenance and Annotation Workshop},
          pages = {71--83},
          publisher = {Springer},
          title = {no{W}orkflow: capturing and analyzing provenance of scripts},
          year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>

        Use acronym for place name:

        >>> print(work_to_bibtex(murta2014a, acronym=True))
        @inproceedings{murta2014a,
          address = {Cologne, Germany},
          author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
          booktitle = {IPAW},
          pages = {71--83},
          publisher = {Springer},
          title = {no{W}orkflow: capturing and analyzing provenance of scripts},
          year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>
    """
    entry = work_to_bibtex_entry(
        work, name=name, acronym=acronym, rules=rules
    )

    two_space_writer = BibTexWriter()
    two_space_writer.indent = "  "

    database = BibDatabase()
    database.entries = [entry]
    return two_space_writer.write(database)
Exemplo n.º 4
0
def main_resps():
    """Convert the citations in ``data/resps-perturbed-tides.txt`` to a BibTeX file.

    Citations are parsed with ``parse_citations`` and converted with their
    ``to_bibtex`` method. IDs that occur more than once get a lowercase
    letter appended; within a group of clashing IDs the letters are handed
    out in descending order (the first clashing entry gets the highest
    letter, the last one gets ``a``). The result is written, encoded with
    ``str.encode()`` defaults, to ``data/resps-tides-perturbed-refs.bib``.
    """
    source_path = Path("data/resps-perturbed-tides.txt")
    parsed = parse_citations(source_path)

    database = BibDatabase()
    database.entries = [citation.to_bibtex() for citation in parsed]

    occurrences = defaultdict(int)
    for record in database.entries:
        occurrences[record["ID"]] += 1

    for key, remaining in occurrences.items():
        if remaining <= 1:
            continue
        # Snapshot the clashing entries before any of them is renamed.
        clashing = [record for record in database.entries
                    if record["ID"] == key]
        for record in clashing:
            remaining -= 1
            record["ID"] += ascii_lowercase[remaining]

    bib_writer = BibTexWriter()
    bib_writer.indent = "    "
    target_path = Path("data/resps-tides-perturbed-refs.bib")
    with target_path.open("wb") as ref_file:
        ref_file.write(bib_writer.write(database).encode())
Exemplo n.º 5
0
    def save(self, bibfile=-1):
        """
        Write every item of this list as BibTeX into a file located in the
        directory named by the ``$PYBLIO_BIB`` environment variable.

        :param bibfile: name of the target file; the default ``-1`` means
            "use ``self.name``" (the original file name).
        """
        target_name = self.name if bibfile == -1 else bibfile

        database = BibDatabase()
        database.entries.extend(item for item in self)

        formatter = BibTexWriter()     # prepares the output format
        formatter.indent = '   '       # three spaces in front of each field
        formatter.comma_first = False  # do not start lines with the comma
        formatter.align_values = True  # line up the field values

        target_path = os.path.join(os.path.expandvars('$PYBLIO_BIB'),
                                   target_name)
        print('')
        print(target_path)
        print('')

        # The BibTeX text is framed by one blank line before and after.
        with open(target_path, 'w') as out:
            out.write('\n')
            out.write(formatter.write(database))
            out.write('\n')
Exemplo n.º 6
0
def metaDictToBib(jobid, metadict, omit_keys, path_prefix):
    """Render the meta data of one document as a BibTeX entry string.

    The meta dict is first converted with ``toOrdinaryDict`` (using the
    module-level ``INV_ALT_KEYS`` mapping for key changes) and the result is
    written with a four-space indented ``BibTexWriter``.

    Args:
        jobid (int): id of job, returned unchanged.
        metadict (DocMeta): meta dict of a doc.
        omit_keys (list): keys to omit in the converted dict.
        path_prefix (str): folder path to prepend to attachment file paths.

    Returns:
        rec (int): 0 if successful, 1 otherwise.
        jobid (int): the input jobid as it is.
        dbtext (str): formatted bibtex entry, '' if <rec>==1.
        docid (int): id of the processed document (``metadict['id']``).
    """
    try:
        entry = toOrdinaryDict(metadict, INV_ALT_KEYS, omit_keys, path_prefix)

        bib_writer = BibTexWriter()
        bib_writer.indent = '    '
        bib_writer.comma_first = False

        database = BibDatabase()
        database.entries = [entry]

        return 0, jobid, bib_writer.write(database), metadict['id']
    except Exception:
        # Any failure is logged with its traceback and reported via rec == 1.
        LOGGER.exception('Failed to write to bibtex')
        return 1, jobid, '', metadict['id']
Exemplo n.º 7
0
def format_paper_citation_dict(citation, indent='  '):
    """
    Format a citation dict for a paper or a list of papers into a BibTeX
    record string.

    Entries that share an ``ID`` are disambiguated: the second and later
    occurrences get ``_2``, ``_3``, ... appended to their ``ID``. The renaming
    is applied to shallow copies, so the dicts passed in by the caller are
    left untouched.

    :param citation: A ``Paper`` citation dict or list of such dicts.
    :param indent: Indentation to be used in BibTeX output.
    :returns: The text produced by the BibTeX writer, stripped of leading
        and trailing whitespace.
    """
    if isinstance(citation, dict):
        citation = [citation]

    # Handle conflicting ids for entries (on copies, never on the input).
    seen_ids = collections.Counter()
    entries = []
    for original in citation:
        entry = dict(original)
        entry_id = entry['ID']
        seen_ids[entry_id] += 1
        if seen_ids[entry_id] > 1:
            entry['ID'] = '%s_%s' % (entry_id, seen_ids[entry_id])
        entries.append(entry)

    writer = BibTexWriter()
    writer.indent = indent
    db = BibDatabase()
    db.entries = entries
    return writer.write(db).strip()
Exemplo n.º 8
0
def getcitation():
    """Refresh the ``cited`` count of a random sample of articles.

    Up to 20 randomly chosen entries of ``articles.bib`` are processed. For
    an entry whose ``clusterid`` is not ``"unknown"``, ``scholar.py`` is run
    and the last whitespace-separated token of its output lines that contain
    ``Citations`` (and do not contain ``list``) is taken as the new count.
    The stored count is only replaced when the new one is larger by fewer
    than 8. After every processed entry the database is written to
    ``cited-add-articles.bib`` and copied to ``tempcited-add-articles.bib``
    in the current directory. Finally ``articles.bib`` is backed up to
    ``oldarticles.bib`` and overwritten with the updated database.

    :returns: 0
    """
    import random
    import shutil
    import subprocess

    base_dir = '/home/limingtao/ircre-bibtex/ircreupdate/'
    articles_path = base_dir + 'articles.bib'
    checkpoint_path = base_dir + 'cited-add-articles.bib'

    articlesparser = BibTexParser(common_strings=False)
    articlesparser.ignore_nonstandard_types = False
    with open(articles_path, encoding='utf8') as articlesfile:
        articles_database = bibtexparser.load(articlesfile, articlesparser)

    articleentries = articles_database.entries

    # Configured before the loop so the final dump can rely on it even when
    # the sample is empty.
    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order',)

    # random.sample raises ValueError when asked for more items than exist.
    sample_size = min(20, len(articleentries))
    samplelist = random.sample(range(len(articleentries)), sample_size)
    print(samplelist)

    for i in samplelist:
        entry = articleentries[i]
        print("---------------------------")
        print("Entry number: " + str(i))
        title = entry['title']
        clusterid = entry['clusterid']
        print("Title: " + title)
        print("Cluster ID: " + clusterid)

        citations = "unknown"
        if clusterid != "unknown":
            print(str(i))
            try:
                # Argument list instead of a shell string: the cluster id
                # comes from the .bib file and must not be shell-interpreted.
                completed = subprocess.run(
                    ['/usr/bin/python3',
                     base_dir + 'scholarpy/scholar.py',
                     '-c', '1', '-C', clusterid],
                    stdout=subprocess.PIPE,
                    universal_newlines=True)
                # Same filtering the former "grep -v list | grep Citations"
                # pipeline performed.
                matching = [line for line in completed.stdout.splitlines()
                            if 'list' not in line and 'Citations' in line]
                citations = ' '.join(matching).split()[-1]
            except (OSError, ValueError, IndexError):
                # Lookup is best effort: no usable output means "unknown".
                citations = "unknown"

        print("new Citations: " + citations)

        oldcitednumber = int(entry['cited']) if 'cited' in entry else 0
        print("Old Cited Number: " + str(oldcitednumber))

        if citations != "unknown":
            citednumber = int(citations)
            # Accept only increases smaller than 8.
            if 0 < citednumber - oldcitednumber < 8:
                entry['cited'] = str(citednumber)

        with open(checkpoint_path, 'w', encoding='utf8') as newarticlefile:
            bibtexparser.dump(articles_database, newarticlefile, writer=writer)
        shutil.copy(checkpoint_path, "tempcited-add-articles.bib")

    # The backup must be complete before articles.bib is truncated below; a
    # fire-and-forget "cp" via os.popen gave no such guarantee.
    shutil.copy(articles_path, base_dir + 'oldarticles.bib')
    with open(articles_path, 'w', encoding='utf8') as newarticlefile:
        bibtexparser.dump(articles_database, newarticlefile, writer=writer)

    return 0
Exemplo n.º 9
0
def getBibtexStrFromAbstractDict(abstractDict):
    """Render ``abstractDict`` as one BibTeX entry, leaving out ``url`` and ``journal``.

    The entry is built from a filtered copy, so the caller's dict is not
    modified and dicts lacking ``url`` or ``journal`` are accepted.

    :param abstractDict: entry dict to render.
    :returns: the text produced by a four-space indented ``BibTexWriter``.
    """
    entry = {key: value for key, value in abstractDict.items()
             if key not in ('url', 'journal')}
    writer = BibTexWriter()
    writer.indent = '    '
    db = BibDatabase()
    db.entries = [entry]
    return writer.write(db)
Exemplo n.º 10
0
def write_bib(bib_database, filen="dl4m.bib"):
    """Write the items stored in ``bib_database`` into the file ``filen``.

    The output is UTF-8 encoded, uses a two-space indent and has
    ``order_entries_by`` set to the fields ``noneyear`` and ``author``.
    """
    bib_writer = BibTexWriter()
    bib_writer.order_entries_by = ('noneyear', "author")
    bib_writer.indent = '  '
    with open(filen, "w", encoding="utf-8") as out:
        out.write(bib_writer.write(bib_database))
Exemplo n.º 11
0
def write_bibtex(db, filename):
    '''
    Dump the database ``db`` as UTF-8 encoded BibTeX into the file named
    ``filename``, using a writer with an empty indent and its
    ``add_trailing_comma`` option switched on.
    '''
    bib_writer = BibTexWriter()
    bib_writer.indent = ''
    bib_writer.add_trailing_comma = True
    with open(filename, 'w', encoding='utf-8') as out:
        bibtexparser.dump(db, out, bib_writer)
Exemplo n.º 12
0
def _writer():
    '''
    Build the BibTeX writer configuration: two-space indent,
    ``order_entries_by`` set to ``ID`` and ``display_order`` set to
    ``title``, ``author``, ``editor``.
    '''
    configured = BibTexWriter()
    configured.display_order = ['title', 'author', 'editor']
    configured.order_entries_by = ('ID',)
    configured.indent = '  '
    return configured
Exemplo n.º 13
0
    def make_bibs(self, prefix, output):
        """Dump the entries returned by ``self.get_all_bibs(prefix)`` into ``output``.

        The file is written tab-indented; the number of entries is logged
        afterwards.
        """
        collected = self.get_all_bibs(prefix)

        tab_writer = BibTexWriter()
        tab_writer.indent = '\t'

        database = BibDatabase()
        database.entries = collected

        with open(output, 'w') as out:
            bibtexparser.dump(database, out, tab_writer)
        logging.info('processed %d bib entries', len(collected))
Exemplo n.º 14
0
def convert_to_bib(content, save_fpath):
    """Parse ``content`` with ``parse_api_response`` and save the papers as BibTeX.

    The file at ``save_fpath`` is opened in ``"w+"`` mode and written with a
    four-space indent and ``comma_first`` enabled.
    """
    database = BibDatabase()
    database.entries = parse_api_response(content)

    comma_first_writer = BibTexWriter()
    comma_first_writer.comma_first = True
    comma_first_writer.indent = "    "

    with open(save_fpath, "w+") as out:
        out.write(comma_first_writer.write(database))
Exemplo n.º 15
0
 def dumps(bibman):
     """Return the entries in ``bibman.cleaned`` rendered as BibTeX text.

     ``bibman.cleaned`` is used both as the database's private entry dict and,
     via its values, as the entry list. The writer emits comments and
     entries, with ``order_entries_by`` set to ``None`` and a four-space
     indent.
     """
     database = bibtexparser.bparser.BibDatabase()
     database._entries_dict = bibman.cleaned
     database.entries = list(bibman.cleaned.values())

     bib_writer = BibTexWriter()
     bib_writer.indent = '    '
     bib_writer.contents = ['comments', 'entries']
     bib_writer.order_entries_by = None
     return bibtexparser.dumps(database, bib_writer)
Exemplo n.º 16
0
def bibtexclassify():
    """Split ``ircre.bib`` into ``articles.bib`` and ``others.bib`` by entry type.

    Entries of type ``article`` go to ``articles.bib``; entries of type
    ``inbook``, ``inproceedings`` or ``incollection`` go to ``others.bib``.
    Entries of any other type are written to neither file. Both files are
    written with a four-space indent and ``order_entries_by`` set to the
    ``order`` field.

    :returns: 0
    """
    source_parser = BibTexParser(common_strings=False)
    source_parser.ignore_nonstandard_types = False

    with open('/home/limingtao/ircre-bibtex/ircreupdate/ircre.bib', encoding='utf8') as source_file:
        source_database = bibtexparser.load(source_file, source_parser)

    every_entry = source_database.entries.copy()

    # ----------------------------------------
    # journal articles
    # ----------------------------------------
    articles_db = BibDatabase()
    articles_db.entries = [entry.copy() for entry in every_entry
                           if entry['ENTRYTYPE'] == 'article']

    article_writer = BibTexWriter()
    article_writer.indent = '    '
    article_writer.order_entries_by = ('order',)
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', 'w', encoding='utf8') as articles_out:
        bibtexparser.dump(articles_db, articles_out, writer=article_writer)

    # ----------------------------------------
    # book chapters, proceedings papers, collection parts
    # ----------------------------------------
    other_types = ('inbook', 'inproceedings', 'incollection')
    others_db = BibDatabase()
    others_db.entries = [entry.copy() for entry in every_entry
                         if entry['ENTRYTYPE'] in other_types]

    others_writer = BibTexWriter()
    others_writer.indent = '    '
    others_writer.order_entries_by = ('order',)
    with open('/home/limingtao/ircre-bibtex/ircreupdate/others.bib', 'w', encoding='utf8') as others_out:
        bibtexparser.dump(others_db, others_out, writer=others_writer)

    return 0
Exemplo n.º 17
0
    def test_comma_first(self):
        """Writing ``book.bib`` with ``comma_first`` enabled must match ``book_comma_first.bib``."""
        with io.open(_data_path('book.bib'), 'r') as source:
            parsed = BibTexParser(source.read())
        with io.open(_data_path('book_comma_first.bib'), 'r') as reference:
            expected = reference.read()

        comma_writer = BibTexWriter()
        comma_writer.comma_first = True
        comma_writer.indent = '   '
        actual = comma_writer.write(parsed)

        # Show the complete diff on failure.
        self.maxDiff = None
        self.assertEqual(expected, actual)
Exemplo n.º 18
0
def printCandidates(candidates: List[Dict]):
    """Print how many candidates there are, followed by their BibTeX rendering.

    The entries are written with aligned values and a two-space indent.
    """
    database = BibDatabase()
    database.entries = candidates

    aligned_writer = BibTexWriter()
    aligned_writer.indent = "  "
    aligned_writer.align_values = True

    print(f"{len(candidates)} suggestions:\n")
    print(aligned_writer.write(database))
def bibtex_entries_to_string(entries: List[Dict]):
    """Render ``entries`` as BibTeX text; an empty sequence yields ``""``.

    Non-empty input is written with aligned values and a two-space indent.
    """
    if len(entries) > 0:
        database = BibDatabase()
        database.entries = entries

        aligned_writer = BibTexWriter()
        aligned_writer.indent = "  "
        aligned_writer.align_values = True
        return aligned_writer.write(database)

    return ""
Exemplo n.º 20
0
    def test_comma_first(self):
        """Writing ``book.bib`` with ``comma_first`` enabled must match ``book_comma_first.bib``."""
        with io.open(_data_path('book.bib'), 'r') as source:
            parsed = BibTexParser(source.read())
        with io.open(_data_path('book_comma_first.bib'), 'r') as reference:
            expected = reference.read()

        comma_writer = BibTexWriter()
        comma_writer.comma_first = True
        comma_writer.indent = '   '
        actual = comma_writer.write(parsed)

        # Show the complete diff on failure.
        self.maxDiff = None
        self.assertEqual(expected, actual)
Exemplo n.º 21
0
def write_bibliography(file, db):
    """Write bibliography entries to new file.

    Parameters
    ----------
    file : str or writable file object
        Destination; handed to ``file_context`` together with mode ``'w'``
        and UTF-8 encoding.
    db :
        Database object rendered by ``BibTexWriter.write`` with a four-space
        indent.
    """
    bib_writer = BibTexWriter()
    bib_writer.indent = '    '

    with file_context(file, 'w', encoding='utf-8') as out:
        rendered = bib_writer.write(db)
        out.write(rendered)
Exemplo n.º 22
0
 def parsing_write(self, filename):
     """Rebuild ``self.db.entries`` from the tree view rows and write them to ``filename``.

     Each row of ``self.TreeView.full_list`` is zipped with ``self.entries``
     (presumably the field names, in column order) and values that are
     ``None`` are left out of the resulting entry dict.
     """
     self.db.entries = [
         {field: value
          for field, value in zip(self.entries, row)
          if value is not None}
         for row in self.TreeView.full_list
     ]

     bib_writer = BibTexWriter()
     bib_writer.indent = '    '
     with open(filename, 'w') as out:
         out.write(bib_writer.write(self.db))
Exemplo n.º 23
0
def ircrebibmerge():
    """Concatenate ``top15.bib``, ``sorted-articles.bib`` and ``others.bib`` into ``newircre.bib``.

    The merged database lists the top-15 entries first, then the sorted
    articles, then the others. The writer's ``order_entries_by`` is set to
    ``None`` and a four-space indent is used.

    :returns: 0
    """
    def read_entries(path):
        # Each file gets its own freshly configured parser.
        parser = BibTexParser(common_strings=False)
        parser.ignore_nonstandard_types = False
        with open(path, encoding='utf8') as handle:
            database = bibtexparser.load(handle, parser)
        return database.entries.copy()

    sortedarticles = read_entries('/home/limingtao/ircre-bibtex/ircreupdate/sorted-articles.bib')
    top15articles = read_entries('/home/limingtao/ircre-bibtex/ircreupdate/top15.bib')
    others = read_entries('/home/limingtao/ircre-bibtex/ircreupdate/others.bib')

    merged_entries = []
    for group in (top15articles, sortedarticles, others):
        merged_entries.extend(entry.copy() for entry in group)

    merged_db = BibDatabase()
    merged_db.entries = merged_entries

    plain_writer = BibTexWriter()
    plain_writer.indent = '    '
    plain_writer.order_entries_by = None

    with open('/home/limingtao/ircre-bibtex/ircreupdate/newircre.bib', 'w', encoding='utf8') as merged_out:
        bibtexparser.dump(merged_db, merged_out, writer=plain_writer)

    return 0
Exemplo n.º 24
0
    def write_res(self, passed_entries, passed_name, failed_entries,
                  failed_name):
        """Store the results below ``results/`` and print where they went.

        ``passed_entries`` are written as BibTeX (four-space indent, commas
        not placed first) to ``results/<passed_name>``; ``failed_entries``
        are dumped as JSON with an indent of 4 to ``results/<failed_name>``.
        """
        bib_writer = BibTexWriter()
        bib_writer.comma_first = False
        bib_writer.indent = '    '

        database = BibDatabase()
        database.entries = passed_entries

        passed_path = "results/" + passed_name
        with open(passed_path, 'w') as passed_out:
            passed_out.write(bib_writer.write(database))

        failed_path = "results/" + failed_name
        with open(failed_path, 'w') as failed_out:
            json.dump(failed_entries, failed_out, indent=4)

        print("Writing data to filesystem!")
        print("  -successful results can be found in: " + passed_path)
        print("  -failed results can be found in: " + failed_path)
    def test_indent(self):
        """A two-space ``indent`` must show up in front of the field line of the dumped entry."""
        database = BibDatabase()
        database.entries = [{'ID': 'abc123',
                             'ENTRYTYPE': 'book',
                             'author': 'test'}]

        two_space_writer = BibTexWriter()
        two_space_writer.indent = '  '

        expected = (
            "@book{abc123,\n"
            "  author = {test}\n"
            "}\n"
            "\n"
        )
        actual = bibtexparser.dumps(database, two_space_writer)
        self.assertEqual(actual, expected)
Exemplo n.º 26
0
def reformatbib(infile,outfile):
    """Re-write the BibTeX file ``infile`` to ``outfile`` with uniform formatting.

    The output uses aligned values and a two-space indent. A file that cannot
    be parsed is reported on stdout and nothing is written. A failure while
    writing ``outfile`` is reported separately instead of being mislabelled
    as a parse problem. Errors opening ``infile`` propagate to the caller.

    :param infile: path of the BibTeX file to read.
    :param outfile: path of the file to write.
    """
    with open(infile, 'r') as bibfileIn:
        try:
            bib_database = bibtexparser.load(bibfileIn)
        except Exception:
            # Parsing is best effort; KeyboardInterrupt/SystemExit are no
            # longer swallowed as they were by the former bare "except:".
            print(f'{infile} not parsed')
            return

    writer = BibTexWriter()
    writer.align_values = True
    writer.indent = '  '     # indent entries with spaces

    try:
        with open(outfile, 'w') as bibfileOut:
            bibfileOut.write(writer.write(bib_database))
    except OSError as exc:
        print(f'{outfile} not written: {exc}')
        return

    print(f'Reformated {infile} written to {outfile}')
Exemplo n.º 27
0
def export_citations(citations, destination):
    """Write the rows of ``citations`` as BibTeX entries to ``destination``.

    Each row becomes one entry dict; attributes whose value is a floating
    point NaN are left out of that entry.

    :param citations: object providing ``iterrows()`` (presumably a pandas
        ``DataFrame`` with one citation per row - verify against caller).
    :param destination: path of the BibTeX file to write.
    """
    def is_missing(value):
        # NaN is the only float that differs from itself. Testing by value
        # (instead of "is np.nan") also catches NaN objects that are not the
        # np.nan singleton, e.g. numpy scalars taken from a float column.
        return isinstance(value, (float, np.floating)) and value != value

    db = BibDatabase()
    db.entries = [
        {attribute: value
         for attribute, value in dict(row).items()
         if not is_missing(value)}
        for _, row in citations.iterrows()
    ]

    with open(destination, "w") as bibtexfile:
        writer = BibTexWriter()
        writer.indent = "    "
        bibtexparser.dump(db, bibtexfile, writer)
Exemplo n.º 28
0
Arquivo: fetch.py Projeto: siudej/Cite
    def _cleanupBibTex(self, count):
        """ Clean up bibtex and ensure uniform look.

        ``self.refs`` is parsed with ``homogeneize_latex_encoding`` as parser
        customization and replaced by the re-dumped text (entries only,
        four-space indent). ``self.number`` is set to the number of parsed
        entries. ``count`` is not used by this method.
        """
        import bibtexparser
        from bibtexparser.bparser import BibTexParser
        from bibtexparser.bwriter import BibTexWriter

        parser = BibTexParser()
        parser.customization = homogeneize_latex_encoding
        bib = bibtexparser.loads(self.refs, parser=parser)

        # save results
        writer = BibTexWriter()
        writer.contents = ['entries']
        writer.indent = '    '
        # A one-element tuple needs the trailing comma; ('id') is just the
        # string 'id', not a sequence containing one field name.
        # NOTE(review): bibtexparser >= 1.0 stores the key under 'ID' -
        # confirm which version this project targets.
        writer.order_entries_by = ('id',)
        self.number = len(bib.entries)
        self.refs = bibtexparser.dumps(bib, writer)
Exemplo n.º 29
0
def getclusterid(title, author):
    """Look up the cluster id of every article whose ``clusterid`` is ``unknown``.

    ``articles.bib`` is loaded and, for each entry still marked ``unknown``,
    ``scholar.py`` is queried with the entry's title as phrase. The last
    whitespace-separated token of its output lines containing both ``ID`` and
    ``Cluster`` becomes the new cluster id (``"unknown"`` when there is no
    such output). After each lookup the database is written to
    ``clusterid-added-ircre.bib`` and copied to
    ``tempclusterid-added-ircre.bib`` as a checkpoint; the output file is
    written once more when the loop has finished.

    :param title: not used; titles are taken from the entries themselves.
    :param author: not used.
    :returns: 0
    """
    import shutil
    import subprocess

    base_dir = '/home/limingtao/ircre-bibtex/ircreupdate/'
    output_path = base_dir + 'clusterid-added-ircre.bib'

    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False

    with open(base_dir + 'articles.bib', encoding='utf8') as article_file:
        article_database = bibtexparser.load(article_file, parser)

    # The loaded database is the one inspected and dumped below (the former
    # code referred to an undefined name here).
    entries = article_database.entries
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    print("Total articles number: " + str(len(entries)))
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order',)

    for i, entry in enumerate(entries):
        if entry['clusterid'] != 'unknown':
            continue

        print("---------------------------")
        print("Entry number: " + str(i))
        entry_title = entry['title']
        print("Title: " + entry_title)
        try:
            # Argument list instead of a shell string: titles may contain
            # quotes, "$" or backticks that a shell would interpret.
            completed = subprocess.run(
                [base_dir + 'scholarpy/scholar.py',
                 '-c', '1', '-t', '--phrase=' + entry_title],
                stdout=subprocess.PIPE,
                universal_newlines=True)
            # Same filtering the former "grep ID | grep Cluster" performed.
            matching = [line for line in completed.stdout.splitlines()
                        if 'ID' in line and 'Cluster' in line]
            clusterid = ' '.join(matching).split()[-1]
        except (OSError, ValueError, IndexError):
            # Lookup is best effort: no usable output means "unknown".
            clusterid = "unknown"

        print("new Cluster ID: " + clusterid)
        entry['clusterid'] = clusterid

        # Checkpoint after each lookup only; entries that were skipped did
        # not change the database.
        with open(output_path, 'w', encoding='utf8') as newbibfile:
            bibtexparser.dump(article_database, newbibfile, writer=writer)
        shutil.copy(output_path, base_dir + 'tempclusterid-added-ircre.bib')

    with open(output_path, 'w', encoding='utf8') as newbibfile:
        bibtexparser.dump(article_database, newbibfile, writer=writer)

    return 0
Exemplo n.º 30
0
def proc_bib(input_io: TextIOWrapper,
             output_io: TextIOWrapper,
             jdb: JournalDB,
             silent: bool = False,
             output_format: str = "bib",
             abbrev_type="iso4"):
    """Abbreviate the ``journaltitle`` fields of a BibTeX stream.

    For every entry with a ``journaltitle``, braces are removed from the
    title and ``jdb.journals`` is queried with a case-insensitive pattern
    starting with that title. On a hit, the ``abbrev_type`` attribute of the
    journal is used: with ``output_format == "bib"`` the field is replaced by
    the abbreviation (or the brace-stripped title when the abbreviation is
    falsy) wrapped in braces; with ``"sourcemap"`` ``gen_sourcemap_map``
    writes to ``output_io`` instead. Unless ``silent`` is set, one ``info``
    message is emitted per entry that has a ``journaltitle``. In ``"bib"``
    mode the whole database is finally written to ``output_io``.

    Raises:
        ValueError: if ``Journal`` has no attribute named ``abbrev_type``.
    """
    if not hasattr(Journal, abbrev_type):
        raise ValueError(f"Invalid abbreviation type `{abbrev_type}`")

    bib_db = bibtexparser.load(input_io)
    emit_bib = output_format == "bib"

    for entry in bib_db.entries:
        raw_title = entry.get("journaltitle")
        if raw_title is None:
            continue
        journaltitle = braces_regex.sub("", raw_title)

        name_pattern = re.compile(fr"^{re.escape(journaltitle)}(:?.*)$",
                                  RegexFlag.IGNORECASE)
        # TODO: query using lambdas?
        # TODO: normalize names (just in index?).
        hit = jdb.journals.query_one(Journal.names_key, name_pattern)
        if hit:
            _, journal = hit
            abbrev = getattr(journal, abbrev_type)

            if emit_bib:
                entry["journaltitle"] = f"{{{abbrev or journaltitle}}}"
            elif output_format == "sourcemap":
                gen_sourcemap_map(journal, journaltitle, abbrev, output_io)

            abbrev_msg = f"abbreviating to '{abbrev}'"
        else:
            abbrev_msg = "no abbreviation found"

        if not silent:
            info(f"found journal name '{journaltitle}'; {abbrev_msg}.")

    # Only the "bib" format produces output at this point; "sourcemap"
    # output was already emitted entry by entry above.
    if emit_bib:
        bib_writer = BibTexWriter()
        bib_writer.indent = "\t"
        bib_writer.add_trailing_comma = True
        bib_writer.display_order = None
        bib_writer.order_entries_by = None
        output_io.write(bib_writer.write(bib_db))
Exemplo n.º 31
0
def main():
    """Look up the cluster id of every entry of ``ircre.bib`` marked ``unknown``.

    For each entry whose ``clusterid`` is ``unknown``,
    ``./scholarpy/scholar.py`` is queried with the entry's title as phrase.
    The last whitespace-separated token of its output lines containing both
    ``ID`` and ``Cluster`` becomes the new cluster id (``"unknown"`` when
    there is no such output). After each lookup the database is written to
    ``clusterid-added-ircre.bib`` and copied to
    ``tempclusterid-added-ircre.bib`` as a checkpoint; the output file is
    written once more when the loop has finished.

    :returns: 0
    """
    import shutil
    import subprocess

    import bibtexparser
    from bibtexparser.bwriter import BibTexWriter

    output_path = 'clusterid-added-ircre.bib'

    with open('ircre.bib', encoding='utf8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)

    entries = bib_database.entries
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    print("Total articles number: " + str(len(entries)))
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order', )

    for i, entry in enumerate(entries):
        if entry['clusterid'] != 'unknown':
            continue

        print("---------------------------")
        print("Entry number: " + str(i))
        title = entry['title']
        print("Title: " + title)
        try:
            # Argument list instead of a shell string: titles may contain
            # quotes, "$" or backticks that a shell would interpret.
            completed = subprocess.run(
                ['./scholarpy/scholar.py',
                 '-c', '1', '-t', '--phrase=' + title],
                stdout=subprocess.PIPE,
                universal_newlines=True)
            # Same filtering the former "grep ID | grep Cluster" performed.
            matching = [line for line in completed.stdout.splitlines()
                        if 'ID' in line and 'Cluster' in line]
            clusterid = ' '.join(matching).split()[-1]
        except (OSError, ValueError, IndexError):
            # Lookup is best effort: no usable output means "unknown".
            clusterid = "unknown"

        print("new Cluster ID: " + clusterid)
        entry['clusterid'] = clusterid

        # Checkpoint after each lookup only; entries that were skipped did
        # not change the database.
        with open(output_path, 'w', encoding='utf8') as newbibfile:
            bibtexparser.dump(bib_database, newbibfile, writer=writer)
        shutil.copy(output_path, 'tempclusterid-added-ircre.bib')

    with open(output_path, 'w', encoding='utf8') as newbibfile:
        bibtexparser.dump(bib_database, newbibfile, writer=writer)

    return 0
Exemplo n.º 32
0
def main():
    """Build one BibTeX file from the YAML front matter of markdown files.

    Usage: ``<script> OUTPUT FILE.md [FILE.md ...]``.

    For each input file the first line is skipped and the lines up to the
    next ``---`` line are parsed as YAML into one entry. ``#`` and ``&`` are
    backslash-escaped in ``title`` and ``booktitle``, the title is wrapped in
    braces, ``authors`` is joined into an ``author`` field, some numeric
    fields are converted to strings and site-only keys are dropped. The
    entries are written to OUTPUT below a "machine generated" banner.
    """
    output = sys.argv[1]
    mds = sys.argv[2:]
    es = []
    for fn in mds:
        # print(f"loading {fn}")
        with open(fn, "r", encoding='UTF-8') as f:
            ls = f.readlines()[1:]
        front_matter = itertools.takewhile(lambda x: x != "---\n", ls)
        # NOTE(review): FullLoader is meant for trusted input; switch to
        # yaml.safe_load if these files can come from untrusted sources.
        e = yaml.load("".join(front_matter), Loader=yaml.FullLoader)
        # Assumes paths of the form "<dir>/<name>.md": the ID is the second
        # path component without its last three characters.
        e['ID'] = fn.split("/")[1][0:-3]
        for i in ['title', 'booktitle']:
            if i in e:
                # Escaped backslashes: "\#" and "\&" are invalid escape
                # sequences that newer Python versions warn about.
                e[i] = e[i].replace("#", "\\#").replace("&", "\\&")
        e['title'] = "{" + e['title'] + "}"
        if 'authors' in e:
            e['author'] = " and ".join(e['authors'])
            del e['authors']
        for i in ['isbn', 'pages', 'volume', 'year']:
            if i in e:
                e[i] = str(e[i])
        for i in [
                'added', 'layout', 'notes', 'papers', 'read', 'readings',
                'topics'
        ]:
            if i in e:
                del e[i]
        es.append(e)

    db = BibDatabase()
    db.entries = es

    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = '  '
    # writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
    bibtex_str = bibtexparser.dumps(db, writer)
    # Written as UTF-8 to match the encoding the inputs are read with.
    with open(output, "w", encoding='UTF-8') as f:
        print(("#############################################\n"
               "# This file is machine generated, do not edit\n"
               "#############################################\n"),
              file=f)
        print(bibtex_str, file=f)
Exemplo n.º 33
0
def write_bibtex_dict(stream, entries):
    """Write ``entries`` to the binary ``stream`` one record at a time.

    Unlike the library's own write routine, which builds the whole database
    as one big string first, each record is rendered, UTF-8 encoded and
    written individually. Records are separated by a single newline byte.
    """
    from bibtexparser.bwriter import BibTexWriter

    record_writer = BibTexWriter()
    record_writer.entry_separator = ''
    record_writer.indent = '  '

    for position, rec in enumerate(entries):
        # The separator goes between records, not before the first one.
        if position:
            stream.write(b'\n')
        stream.write(record_writer._entry_to_bibtex(rec).encode('utf8'))
def print_candidates(candidates: List[Dict]):
    """Print a headline with the number of candidates, then their BibTeX rendering.

    With no candidates only ``No suggestions.`` is printed. Otherwise the
    entries are written with aligned values and a two-space indent.
    """
    count = len(candidates)
    if count == 0:
        print("No suggestions.")
        return

    headline = "One suggestion:\n" if count == 1 else f"{count} suggestions:\n"
    print(headline)

    aligned_writer = BibTexWriter()
    aligned_writer.indent = "  "
    aligned_writer.align_values = True

    database = BibDatabase()
    database.entries = candidates

    print(aligned_writer.write(database))
Exemplo n.º 35
0
def write_bibtex_dict (stream, entries):
    """Write ``entries`` to the text ``stream`` one record at a time.

    Unlike the library's own write routine, which builds the whole database
    as one big string first, each record is rendered and written
    individually. Records are separated by a single newline.
    """
    from bibtexparser.bwriter import BibTexWriter

    record_writer = BibTexWriter()
    record_writer.entry_separator = ''
    record_writer.indent = '  '

    for position, rec in enumerate(entries):
        # The separator goes between records, not before the first one.
        if position:
            stream.write('\n')
        stream.write(record_writer._entry_to_bibtex(rec))
Exemplo n.º 36
0
def write_bib(db, order=False):
    """
    Serialize a bibliography database to a BibTeX string.

    The database is normalized before being written, and these changes are
    applied to ``db`` itself: month values found in ``MONTHS`` are replaced
    by their zero-padded 1-based position, entries are optionally re-sorted,
    entries are passed through ``nomenclature.encode_ascii_latex`` when
    ``config.use_utf8_characters`` is off, and titles are passed through
    ``latex.protect_uppercase`` when ``config.protect_uppercase`` is on.

    Args:
        db (BibDatabase): database object to dump. Modified in place.
        order (bool): whether to reorder entries (by year, author, ID)
            before writing.

    Returns:
        The dumped string.
    """

    # Custom writer: tab indentation, and the writer's own sorting disabled
    # so entries are emitted in the order they have in ``db``.
    writer = BibTexWriter()
    writer.indent = '\t'
    writer.order_entries_by = None

    # Replace month by numeric value
    for entry in db.entries:
        if 'month' in entry and entry['month'] in MONTHS:
            entry['month'] = '{:02d}'.format(MONTHS.index(entry['month']) + 1)

    if order:
        # Manual sort
        order_entries_by = ('year', 'author', 'ID')
        sort_entries(db, order_entries_by)

    if not config.use_utf8_characters:
        db.entries = [nomenclature.encode_ascii_latex(entry) for entry in db.entries]

    if config.protect_uppercase:
        for entry in db.entries:
            # Not every entry carries a title; skip those instead of
            # failing with a KeyError.
            if "title" in entry:
                entry["title"] = latex.protect_uppercase(entry["title"])

    # Write bib string
    return writer.write(db)
Exemplo n.º 37
0
    def formatText(self):
        """Normalize every entry in ``self.allbibtex`` and write each to a .bib file.

        Input is loaded through ``self.openfile()`` when ``self.BibtexfilePath``
        is non-empty and through ``self.readcontent()`` otherwise (presumably
        both populate ``self.allbibtex`` -- confirm against those methods).

        For each entry the fields are copied into a new dict with these rules:

        * ``ENTRYTYPE`` and ``ID`` are copied as-is.
        * ``doi``, ``ISSN``, ``keywords``, empty values and any key containing
          ``url`` are dropped.
        * All other keys are right-padded with spaces to the longest key
          length seen so far.
        * ``booktitle`` is rewritten to ``'Proc. of ' + <name>``, taking the
          name from a parenthesized part of the value or from the map returned
          by ``self.getMap()``.
        * ``title`` words get an upper-case first letter (except words in
          ``self.prep`` / ``self.artie``) and the title is wrapped in braces.
        * ``pages`` uses ``--`` as range separator.
        * Newlines in ``author`` are replaced by spaces.

        The result is written to ``<title>.bib`` in the current directory.
        This is Python 2 code (``print`` statements).
        """
        if self.BibtexfilePath != '':
            self.openfile()
        else:
            self.readcontent()

        # Re-key one map entry under a shorter conference name.
        m = self.getMap()
        m['IEEE Global Communications Conference'] = m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
        del m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
        print m

        # `length` is a running maximum of key lengths over all entries seen
        # so far; it is used to pad keys to a common width.
        length = 0
        # NOTE(review): `nb` is created once, outside the loop, so fields
        # copied for one entry are still present when the next entry is
        # written -- looks unintended, confirm.
        nb = {}
        for bibtex in self.allbibtex:
            for key in bibtex.keys():
                if len(key) > length and key != 'ENTRYTYPE':
                    length = len(key)
            for k, v in bibtex.items():
                if k == 'ENTRYTYPE' or k == 'ID':
                    nb[k] = v
                    continue
                # NOTE(review): this branch can never run; 'ID' is already
                # handled by the condition above.
                elif k == 'ID':
                    nb[k] = v
                    continue
                elif k == 'doi' or k == 'ISSN' or k == 'keywords':
                    continue
                elif v == '':
                    continue
                elif 'url' in k:
                    continue

                # Pad the key with trailing spaces up to the common width.
                nk = k + (length - len(k)) * ' '

                if 'booktitle' in nk:
                    if '(' in v:
                        # Use the text between the first '(' and the next ')'.
                        v1 = v.split('(')[1].split(')')[0]
                        nb[nk] = 'Proc. of ' + v1
                        continue
                    flag = 0 # booktitle not rewritten yet

                    # Strip punctuation before looking the value up in the map.
                    to_remove = "~`!@#$%^&*(){}[];':<>|-=_+"
                    table = {ord(char): None for char in to_remove}
                    clean_v = v.translate(table)

                    #clean_v = v.translate(string.punctuation)
                    #print clean_v
                    for kk, vv in m.items():
                        if kk in clean_v:
                            # vv[0] becomes the proceedings name, vv[1] is
                            # stored under a padded 'publish' key
                            # ('publish' is 7 characters long).
                            nb[nk] = 'Proc. of ' + vv[0]
                            publish = 'publish' + (length - 7) * ' '
                            nb[publish] = vv[1]
                            flag = 1
                            break
                    if flag == 0:
                        # No map key matched: keep the value and report it.
                        nb[nk] = v
                        print v
                    continue

                elif nk.strip() == 'title' and 'booktitle' not in nk:
                    # Remembered for the output file name below.
                    # NOTE(review): attribute is spelled `tilte`; it is only
                    # set when the entry has a title field.
                    self.tilte = v
                    nv = v.split(' ')
                    for i in range(len(nv)):
                        # Capitalize the first letter of every title word
                        # except prepositions and articles.
                        if nv[i] in self.prep or nv[i] in self.artie:
                            continue
                        # Capitalize the first letter.
                        else:
                            # 97..122 are ASCII 'a'..'z'; subtracting 32
                            # gives the upper-case letter.
                            # NOTE(review): nv[i][0] raises IndexError for an
                            # empty word (e.g. two consecutive spaces).
                            if 97 <= ord(nv[i][0]) <= 122:
                                nv[i] = chr(ord(nv[i][0])-32)+nv[i][1:]

                    v = ' '.join(nv)
                    nb[nk] = '{' + v + '}'
                    continue

                elif 'pages' in nk:
                    if '--' in v:
                        nb[nk] = v
                        continue
                    nb[nk] = v.replace('-', '--')
                    continue
                elif 'author' in nk:
                    if '\n' in v:
                        nb[nk] = v.replace('\n', ' ')
                        continue

                # Everything else is left unchanged.
                nb[nk] = v

            db = BibDatabase()
            db.entries = [nb]
            writer = BibTexWriter()
            writer.indent = '\t'  # indent entry fields with a tab
            writer.comma_first = False  # comma-first layout disabled
            # One output file per entry, named after the last title seen.
            with open(self.tilte+'.bib', 'wb') as bibfile:
                bibfile.write(writer.write(db))
Exemplo n.º 38
0
# Look up a DOI for every bibliography entry that has none, then write the
# enriched bibliography to "<first CLI argument>_doi.bib".
print("Looking for Dois...")
before = 0  # entries that already had a DOI
new = 0     # entries that received a DOI in this run
failed = 0  # entries whose lookup raised an error
total = len(bibliography.entries)
for i,entry in enumerate(bibliography.entries):
    print("\r{i}/{total} entries processed, please wait...".format(i=i,total=total),flush=True,end="")
    try:
        # A missing, empty or whitespace-only "doi" field counts as "no DOI".
        if not entry.get("doi", "").strip():
            title = entry["title"]
            authors = get_authors(entry)
            for author in authors:
                doi_match = searchdoi(title,author)
                if doi_match:
                    doi = doi_match.groups()[0]
                    entry["doi"] = doi
                    new += 1
                    # Stop at the first hit: continuing would repeat the
                    # lookup for the remaining authors and count the same
                    # entry more than once.
                    break
        else:
            before += 1
    except Exception:
        # Best effort: a failing lookup must not abort the whole run, but
        # keep track of it so it can be reported below.
        failed += 1
print("")

if failed:
    print("{failed} entries could not be processed".format(failed=failed), file=sys.stderr)

template="We added {new} DOIs !\nBefore: {before}/{total} entries had DOI\nNow: {after}/{total} entries have DOI"

print(template.format(new=new,before=before,after=before+new,total=total))
outfile = sys.argv[1]+"_doi.bib"
print("Writing result to ",outfile)
writer = BibTexWriter()
writer.indent = '    '     # indent entries with 4 spaces instead of one
with open(outfile, 'w') as bibfile:
    bibfile.write(writer.write(bibliography))
Exemplo n.º 39
0
            'class':'logging.StreamHandler',
        },
    },
    'loggers': {
        '': {
            'handlers': ['default'],
            'level': 'ERROR',
            'formatter': 'standard',
            'propagate': True
        }
    }
})

# Module-level BibTeX writer configuration.
writer = BibTexWriter()
# Sections to emit, in this order.
writer.contents = ['comments', 'entries']
# Two-space indentation.
writer.indent = '  '
# Sort key for entries: entry type, then author, then year.
writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')

def create_id(t, year, title):
	"""Build an identifier of the form ``<t>_<year>_<title>``.

	Each part is converted with ``str``; ``title`` is first passed through
	``space_to_underscore``.
	"""
	pieces = [str(t), str(year), str(space_to_underscore(title))]
	return "_".join(pieces)

def pdf(pdf_files, shared_pdf, bibtex_folder, bibtex_files, gscholar):
	"""Extract the leading words of each PDF and optionally look up its BibTeX.

	For every path in ``pdf_files`` the text returned by
	``gs.convert_pdf_to_txt`` is lower-cased with non-word characters turned
	into spaces, and its first 35 words are printed. When ``gscholar`` is
	``True`` the first result of ``gs.pdflookup`` is passed to ``load``.

	``shared_pdf``, ``bibtex_folder`` and ``bibtex_files`` are not used in the
	code shown here.
	NOTE(review): the body appears to be cut off after the lookup -- confirm
	against the full source. This is Python 2 code (``print`` statement).
	"""
	# NOTE(review): the loop variable shadows the function's own name.
	for pdf in pdf_files:
		# Replace every non-word character by a space, then lower-case.
		txt = re.sub("\W", " ", gs.convert_pdf_to_txt(pdf)).lower()
		# Research determined that cutting at 35 words gives the
		# highest accuracy
		words = txt.strip().split()[:35]
		words = " ".join(words)		
		print words
		if gscholar == True:
			# NOTE(review): `all` is the Python builtin unless it is
			# rebound elsewhere in the file -- confirm it is the intended
			# argument.
			bib = load(gs.pdflookup(pdf, all, gs.FORMAT_BIBTEX)[0])
def toString(e):
    """Render the single entry ``e`` as BibTeX text.

    The entry is formatted with four-space indentation and the comma-first
    layout, using the writer's per-entry formatting method.
    """
    entry_writer = BibTexWriter()
    # Commas go at the start of each field line.
    entry_writer.comma_first = True
    # Four spaces of indentation per field.
    entry_writer.indent = ' ' * 4
    return entry_writer._entry_to_bibtex(e)