def export(self, path: str = "./pubs/") -> None:
    """Export each publication to its own folder as ``cite.bib``.

    For each publication a new folder is created under *path*, named
    after the publication title, and the bibtex record is written to a
    file called ``cite.bib`` inside that folder.

    Parameters
    ----------
    path : optional
        path where files should be saved, by default "./pubs/"

    Raises
    ------
    KeyError
        if the type of publication and the handle are not specified
    """
    self._create_dir(path)
    for pub in self._dep_pubs:
        meta = pub.get_bibtex_representation()
        if not meta:
            print("This pub has no meta")
            continue
        if not (meta["type"] and meta["handle"]):
            raise KeyError("the type of publication and metadata are required")
        handle = meta.pop("handle")
        pub_type = meta.pop("type")
        db = BibDatabase()
        db.entries = [meta.copy()]
        db.entries[0].update({"ID": handle, "ENTRYTYPE": pub_type})
        writer = BibTexWriter()
        # Keep the order of the elements inside the bibtex file.
        writer.display_order = list(meta)
        try:
            # Titles may contain characters that are awkward in
            # directory names; sanitise them first.
            my_dir = (meta["title"].replace("/", "_")
                      .replace(" ", "-").replace("\"", ""))
            full_path = os.path.join(path, my_dir)
            if not os.path.exists(full_path):
                os.mkdir(full_path, 0o755)
            # Previously the write was duplicated in both branches of the
            # exists() check; a single write after ensuring the dir suffices.
            with open(os.path.join(full_path, "cite.bib"), "w") as bibfile:
                bibfile.write(writer.write(db))
        except OSError:
            # Fixed: the message used to pass my_dir as a second print
            # argument instead of formatting it into the placeholder.
            print("Creation of the directory failed {}".format(my_dir))
def write_bibtex(bib_database, filename=None):
    """Serialise *bib_database*; write to *filename* when given,
    otherwise print the result to stdout."""
    writer = BibTexWriter()
    rendered = writer.write(bib_database)
    if filename is None:
        print(rendered)
    else:
        with open(filename, 'w') as handle:
            handle.write(rendered)
def main():
    """Look up every paper listed in ``liste.txt`` on DBLP and append
    the selected bibtex entries to ``references.bib``.

    When DBLP returns several candidates the user is prompted to pick
    one (``-1`` to skip).  With ``--warning`` left at its default
    ("true"), a warning line is written for papers with no match.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--warning",
        help="Print if no entry has been found for a paper in the .bib",
        default="true")
    # Parse once up front; previously parse_args() re-ran inside the loop.
    args = parser.parse_args()
    writer = BibTexWriter()
    # Context managers ensure both files are closed on every exit path
    # (previously neither handle was ever closed).
    with open("liste.txt", "r") as liste, \
            open("references.bib", "w") as res:
        for line in liste:
            line = line.replace(" ", "+").replace(",", "")
            r = rq.post("http://dblp.org/search/publ/api?q=" + line +
                        "&format=bib")
            no_result = r.text == ""
            if not no_result:
                db = BibDatabase()
                bib_database = bibtexparser.loads(r.text)
                if len(bib_database.entries) > 1:
                    for i in range(0, len(bib_database.entries)):
                        print(str(i) + " : " + str(bib_database.entries[i]))
                    choice = -2
                    while (-1 > choice
                           or choice > len(bib_database.entries) - 1):
                        choice = int(input(
                            'Which reference do you want to add ? (-1 for none) : '
                        ))
                    if choice == -1:
                        print("No reference has been added for \"" +
                              line.replace("+", " ") + "\"")
                        if args.warning == "true":
                            res.write("WARNING : No reference added for \"" +
                                      line.replace("+", " ") + "\"")
                    else:
                        db.entries = [bib_database.entries[int(choice)]]
                        print(db.entries)
                        res.write(writer.write(db))
                elif bib_database.entries:
                    db.entries = [bib_database.entries[0]]
                    print(db.entries[0])
                    res.write(writer.write(db))
                else:
                    # Non-empty reply that parsed to zero entries used to
                    # crash with IndexError; treat it as "no result".
                    no_result = True
            if no_result:
                print("No result found for \"" +
                      line.replace("+", " ") + "\"")
                if args.warning == "true":
                    res.write("WARNING : No reference found for \"" +
                              line.replace("+", " ") + "\"")
def main_resps():
    """Parse the perturbed-tides citation list and write it out as a
    bibtex file, disambiguating duplicate entry IDs with letter
    suffixes (…c, b, a in reverse order of occurrence)."""
    source = Path("data/resps-perturbed-tides.txt")
    db = BibDatabase()
    db.entries = [bib.to_bibtex() for bib in parse_citations(source)]
    # Count occurrences of every ID so collisions can be detected.
    id_to_count = defaultdict(lambda: 0)
    for entry in db.entries:
        id_to_count[entry["ID"]] += 1
    for the_id, count in id_to_count.items():
        if count <= 1:
            continue
        duplicates = [e for e in db.entries if e["ID"] == the_id]
        for entry in duplicates:
            count -= 1
            entry["ID"] += ascii_lowercase[count]
    writer = BibTexWriter()
    writer.indent = " "
    target = Path("data/resps-tides-perturbed-refs.bib")
    with target.open("wb") as ref_file:
        ref_file.write(writer.write(db).encode())
def export_bibtex(author_name, fol, recursive=False):
    """Collect every unique paper in *fol*'s bib files that lists
    *author_name* among its authors and write them all to a single
    ``<author>.bib`` file in *fol*."""
    from bibtexparser.bwriter import BibTexWriter
    from bibtexparser.bibdatabase import BibDatabase
    db = BibDatabase()
    seen_titles = set()
    for bib_fname in tqdm(get_bib_files(fol, recursive)):
        with open(bib_fname) as bibtex_file:
            bib = bibtexparser.load(bibtex_file)
            for entry in bib.entries:
                title = entry['title']
                # Skip papers already collected from another bib file.
                if title in seen_titles:
                    continue
                seen_titles.add(title)
                if author_name in parse_authors(entry):
                    db.entries.append(entry)
    author_name = author_name.replace(' ', '').replace(',', '_')
    bibtex_fname = op.join(fol, '{}.bib'.format(author_name))
    with open(bibtex_fname, 'w') as bibfile:
        bibfile.write(BibTexWriter().write(db))
    print('The bibtex file with {} papers of {} where she cited you was exported to {}'.format(
        len(db.entries), author_name, bibtex_fname))
def extract(keys_input, bibtex_input, bibtex_output, verbose):
    """Copy the entries named in *keys_input* from *bibtex_input* to
    *bibtex_output*, rewriting HTML-escaped italic spans in titles into
    ``\\textit{\\uppercase{...}}`` LaTeX markup."""
    lines = keys_input.readlines()
    citation_keys = (line.strip() for line in lines)
    if verbose:
        print("Read {} keys from {}".format(
            len(lines), click.format_filename(keys_input.name)))
    main_bib = load_bib(bibtex_input)
    if verbose:
        print("Read {} entries from {}".format(
            len(main_bib.entries), click.format_filename(bibtex_input.name)))
    out_bib = BibDatabase()
    species_pattern = re.compile(
        r'({\\textless}i{\\textgreater}\w.*?{\\textless}/i{\\textgreater})')
    for key in citation_keys:
        entry = main_bib.entries_dict[key]
        title = entry['title']
        for grp in species_pattern.findall(title):
            s = grp.replace('{\\textless}i{\\textgreater}', '').replace('{\\textless}/i{\\textgreater}', '')
            # Italicise the span and uppercase its first character.
            s = '\\textit{\\uppercase{' + s[0] + '}' + s[1:] + '}'
            title = title.replace(grp, s)
        entry['title'] = title
        out_bib.entries.append(entry)
    if verbose:
        print("Writing {} entries to {}".format(
            len(out_bib.entries), click.format_filename(bibtex_output.name)))
    bibtex_output.write(BibTexWriter().write(out_bib))
def format_paper_citation_dict(citation, indent=' '):
    """
    Format a citation dict for a paper or a list of papers into a BibTeX
    record string.

    :param citation: A ``Paper`` citation dict or list of such dicts.
    :param indent: Indentation to be used in BibTeX output.
    """
    entries = [citation] if isinstance(citation, dict) else citation

    # Disambiguate duplicate IDs by suffixing the occurrence number
    # (the first occurrence keeps its original ID).
    occurrences = collections.defaultdict(lambda: 0)
    for entry in entries:
        entry_id = entry['ID']
        occurrences[entry_id] += 1
        if occurrences[entry_id] > 1:
            entry['ID'] = '%s_%s' % (entry_id, occurrences[entry_id])

    writer = BibTexWriter()
    writer.indent = indent
    db = BibDatabase()
    db.entries = entries
    with io.StringIO('') as bibfile:
        bibfile.write(writer.write(db))
        return bibfile.getvalue().strip()
def main(keys_filename, bibtex_filename, output_filename, verbose):
    """Extract the cited entries from a master bib file, wrap species
    names in titles with ``\\emph``, strip noisy fields, and write the
    result to *output_filename*."""
    with open(keys_filename) as f:
        citation_keys = [line.strip() for line in f.readlines()]
    if verbose:
        print("Read {} keys from {}".format(len(citation_keys),
                                            keys_filename))
    with open(bibtex_filename) as f:
        main_bib = load_bib(f)
    if verbose:
        print("Read {} entries from {}".format(len(main_bib.entries),
                                               bibtex_filename))
    out_bib = BibDatabase()
    for key in citation_keys:
        out_bib.entries.append(main_bib.entries_dict[key])
    if verbose:
        print("Writing {} entries to {}".format(len(out_bib.entries),
                                                output_filename))
    for ent in out_bib.entries:
        match = species_name_pattern.match(ent['title'])
        if match:
            prefix, species, postfix = match.groups()
            ent['title'] = prefix + r"\emph{" + species + r"}" + postfix
        # Drop bulky / tool-specific fields from the exported entries.
        for key in ['file', 'abstract', 'mendeley-tags', 'keyword', 'URL']:
            if key in ent:
                ent.pop(key)
    with open(output_filename, 'w') as f:
        f.write(BibTexWriter().write(out_bib))
def make_bibtex_file(pubs, pid, person_dir='.'):
    """Make a bibtex file given the publications

    Parameters
    ----------
    pubs : list of dict
        The publications
    pid : str
        The person id
    person_dir : str, optional
        The person's directory

    Returns
    -------
    str or None
        Path of the written ``<pid>.bib`` file, or None when the bibtex
        parser is unavailable.
    """
    if not HAVE_BIBTEX_PARSER:
        return None
    skip_keys = {'ID', 'ENTRYTYPE', 'author'}
    bibdb = BibDatabase()
    bibwriter = BibTexWriter()
    bibdb.entries = ents = []
    for pub in pubs:
        ent = dict(pub)
        ent['ID'] = ent.pop('_id')
        ent['ENTRYTYPE'] = ent.pop('entrytype')
        for n in ['author', 'editor']:
            if n in ent:
                ent[n] = ' and '.join(ent[n])
        for key in ent.keys():
            if key in skip_keys:
                continue
            # Coerce to str so non-string values survive latex_safe —
            # consistent with the unicode-aware variant of this helper.
            ent[key] = latex_safe(str(ent[key]))
        ents.append(ent)
    fname = os.path.join(person_dir, pid) + '.bib'
    # Explicit utf-8 avoids platform-dependent encoding failures.
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(bibwriter.write(bibdb))
    return fname
def save_tacl_bib(txt_file, year, volume):
    """Convert a TACL listing into ``<txt_file>.bib``.

    The input has three lines per paper: ``<8-char id><authors>`` (authors
    ``;``-separated), then the title, then a separator line (ignored).

    Parameters
    ----------
    txt_file : str
        Path of the listing to convert.
    year, volume
        Journal year and volume recorded in every generated entry.
    """
    def name(n):
        # "First Last" -> "Last, First"; single tokens pass through.
        t = n.split()
        return t[-1] + ', ' + ' '.join(t[:-1]) if len(t) > 1 else n.strip()

    entries = []
    d = None
    # Context manager: the input handle used to be leaked.
    with open(txt_file) as listing:
        for i, line in enumerate(listing):
            line = line.strip()
            j = i % 3
            if j == 0:
                authors = ' and '.join(
                    [name(n) for n in line[9:].split(';')]).strip()
                d = {
                    'ID': line[:8],
                    'ENTRYTYPE': 'article',
                    'author': authors,
                    'journal': 'Transactions of the Association for Computational Linguistics',
                    'year': str(year),
                    'volume': str(volume)
                }
            elif j == 1:
                d['title'] = line
                entries.append(d)
    db = BibDatabase()
    db.entries = entries
    writer = BibTexWriter()
    with open(txt_file + '.bib', 'w') as bout:
        bout.write(writer.write(db))
def exif_pdf(self, filename):
    """Extract bibliographic metadata from the PDF *filename* and show
    a bibtex rendering of it in the text view.

    Shows an error dialog when the metadata cannot be mapped.
    """
    fields = ["Author", "Year", "Journal", "Title", "Publisher", "Page",
              "Address", "Annote", "Booktitle", "Chapter", "Crossred",
              "Edition", "Editor", "HowPublished", "Institution", "Month",
              "Note", "Number", "Organization", "Pages", "School", "Series",
              "Type", "Url", "Volume", "Doi", "File"]
    op = pexif.get_json(filename)
    try:
        # Keep only metadata keys that match one of the known fields.
        new_op = {
            field: str(value)
            for field in fields
            for key, value in op[0].items() if field.lower() in key.lower()
        }
        if 'Author' not in new_op:
            new_op['Author'] = 'Unknown'
        # Entry ID of the form TitleWord_TitleWord_LastName.
        id_auth = new_op["Author"].split()[-1]
        id_tit = (new_op["Title"].split()[:2])
        id_tit.append(id_auth)
        id_val = "_".join(id_tit)
        new_op["ID"] = str(id_val)
        new_op["ENTRYTYPE"] = "article"
        op[0] = new_op
        db = BibDatabase()
        db.entries = op
        writer = BibTexWriter()
        pdf_buff = (writer.write(db))
        self.create_textview(pdf_buff)
    except Exception:
        # Narrowed from a bare except: still best-effort, but no longer
        # swallows SystemExit / KeyboardInterrupt.
        self.Messages.on_error_clicked("Can't extract data from this pdf file",
                                       "Try other methods")
def work_to_bibtex(work, name=None, acronym=False, rules=None):
    """Convert work to bibtex text

    Doctest:

    .. doctest::

        >>> reload()
        >>> murta2014a = work_by_varname("murta2014a")
        >>> print(work_to_bibtex(murta2014a))
        @inproceedings{murta2014a,
         address = {Cologne, Germany},
         author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
         booktitle = {International Provenance and Annotation Workshop},
         pages = {71--83},
         publisher = {Springer},
         title = {no{W}orkflow: capturing and analyzing provenance of scripts},
         year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>

        Custom name:

        >>> reload()
        >>> murta2014a = work_by_varname("murta2014a")
        >>> print(work_to_bibtex(murta2014a, name="other"))
        @inproceedings{other,
         address = {Cologne, Germany},
         author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
         booktitle = {International Provenance and Annotation Workshop},
         pages = {71--83},
         publisher = {Springer},
         title = {no{W}orkflow: capturing and analyzing provenance of scripts},
         year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>

        Use acronym for place name:

        >>> print(work_to_bibtex(murta2014a, acronym=True))
        @inproceedings{murta2014a,
         address = {Cologne, Germany},
         author = {Murta, Leonardo and Braganholo, Vanessa and Chirigati, Fernando and Koop, David and Freire, Juliana},
         booktitle = {IPAW},
         pages = {71--83},
         publisher = {Springer},
         title = {no{W}orkflow: capturing and analyzing provenance of scripts},
         year = {2014}
        }
        <BLANKLINE>
        <BLANKLINE>
    """
    # Build the entry dict, wrap it in a single-entry database, and render.
    result = work_to_bibtex_entry(work, name=name, acronym=acronym,
                                  rules=rules)
    db = BibDatabase()
    db.entries = [result]
    writer = BibTexWriter()
    # NOTE(review): indent literal recovered from collapsed source —
    # confirm the exact whitespace against the doctest output.
    writer.indent = " "
    return writer.write(db)
def make_bibtex_file(pubs, pid, person_dir="."):
    """Make a bibtex file given the publications

    Parameters
    ----------
    pubs : list of dict
        The publications
    pid : str
        The person id
    person_dir : str, optional
        The person's directory
    """
    if not HAVE_BIBTEX_PARSER:
        return None
    protected = {"ID", "ENTRYTYPE", "author"}
    database = BibDatabase()
    writer = BibTexWriter()
    database.entries = collected = []
    for pub in pubs:
        entry = dict(pub)
        entry["ID"] = entry.pop("_id")
        entry["ENTRYTYPE"] = entry.pop("entrytype")
        for role in ("author", "editor"):
            if role in entry:
                entry[role] = " and ".join(entry[role])
        # LaTeX-escape every value except the structural keys.
        for key in entry.keys():
            if key not in protected:
                entry[key] = latex_safe(str(entry[key]))
        collected.append(entry)
    fname = os.path.join(person_dir, pid) + ".bib"
    with open(fname, "w", encoding="utf-8") as handle:
        handle.write(writer.write(database))
    return fname
def export_to_bibtex_one_file(self, path: str = "all.bib"):
    """Store all publications in bibtex format, appended to one file.

    Parameters
    ----------
    path : optional
        path where the resulting file should be stored, by default "all.bib"

    Raises
    ------
    KeyError
        if the type of publication and the handle are not specified
    """
    self._create_dir(path)
    for pub in self._dep_pubs:
        meta = pub.get_bibtex_representation()
        if not meta:
            print("This pub has no meta")
            continue
        if not (meta["type"] and meta["handle"]):
            # Fixed: message previously read "metdataare required".
            raise KeyError("the type of publication and metadata are required")
        handle = meta.pop("handle")
        pub_type = meta.pop("type")
        db = BibDatabase()
        db.entries = [meta.copy()]
        db.entries[0].update({"ID": handle, "ENTRYTYPE": pub_type})
        writer = BibTexWriter()
        # Preserve the metadata's key order in the output.
        writer.display_order = list(meta)
        # Append mode so successive publications accumulate in one file.
        with open(path, "a") as bibfile:
            bibfile.write(writer.write(db))
def rekey(self, old_key, new_key):
    '''
    Change the key of an existing document in the archive.

    When new_key is None the replacement key is read from the
    document's bibtex file.  Raises LibraryException if the archive
    already contains new_key.  Returns the key actually used.
    '''
    old_paths = self.get_doc(old_key).paths

    # If a new key has not been supplied, we take the key from the bibtex
    # file.
    if new_key is None:
        new_key = _key_from_bibtex(old_paths.bib_path)

    if self.has_key(new_key):
        msg = 'Archive already contains key {}. Aborting.'.format(new_key)
        raise LibraryException(msg)

    new_paths = DocumentPaths(self.archive_path, new_key)

    # Rename PDF and bibtex file and then rename the whole directory.
    # The order matters: both files are renamed inside the old directory
    # before the directory itself is moved to its new location.
    shutil.move(old_paths.bib_path,
                os.path.join(old_paths.key_path, new_key + '.bib'))
    shutil.move(old_paths.pdf_path,
                os.path.join(old_paths.key_path, new_key + '.pdf'))
    shutil.move(old_paths.key_path, new_paths.key_path)

    # Write the new_key to the bibtex file
    with open(new_paths.bib_path, 'r') as f:
        bib_info = bibtexparser.load(f)
    bib_info.entries[0]['ID'] = new_key
    bib_writer = BibTexWriter()
    with open(new_paths.bib_path, 'w') as f:
        f.write(bib_writer.write(bib_info))
    return new_key
def convert_csv_to_bibtex(self):
    """Convert every row of ``self.csv`` into a bibtex record and write
    the records to ``self.output_path``."""
    writer = BibTexWriter()
    rows = self.csv.to_dict('records')
    with open(self.output_path, 'w', encoding="utf-8") as bibtex_file:
        for row in rows:
            converted = self.convert_csv_entry_to_bibtex_entry(row)
            bibtex_file.write(writer.write(converted))
def metaDictToBib(jobid, metadict, omit_keys, path_prefix):
    """Export meta data to bibtex format

    Args:
        jobid (int): id of job.
        metadict (DocMeta): meta dict of a doc.
        omit_keys (list): keys to omit in the converted dict.
        path_prefix (str): folder path to prepend to attachment file paths.

    Returns:
        rec (int): 0 if successful, 1 otherwise.
        jobid (int): the input jobid as it is.
        dbtext (str): formated bibtex entry, '' if <rec>==1.
        docid (int): id of the processed document.
    """
    try:
        ordinary = toOrdinaryDict(metadict, INV_ALT_KEYS, omit_keys,
                                  path_prefix)
        db = BibDatabase()
        db.entries = [ordinary]
        writer = BibTexWriter()
        writer.indent = ' '
        writer.comma_first = False
        return 0, jobid, writer.write(db), metadict['id']
    except Exception:
        LOGGER.exception('Failed to write to bibtex')
        return 1, jobid, '', metadict['id']
def export(self):
    """Render the collected papers as bibtex into an in-memory StringIO
    and return it to the caller."""
    buffer = io.StringIO()
    db = BibDatabase()
    for paper in self._papers:
        record = {
            'abstract': paper.abstract,
            'title': paper.title,
            'year': str(paper.published_at.year),
            'ID': self.generate_id(paper),
            'doi': paper.doi,
            'author': self.generate_authors(paper),
        }
        if paper.journal:
            record['journal'] = paper.journal.displayname
        # Preprints are exported as 'unpublished' entries.
        record['ENTRYTYPE'] = 'unpublished' if paper.is_preprint else 'article'
        db.entries.append(record)
    buffer.write(BibTexWriter().write(db))
    return buffer
def load_and_replace(bibtex_file):
    """Load publications from *bibtex_file* (relative to the
    ``publications`` folder), attach each entry's raw bibtex text as
    ``BIB_ENTRY``, apply the site's text replacements to every other
    field, and return the publications grouped and sorted by year,
    newest first."""
    with open(os.path.join('publications', bibtex_file), 'r',
              encoding="utf-8") as f:
        fdata = f.read()
    pdict = BibTexParser(fdata).get_entry_dict()
    plist = BibTexParser(fdata, bc.author).get_entry_list()
    by_year = {}
    for pub in plist:
        pubd = pdict[pub['ID']]
        db = BibDatabase()
        db.entries = [pubd]
        writer = BibTexWriter()
        writer.indent = '\t'
        pub['BIB_ENTRY'] = writer.write(db)
        for field in pub:
            # The raw bibtex must stay untouched by the replacements.
            if field == 'BIB_ENTRY':
                continue
            pub[field] = context.make_replacements(pub[field])
        pub['author'] = _format_author_list(pub['author'])
        year = int(pub['year']) if 'year' in pub else 1970
        by_year.setdefault(year, []).append(pub)
    ret = []
    for _, pubs in sorted(by_year.items(), reverse=True):
        ret.extend(pubs)
    return ret
def entries_to_file(entries, fn):
    """Dump *entries* to the UTF-8 encoded bibtex file *fn*."""
    database = BibDatabase()
    database.entries = entries
    rendered = BibTexWriter().write(database)
    with codecs.open(fn, 'w', "utf-8") as bibtex_file:
        bibtex_file.write(rendered)
def save(self, bibfile=-1):
    """ save the biblist with :
    - the original filename without any arg or
    - the given file name if not empty
    """
    if bibfile == -1:
        bibfile = self.name
    db = BibDatabase()
    db.entries.extend(self)
    # This writer class is needed to prepare the output format.
    writer = BibTexWriter()
    writer.indent = '    '  # indent entries with 4 spaces instead of one
    writer.comma_first = False  # place the comma at the beginning of the line
    writer.align_values = True  # with a nice indentation
    target = os.path.join(os.path.expandvars('$PYBLIO_BIB'), bibfile)
    print('')
    print(target)
    print('')
    with open(target, 'w') as bf:
        bf.write('\n')
        bf.write(writer.write(db))
        bf.write('\n')
def getBibtexStrFromAbstractDict(abstractDict):
    """Render *abstractDict* as a bibtex string.

    The 'url' and 'journal' fields are removed from the dict in place
    before rendering.

    Parameters
    ----------
    abstractDict : dict
        Bibtex entry dict; mutated by this call.

    Returns
    -------
    str
        The formatted bibtex record.
    """
    # pop with a default so entries lacking these fields don't raise
    # KeyError (previously a bare pop crashed on minimal entries).
    abstractDict.pop('url', None)
    abstractDict.pop('journal', None)
    db = BibDatabase()
    writer = BibTexWriter()
    writer.indent = ' '
    db.entries = [abstractDict]
    return writer.write(db)
def bibtex_cleaner(bibtext, option):
    """Parse *bibtext*, clean its entries according to *option*, and
    return the re-serialised bibtex; on any failure a user-facing error
    string is returned instead of raising."""
    try:
        database = bibtexparser.loads(bibtext)
        cleaned = clean_entries(database, option)
        return BibTexWriter().write(cleaned)
    except Exception:
        # Deliberate best-effort: surface a message rather than crash.
        return 'Error. 入力形式はbibtexですか?(または変換プログラムのバグの可能性があります)\n'
def persist(self):
    """Flush the in-memory note and tags into the first entry and
    rewrite the backing bibtex file; does nothing when no database is
    attached."""
    if not self.__bib:
        return
    entry = self.__bib.entries[0]
    entry['note'] = self.note
    entry['tags'] = ';'.join(self.tags)
    writer = BibTexWriter()
    rendered = writer.write(self.__bib)
    with open(self.__bib_file_path, 'w') as bibfile:
        bibfile.write(rendered)
def test_bibexport():
    """Smoke-test: export one known entry to testoutbib.bib."""
    entry = bb.entries_dict['Yttri:Urban']
    db = BibDatabase()
    db.entries = [entry]
    writer = BibTexWriter()
    with open('testoutbib.bib', 'w') as bibfile:
        bibfile.write(writer.write(db))
    # NOTE(review): exiting inside a test aborts the whole run — kept
    # as-is to preserve behaviour.
    sys.exit()
def write_bib_file(list_of_cited_entries, output_fname):
    """Write the given entries to *output_fname* and report the path."""
    database = BibDatabase()
    database.entries = list_of_cited_entries
    writer = BibTexWriter()
    with open(output_fname, 'w') as handle:
        handle.write(writer.write(database))
    print('Output written to ' + output_fname)
def write_bib(bib_database, filen="dl4m.bib"):
    """Description of write_bib
    Write the items stored in bib_database into filen
    """
    writer = BibTexWriter()
    writer.indent = ' '
    # Order entries by year (missing year last) then author.
    writer.order_entries_by = ('noneyear', "author")
    rendered = writer.write(bib_database)
    with open(filen, "w", encoding="utf-8") as bibfile:
        bibfile.write(rendered)
def correct_lhc_authors(bib_tex):
    """Rewrite the first entry's author field as
    '<collaboration> Collaboration' and return the updated bibtex
    text."""
    bib_data = bibtexparser.loads(bib_tex)
    entry = bib_data.entries[0]
    print(entry['collaboration'])
    entry['author'] = entry['collaboration'] + " Collaboration"
    bib_data.entries = [entry]
    return BibTexWriter().write(bib_data)
def write(self, path=""): path = self.path if not path else path bdb = BibDatabase() bdb.entries = self.entries bw = BibTexWriter() with open(path, 'w') as f: f.write(bw.write(bdb).encode('ascii', 'replace'))
def convert_to_bib(content, save_fpath):
    """Parse an API response into paper entries and save them as bibtex
    at *save_fpath*."""
    db = BibDatabase()
    db.entries = parse_api_response(content)
    writer = BibTexWriter()
    writer.indent = " "
    writer.comma_first = True
    with open(save_fpath, "w+") as bibfile:
        bibfile.write(writer.write(db))
def convert(inFile, outFile):
    """Read a JSON entry from *inFile* and write it to *outFile* as a
    single-entry bibtex file."""
    with open(str(inFile)) as data_file:
        entry = json.load(data_file)
    db = BibDatabase()
    db.entries = [entry]
    writer = BibTexWriter()
    with open(str(outFile), 'w') as bibfile:
        bibfile.write(writer.write(db))
def test_trailing_comma(self):
    """Writer output with add_trailing_comma must match the fixture."""
    with io.open(_data_path('article.bib'), 'r') as bibfile:
        bib = BibTexParser(bibfile.read())
    with io.open(_data_path('article_trailing_comma_output.bib'), 'r') as bibfile:
        expected = bibfile.read()
    writer = BibTexWriter()
    writer.add_trailing_comma = True
    self.maxDiff = None
    self.assertEqual(expected, writer.write(bib))
def test_comma_first(self):
    """Writer output with indent + comma_first must match the fixture."""
    with io.open(_data_path('book.bib'), 'r') as bibfile:
        bib = BibTexParser(bibfile.read())
    with io.open(_data_path('book_comma_first.bib'), 'r') as bibfile:
        expected = bibfile.read()
    writer = BibTexWriter()
    writer.indent = ' '
    writer.comma_first = True
    self.maxDiff = None
    self.assertEqual(expected, writer.write(bib))
def parsing_write(self, filename):
    """Serialise every reference held by the tree view to *filename*."""
    writer = BibTexWriter()
    writer.indent = ' '
    records = []
    for ref in self.TreeView.full_list:
        # Pair field names with values, dropping unset (None) fields.
        record = dict((k, v) for k, v in zip(self.entries, ref)
                      if v is not None)
        records.append(record)
    self.db.entries = records
    with open(filename, 'w') as bibfile:
        bibfile.write(writer.write(self.db))
def bibtex(self, simplified=False):
    """Return this record's bibtex as a stripped string, without the
    private 'note'/'tags' fields; *simplified* additionally drops
    identifiers (doi/acmid/isbn/url/link).  Returns None when no bibtex
    database is attached."""
    if not self.__bib:
        return None
    from copy import deepcopy
    # Work on a copy so the stored database is never mutated.
    bib = deepcopy(self.__bib)
    entry = bib.entries[0]
    for k in ['note', 'tags']:
        if k in entry.keys():
            del entry[k]
    if simplified:
        for k in ['doi', 'acmid', 'isbn', 'url', 'link']:
            if k in entry.keys():
                del entry[k]
    writer = BibTexWriter()
    return writer.write(bib).strip()
def report_results_to_user(self):
    ''' write database to self.args.output_path and inform user

    Writes self.args.bibtex_database to the output path, then prints a
    summary of the crawl (found/added counts) and the settings used.
    '''
    writer = BibTexWriter()
    with open(self.args.output_path, 'w') as outfile:
        outfile.write(writer.write(self.args.bibtex_database))
    # Summary report for the user.
    print()
    print('Finished crawling the ACL anthology!')
    print('%s bibtex files were found' % self.total_found)
    print('%s bibtex files were added ' % self.total_added)
    print('You ran the program with the following settings:')
    print()
    print('overwrite', self.args.overwrite)
    print('queries:', self.args.queries)
    print()
    print('the output file can be found at: %s' % self.args.output_path)
def main():
    """Dump the bibliography database named on the command line to a
    .bib file.

    Usage: python3 dump_db.py name.db dump.bib
    """
    if len(sys.argv) < 3:
        print("Wrong number of arguments. Usage: \n")
        print("python3 dump_db.py name.db dump.bib")
        # Bug fix: execution previously fell through after printing the
        # usage text and crashed on the missing argv entries.
        return
    print("Dump database")
    print("Database: ", sys.argv[1])
    # Bug fix: connect to the database given on the command line instead
    # of the hard-coded 'app.db' (which contradicted the message above).
    engine = create_engine('sqlite:///' + sys.argv[1])
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()
    db = BibDatabase()
    db.entries = []
    for e in session.query(BiblioEntry):
        db.entries.append({
            'journal': e.journal,
            'title': e.title,
            'year': str(e.year),
            'publisher': e.publisher,
            'school': e.school,
            'ID': e.ID,
            'url': e.url,
            'author': e.authors,
            'keyword': e.keywords,
            'ENTRYTYPE': e.ENTRYTYPE,
        })
    print("Write file on", sys.argv[2])
    writer = BibTexWriter()
    with open(sys.argv[2], 'w') as bibfile:
        bibfile.write(writer.write(db))
    session.close()
    print("Connection closed.")
def write_bib(db, order=False):
    """ Write bibtex string.

    Args:
        db (BibDatabase): database object to dump..
        order (bool): whether to reorder entries upon writing.

    Returns:
        The dumped string.
    """
    # Custom writer: tab indentation, keep the database's own order.
    writer = BibTexWriter()
    writer.indent = '\t'
    writer.order_entries_by = None

    # Replace month by numeric value
    for entry in db.entries:
        month = entry.get('month')
        if month in MONTHS:
            entry['month'] = '{:02d}'.format(MONTHS.index(month) + 1)

    if order:
        # Manual sort
        sort_entries(db, ('year', 'author', 'ID'))

    if not config.use_utf8_characters:
        db.entries = [nomenclature.encode_ascii_latex(entry)
                      for entry in db.entries]

    if config.protect_uppercase:
        for entry in db.entries:
            entry["title"] = latex.protect_uppercase(entry["title"])

    # Write bib string
    return writer.write(db)
record['error'] = 'tag' # record = c.type(record) # record = c.author(record) # record = c.editor(record) # record = c.journal(record) # record = c.keyword(record) # record = c.link(record) # record = c.doi(record) # record['p_authors'] = [] # if 'author' in record: # record['p_authors'] = [c.splitname(x, False) for x in record['author']] return record parser.customization = custom with open(args.target, 'r') as f: logging.info("Loading bibtex") db = b.load(f, parser) logging.info("Bibtex loaded") #Get errors and write them out: errored = [x for x in db.entries if 'error' in x] with open('{}.errors'.format(args.output), 'w') as f: f.write("\n".join(["{} : {}".format(x['ID'], x['error']) for x in errored])) writer = BibTexWriter() with open(args.output,'w') as f: f.write(writer.write(db))
class HtmlBuilder(object):
    """Builds the static HTML site (people, projects, blog, jobs) from
    the database collections reachable through the run control object."""

    # Builder type tag; also names the build output subdirectory.
    btype = 'html'

    def __init__(self, rc):
        # rc: run control object providing builddir, client, etc.
        self.rc = rc
        self.bldir = os.path.join(rc.builddir, self.btype)
        self.env = Environment(loader=FileSystemLoader([
            'templates',
            os.path.join(os.path.dirname(__file__), 'templates'),
        ]))
        self.construct_global_ctx()
        if HAVE_BIBTEX_PARSER:
            self.bibdb = BibDatabase()
            self.bibwriter = BibTexWriter()

    def construct_global_ctx(self):
        """Populate the global template context shared by all pages."""
        self.gtx = gtx = {}
        rc = self.rc
        # Expose a handful of builtins and helpers to the templates.
        gtx['len'] = len
        gtx['True'] = True
        gtx['False'] = False
        gtx['None'] = None
        gtx['sorted'] = sorted
        gtx['groupby'] = groupby
        gtx['gets'] = gets
        gtx['date_key'] = date_key
        gtx['doc_date_key'] = doc_date_key
        gtx['level_val'] = level_val
        gtx['category_val'] = category_val
        gtx['rfc822now'] = rfc822now
        gtx['date_to_rfc822'] = date_to_rfc822
        gtx['jobs'] = list(all_docs_from_collection(rc.client, 'jobs'))
        gtx['people'] = sorted(all_docs_from_collection(rc.client, 'people'),
                               key=position_key, reverse=True)
        gtx['all_docs_from_collection'] = all_docs_from_collection

    def render(self, tname, fname, **kwargs):
        """Render template *tname* with the global context (plus
        *kwargs* overrides) into *fname* under the build directory."""
        template = self.env.get_template(tname)
        ctx = dict(self.gtx)
        ctx.update(kwargs)
        ctx['rc'] = ctx.get('rc', self.rc)
        # Relative paths so pages work from any directory depth.
        ctx['static'] = ctx.get('static',
                                os.path.relpath('static',
                                                os.path.dirname(fname)))
        ctx['root'] = ctx.get('root',
                              os.path.relpath('/', os.path.dirname(fname)))
        result = template.render(ctx)
        with open(os.path.join(self.bldir, fname), 'wt') as f:
            f.write(result)

    def build(self):
        """Build every page of the site and copy the static assets."""
        rc = self.rc
        os.makedirs(self.bldir, exist_ok=True)
        self.root_index()
        self.people()
        self.projects()
        self.blog()
        self.jobs()
        self.nojekyll()
        self.cname()
        # static
        stsrc = os.path.join('templates', 'static')
        stdst = os.path.join(self.bldir, 'static')
        if os.path.isdir(stdst):
            shutil.rmtree(stdst)
        shutil.copytree(stsrc, stdst)

    def root_index(self):
        """Render the site's landing page."""
        rc = self.rc
        self.render('root_index.html', 'index.html', title='Home')

    def people(self):
        """Render one page per person plus the people index."""
        rc = self.rc
        peeps_dir = os.path.join(self.bldir, 'people')
        os.makedirs(peeps_dir, exist_ok=True)
        for p in self.gtx['people']:
            # All names this person is known by (aliases + primary name).
            names = frozenset(p.get('aka', []) + [p['name']])
            pubs = self.filter_publications(names, reverse=True)
            bibfile = self.make_bibtex_file(pubs, pid=p['_id'],
                                            person_dir=peeps_dir)
            ene = p.get('employment', []) + p.get('education', [])
            ene.sort(key=ene_date_key, reverse=True)
            projs = self.filter_projects(names)
            self.render('person.html',
                        os.path.join('people', p['_id'] + '.html'), p=p,
                        title=p.get('name', ''), pubs=pubs, names=names,
                        bibfile=bibfile, education_and_employment=ene,
                        projects=projs)
        self.render('people.html', os.path.join('people', 'index.html'),
                    title='People')

    def filter_publications(self, authors, reverse=False):
        """Return citations sharing at least one author with *authors*,
        sorted by document date."""
        rc = self.rc
        pubs = []
        for pub in all_docs_from_collection(rc.client, 'citations'):
            if len(set(pub['author']) & authors) == 0:
                continue
            pubs.append(pub)
        pubs.sort(key=doc_date_key, reverse=reverse)
        return pubs

    def make_bibtex_file(self, pubs, pid, person_dir='.'):
        """Write *pubs* as ``<pid>.bib`` inside *person_dir*; returns the
        file path, or None when no bibtex parser is available."""
        if not HAVE_BIBTEX_PARSER:
            return None
        self.bibdb.entries = ents = []
        for pub in pubs:
            ent = dict(pub)
            # Map database fields onto bibtexparser's required keys.
            ent['ID'] = ent.pop('_id')
            ent['ENTRYTYPE'] = ent.pop('entrytype')
            ent['author'] = ' and '.join(ent['author'])
            ents.append(ent)
        fname = os.path.join(person_dir, pid) + '.bib'
        with open(fname, 'w') as f:
            f.write(self.bibwriter.write(self.bibdb))
        return fname

    def filter_projects(self, authors, reverse=False):
        """Return projects whose team intersects *authors*, with each
        project's team narrowed to the matching members."""
        rc = self.rc
        projs = []
        for proj in all_docs_from_collection(rc.client, 'projects'):
            team_names = set(gets(proj['team'], 'name'))
            if len(team_names & authors) == 0:
                continue
            # Copy before mutating so the collection stays intact.
            proj = dict(proj)
            proj['team'] = [x for x in proj['team'] if x['name'] in authors]
            projs.append(proj)
        projs.sort(key=id_key, reverse=reverse)
        return projs

    def projects(self):
        """Render the projects overview page."""
        rc = self.rc
        projs = all_docs_from_collection(rc.client, 'projects')
        self.render('projects.html', 'projects.html', title='Projects',
                    projects=projs)

    def blog(self):
        """Render all blog posts, the blog index, and the RSS feed."""
        rc = self.rc
        blog_dir = os.path.join(self.bldir, 'blog')
        os.makedirs(blog_dir, exist_ok=True)
        posts = list(all_docs_from_collection(rc.client, 'blog'))
        posts.sort(key=ene_date_key, reverse=True)
        for post in posts:
            self.render('blog_post.html',
                        os.path.join('blog', post['_id'] + '.html'),
                        post=post, title=post['title'])
        self.render('blog_index.html', os.path.join('blog', 'index.html'),
                    title='Blog', posts=posts)
        self.render('rss.xml', os.path.join('blog', 'rss.xml'), items=posts)

    def jobs(self):
        """Render one page per job posting plus the jobs index."""
        rc = self.rc
        jobs_dir = os.path.join(self.bldir, 'jobs')
        os.makedirs(jobs_dir, exist_ok=True)
        for job in self.gtx['jobs']:
            self.render('job.html',
                        os.path.join('jobs', job['_id'] + '.html'), job=job,
                        title='{0} ({1})'.format(job['title'], job['_id']))
        self.render('jobs.html', os.path.join('jobs', 'index.html'),
                    title='Jobs')

    def nojekyll(self):
        """Touches a nojekyll file in the build dir"""
        with open(os.path.join(self.bldir, '.nojekyll'), 'a+'):
            pass

    def cname(self):
        """Write the CNAME file when the run control provides one."""
        rc = self.rc
        if not hasattr(rc, 'cname'):
            return
        with open(os.path.join(self.bldir, 'CNAME'), 'w') as f:
            f.write(rc.cname)
mon1 = dateobj.strftime("%b") dateobj = datetime.date(2000, int(m.group(2)), 1) mon2 = dateobj.strftime("%b") bib_obj["month"] = ", %s-%s" % (mon1, mon2) month = int(m.group(1)) else: m = re.match("^[0-9]+$", month) if m: dateobj = datetime.date(2000, int(month), 1) mon = dateobj.strftime("%b") bib_obj["month"] = mon month = int(month) if conf_kwd is not None: bib_id = "%s%s%s%s" % (author_kwd, conf_kwd, year, title_kwd) else: bib_id = "%s%s%s" % (author_kwd, year, title_kwd) if bib_id in id_dict: raise id_dict[bib_id] = True bib_obj["ID"] = bib_id print bib_id db.entries.append(bib_obj) writer = BibTexWriter() with open("seokhwankim.bib", "w") as bibfile: bibfile.write(writer.write(db))
def formatText(self):
    # Normalise the loaded bibtex entries (booktitle abbreviation, title
    # capitalisation, page ranges, author line joining) and write the
    # result to '<title>.bib'.  NOTE: Python 2 code (print statements).
    if self.BibtexfilePath != '':
        self.openfile()
    else:
        self.readcontent()
    # Conference-name map; merge the long Globecom key into the short one.
    m = self.getMap()
    m['IEEE Global Communications Conference'] = m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
    del m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
    print m
    length = 0
    nb = {}
    for bibtex in self.allbibtex:
        # Longest field name (ENTRYTYPE excluded) for padded alignment.
        for key in bibtex.keys():
            if len(key) > length and key != 'ENTRYTYPE':
                length = len(key)
        for k, v in bibtex.items():
            if k == 'ENTRYTYPE' or k == 'ID':
                nb[k] = v
                continue
            # NOTE(review): unreachable — 'ID' is already handled above.
            elif k == 'ID':
                nb[k] = v
                continue
            elif k == 'doi' or k == 'ISSN' or k == 'keywords':
                continue
            elif v == '':
                continue
            elif 'url' in k:
                continue
            # Pad the key with spaces so values line up in the output.
            nk = k + (length - len(k)) * ' '
            if 'booktitle' in nk:
                # A parenthesised acronym wins: "... (ICC)" -> "Proc. of ICC".
                if '(' in v:
                    v1 = v.split('(')[1].split(')')[0]
                    nb[nk] = 'Proc. of ' + v1
                    continue
                flag = 0  # booktitle not rewritten yet
                to_remove = "~`!@#$%^&*(){}[];':<>|-=_+"
                table = {ord(char): None for char in to_remove}
                clean_v = v.translate(table)
                #clean_v = v.translate(string.punctuation)
                #print clean_v
                # Otherwise look the cleaned name up in the conference map.
                for kk, vv in m.items():
                    if kk in clean_v:
                        nb[nk] = 'Proc. of ' + vv[0]
                        publish = 'publish' + (length - 7) * ' '
                        nb[publish] = vv[1]
                        flag = 1
                        break
                if flag == 0:
                    nb[nk] = v
                    print v
                continue
            elif nk.strip() == 'title' and 'booktitle' not in nk:
                self.tilte = v
                nv = v.split(' ')
                # Capitalise each title word except prepositions/articles.
                for i in range(len(nv)):
                    if nv[i] in self.prep or nv[i] in self.artie:
                        continue
                    # Uppercase the first letter if it is lowercase ASCII.
                    else:
                        if 97 <= ord(nv[i][0]) <= 122:
                            nv[i] = chr(ord(nv[i][0])-32)+nv[i][1:]
                v = ' '.join(nv)
                nb[nk] = '{' + v + '}'
                continue
            elif 'pages' in nk:
                # Normalise single-dash page ranges to bibtex's "--".
                if '--' in v:
                    nb[nk] = v
                    continue
                nb[nk] = v.replace('-', '--')
                continue
            elif 'author' in nk:
                # Join wrapped author lines into one.
                if '\n' in v:
                    nb[nk] = v.replace('\n', ' ')
                    continue
            # Everything else is copied unchanged.
            nb[nk] = v
    db = BibDatabase()
    db.entries = [nb]
    writer = BibTexWriter()
    writer.indent = '\t'  # indent entries with 4 spaces instead of one
    writer.comma_first = False  # place the comma at the beginning of the line
    with open(self.tilte+'.bib', 'wb') as bibfile:
        bibfile.write(writer.write(db))
def write_bibtex_file(bib_database, output_name):
    """Serialize *bib_database* to BibTeX text and save it as *output_name*."""
    rendered = BibTexWriter().write(bib_database)
    with open(output_name, "w") as handle:
        handle.write(rendered)
def main():
    """Update arXiv entries in a BibTeX file with subsequently published papers.

    Reads arXiv IDs from ``IDfile``, fetches ADS references for any ID not
    already present in ``bibfile`` (matched on the ``eprint`` field), and
    appends the new entries to ``bibfile`` after backing it up.

    Exits with status 1 if either input file cannot be opened.
    """
    parser = argparse.ArgumentParser(description="Update arXiv entries in a \
bibtex file with subsequently published papers.")
    parser.add_argument('IDfile', action='store', type=str, default=None,
                        help='File containing list of arXiv IDs to search.')
    parser.add_argument('bibfile', action='store', type=str, default=False,
                        help='BibTeX file')
    parser.add_argument('--quiet', action='store_true', default=False,
                        help='Suppress printed output. (Overriden by \
--confirm).')
    parser.add_argument('--owner', action="store", default=None, type=str,
                        help="Name to insert into BibTex entry under the \
'owner' field.")
    args = parser.parse_args()

    bpw = BibTexWriter()

    # get today's timestamp for adding to the BibTex file
    timestamp = datetime.datetime.now().strftime("%Y.%m.%d")

    # make sure we can open the specified files
    if os.path.isfile(args.IDfile):
        IDs = open(args.IDfile, 'r')
    else:
        sys.stderr.write("Error, could not open: " + args.IDfile + ".\n")
        # FIX: previously execution fell through with `IDs` undefined and
        # crashed later with NameError; bail out like the bibfile branch does.
        sys.exit(1)
    if os.path.isfile(args.bibfile):
        bib = codecs.open(args.bibfile, 'r', 'utf-8')
        bp = BibTexParser(bib.read(), common_strings=True)
        bib.close()
    else:
        sys.stderr.write("Error, could not open: " + args.bibfile + ".\n")
        sys.exit(1)

    # back up library before we start
    shutil.copy2(args.bibfile, args.bibfile + '-vox_votes_adder.bak')

    # first get a list of arXiv IDs already in the library
    arxivlist = []
    for article in bp.entries:
        if 'eprint' in article.keys():
            if not article['eprint'] in arxivlist:
                arxivlist.append(article['eprint'])

    # open the bibtex file, we'll just append new entires
    outf = codecs.open(args.bibfile, 'a', 'utf-8')
    newcount = 0
    # now get bibtex entries from ADS for all new articles
    for ID in IDs:
        ID = ID.rstrip('\n')
        # skip blank lines (FIX: ID[0] raised IndexError on empty lines),
        # entries that we already have, and comments
        if not ID or ID in arxivlist or ID[0] == "#":
            continue
        # get ADS entry
        newref = getref(ID, args)
        # add owner information
        if args.owner is not None:
            newref.entries[0]['owner'] = args.owner
        # add timestamp information
        newref.entries[0]['timestamp'] = timestamp
        newcount += 1
        newref = updatebibtexkey(newref)
        outf.write(bpw.write(newref))
    IDs.close()  # FIX: the ID-list handle was previously leaked
    outf.close()
    if newcount and not args.quiet:
        sys.stdout.write('{0:d} reference(s) added.\n'.format(newcount))
# record = c.keyword(record) # record = c.link(record) # record = c.doi(record) record['tags'] = [i.strip() for i in re.split(',|;', record["tags"].replace("\n",""))] # record['p_authors'] = [] # if 'author' in record: # record['p_authors'] = [c.splitname(x, False) for x in record['author']] return record parser.customization = custom with open(args.target, 'r') as f: logging.info("Loading bibtex") db = b.load(f, parser) #go through entries, creating a new db for each tag, and year, and author db_dict = {} for entry in db.entries: for tag in entry['tags']: if tag not in db_dict: db_dict[tag] = BibDatabase() db_dict[tag].entries.append(entry) logging.info("Writing Bibtex") writer = BibTexWriter() for k,v in db_dict.items(): with open(join(args.output, "{}.bib".format(k)),'w') as f: f.write(writer.write(v))
class CVBuilder(object):
    """Build LaTeX/PDF curricula vitae from database collections.

    Renders one ``<person_id>.tex`` (plus a ``.bib`` file of their
    publications) per person via Jinja2 templates, compiles them with
    latex/bibtex/dvipdf, and cleans up intermediate files.

    NOTE(review): relies on module-level helpers (``gets``, ``date_key``,
    ``latex_safe``, ``all_docs_from_collection``, ...) and globals
    (``HAVE_BIBTEX_PARSER``, ``LATEX_OPTS``) defined elsewhere in the file.
    """

    # Subdirectory name under rc.builddir where output is placed.
    btype = 'cv'

    def __init__(self, rc):
        """Set up the build dir, Jinja2 environment, and BibTeX helpers.

        Parameters
        ----------
        rc : run control object with at least ``builddir`` and ``client``.
        """
        self.rc = rc
        self.bldir = os.path.join(rc.builddir, self.btype)
        # Search local ./templates first, then the package's bundled templates.
        self.env = Environment(loader=FileSystemLoader([
            'templates',
            os.path.join(os.path.dirname(__file__), 'templates'),
        ]))
        self.construct_global_ctx()
        if HAVE_BIBTEX_PARSER:
            self.bibdb = BibDatabase()
            self.bibwriter = BibTexWriter()

    def construct_global_ctx(self):
        """Populate self.gtx, the global template-rendering context."""
        self.gtx = gtx = {}
        rc = self.rc
        # Expose commonly-needed builtins and helpers to the templates.
        gtx['len'] = len
        gtx['True'] = True
        gtx['False'] = False
        gtx['None'] = None
        gtx['sorted'] = sorted
        gtx['groupby'] = groupby
        gtx['gets'] = gets
        gtx['date_key'] = date_key
        gtx['doc_date_key'] = doc_date_key
        gtx['level_val'] = level_val
        gtx['category_val'] = category_val
        gtx['rfc822now'] = rfc822now
        gtx['date_to_rfc822'] = date_to_rfc822
        gtx['month_and_year'] = month_and_year
        gtx['latex_safe'] = latex_safe
        # People sorted by position, most senior first.
        gtx['people'] = sorted(all_docs_from_collection(rc.client, 'people'),
                               key=position_key, reverse=True)
        gtx['all_docs_from_collection'] = all_docs_from_collection

    def render(self, tname, fname, **kwargs):
        """Render template *tname* with the global + given context to *fname*.

        ``static``/``root`` default to paths relative to the output file.
        """
        template = self.env.get_template(tname)
        ctx = dict(self.gtx)
        ctx.update(kwargs)
        ctx['rc'] = ctx.get('rc', self.rc)
        ctx['static'] = ctx.get('static',
                                os.path.relpath('static', os.path.dirname(fname)))
        ctx['root'] = ctx.get('root',
                              os.path.relpath('/', os.path.dirname(fname)))
        result = template.render(ctx)
        with open(os.path.join(self.bldir, fname), 'wt') as f:
            f.write(result)

    def build(self):
        """Full pipeline: render LaTeX, compile to PDF, remove temp files."""
        os.makedirs(self.bldir, exist_ok=True)
        self.latex()
        self.pdf()
        self.clean()

    def latex(self):
        """Render one CV .tex file (and .bib file) per person."""
        rc = self.rc
        for p in self.gtx['people']:
            # Match publications against the person's name and any aliases.
            names = frozenset(p.get('aka', []) + [p['name']])
            pubs = self.filter_publications(names, reverse=True)
            bibfile = self.make_bibtex_file(pubs, pid=p['_id'],
                                            person_dir=self.bldir)
            emp = p.get('employment', [])
            emp.sort(key=ene_date_key, reverse=True)
            edu = p.get('education', [])
            edu.sort(key=ene_date_key, reverse=True)
            projs = self.filter_projects(names)
            aghs = self.awards_grants_honors(p)
            self.render('cv.tex', p['_id'] + '.tex', p=p,
                        title=p.get('name', ''), aghs=aghs, pubs=pubs,
                        names=names, bibfile=bibfile, education=edu,
                        employment=emp, projects=projs)

    def filter_publications(self, authors, reverse=False):
        """Return citations authored by anyone in *authors*, date-sorted.

        Matching author names are wrapped in ``\\textbf`` for emphasis.
        NOTE(review): mutates the ``author`` field of the documents returned
        by ``all_docs_from_collection`` in place — confirm that is intended.
        """
        rc = self.rc
        pubs = []
        for pub in all_docs_from_collection(rc.client, 'citations'):
            if len(set(pub['author']) & authors) == 0:
                continue
            bold_self = []
            for a in pub['author']:
                if a in authors:
                    bold_self.append('\\textbf{' + a + '}')
                else:
                    bold_self.append(a)
            pub['author'] = bold_self
            pubs.append(pub)
        pubs.sort(key=doc_date_key, reverse=reverse)
        return pubs

    def make_bibtex_file(self, pubs, pid, person_dir='.'):
        """Write *pubs* to ``<person_dir>/<pid>.bib`` and return its path.

        Returns None when no bibtex parser is available. Field values
        (except ID/ENTRYTYPE/author) are passed through ``latex_safe``.
        """
        if not HAVE_BIBTEX_PARSER:
            return None
        skip_keys = set(['ID', 'ENTRYTYPE', 'author'])
        self.bibdb.entries = ents = []
        for pub in pubs:
            ent = dict(pub)
            # Map document fields onto bibtexparser's expected keys.
            ent['ID'] = ent.pop('_id')
            ent['ENTRYTYPE'] = ent.pop('entrytype')
            ent['author'] = ' and '.join(ent['author'])
            for key in ent.keys():
                if key in skip_keys:
                    continue
                ent[key] = latex_safe(ent[key])
            ents.append(ent)
        fname = os.path.join(person_dir, pid) + '.bib'
        with open(fname, 'w') as f:
            f.write(self.bibwriter.write(self.bibdb))
        return fname

    def filter_projects(self, authors, reverse=False):
        """Return projects whose team includes anyone in *authors*.

        The returned copies have their ``team`` narrowed to matching members.
        """
        rc = self.rc
        projs = []
        for proj in all_docs_from_collection(rc.client, 'projects'):
            team_names = set(gets(proj['team'], 'name'))
            if len(team_names & authors) == 0:
                continue
            proj = dict(proj)
            proj['team'] = [x for x in proj['team'] if x['name'] in authors]
            projs.append(proj)
        projs.sort(key=id_key, reverse=reverse)
        return projs

    def awards_grants_honors(self, p):
        """Make sorted awards, grants and honors list."""
        aghs = []
        for x in p.get('funding', ()):
            d = {'description': '{0} ({1}{2:,})'.format(
                     latex_safe(x['name']),
                     x.get('currency', '$').replace('$', '\$'), x['value']),
                 'year': x['year'],
                 '_key': date_to_float(x['year'], x.get('month', 0)),
                 }
            aghs.append(d)
        for x in p.get('service', []) + p.get('honors', []):
            d = {'description': latex_safe(x['name']),
                 'year': x['year'],
                 '_key': date_to_float(x['year'], x.get('month', 0)),
                 }
            aghs.append(d)
        # Most recent first.
        aghs.sort(key=(lambda x: x.get('_key', 0.0)), reverse=True)
        return aghs

    def pdf(self):
        """Compiles latex files to PDF.

        Runs latex/bibtex/latex/latex/dvipdf per person for stable references.
        """
        for p in self.gtx['people']:
            base = p['_id']
            self.run(['latex'] + LATEX_OPTS + [base + '.tex'])
            self.run(['bibtex'] + [base + '.aux'])
            self.run(['latex'] + LATEX_OPTS + [base + '.tex'])
            self.run(['latex'] + LATEX_OPTS + [base + '.tex'])
            self.run(['dvipdf', base])

    def run(self, cmd):
        """Run *cmd* inside the build directory, raising on failure."""
        subprocess.run(cmd, cwd=self.bldir, check=True)

    def clean(self):
        """Remove LaTeX intermediate files from the build directory."""
        postfixes = ['*.dvi', '*.toc', '*.aux', '*.out', '*.log', '*.bbl',
                     '*.blg', '*.log', '*.spl', '*~', '*.spl', '*.run.xml',
                     '*-blx.bib']
        to_rm = []
        for pst in postfixes:
            to_rm += glob(os.path.join(self.bldir, pst))
        for f in set(to_rm):
            os.remove(f)
# Fill in missing DOIs for every entry in `bibliography` (loaded earlier)
# by querying `searchdoi` per (title, author), then write the augmented
# database next to the input file.
before = 0   # entries that already carried a DOI
new = 0      # entries whose DOI we found
total = len(bibliography.entries)
for i, entry in enumerate(bibliography.entries):
    print("\r{i}/{total} entries processed, please wait...".format(i=i, total=total), flush=True, end="")
    try:
        if "doi" not in entry or entry["doi"].isspace():
            title = entry["title"]
            authors = get_authors(entry)
            for author in authors:
                doi_match = searchdoi(title, author)
                if doi_match:
                    doi = doi_match.groups()[0]
                    entry["doi"] = doi
                    new += 1
                    # FIX: stop at the first matching author — previously the
                    # loop kept searching and could increment `new` several
                    # times for a single entry.
                    break
        else:
            before += 1
    except Exception:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit, making the (slow, network-bound) loop unabortable.
        # Lookup failures remain best-effort: skip the entry and move on.
        pass
print("")
template = "We added {new} DOIs !\nBefore: {before}/{total} entries had DOI\nNow: {after}/{total} entries have DOI"
print(template.format(new=new, before=before, after=before + new, total=total))
outfile = sys.argv[1] + "_doi.bib"
print("Writing result to ", outfile)
writer = BibTexWriter()
writer.indent = ' '     # indent entries with 4 spaces instead of one
with open(outfile, 'w') as bibfile:
    bibfile.write(writer.write(bibliography))
def add_raw_bibtex(contents):
    """Parse raw BibTeX text and print each entry back, one at a time.

    Parameters
    ----------
    contents : str
        Raw BibTeX source to parse.
    """
    parser = BibTexParser()
    bib = bibtexparser.loads(contents, parser=parser)
    writer = BibTexWriter()
    for entry in bib.entries:
        # FIX: BibTexWriter.write() expects a BibDatabase, not a bare entry
        # dict — the original `writer.write(i)` raised at runtime. Wrap each
        # entry in a one-entry database to keep the per-entry output.
        single = BibDatabase()
        single.entries = [entry]
        print(writer.write(single))
# Merge entries from every database in `dbs` into `keys` (first occurrence
# wins) and collect clashing duplicate IDs in `conflicts`.
# NOTE(review): indentation reconstructed from collapsed source — `on_main =
# False` is assumed to run once per database (so only the first db in `dbs`
# counts as the "main" library); confirm against the original file.
on_main = True
for db in dbs:
    for entry in db.entries:
        if entry['ID'] not in keys:
            # First time this ID is seen: remember it, and if it came from a
            # secondary database, queue it for merging into `main` later.
            keys[entry['ID']] = entry
            if not on_main:
                not_in_main.append(entry)
            continue
        elif entry['ID'] not in conflicts:
            conflicts[entry['ID']] = []
        # Duplicate ID: record the clashing entry for manual inspection.
        conflicts[entry['ID']].append(entry)
    on_main = False

# Report each conflict alongside the entry that won.
logging.info("Conflicts: ")
for k,v in conflicts.items():
    orig = keys[k]
    logging.info("Original: {} - {} - {} - {}".format(orig['ID'], orig['author'], orig['year'], orig['title']))
    for c in v:
        logging.info("Conflict: {} - {} - {} - {}".format(c['ID'], c['author'], c['year'], c['title']))
    logging.info("-----")

# Drop into an interactive shell so conflicts can be resolved by hand.
IPython.embed(simple_prompt=True)

# Fold the non-duplicate entries from secondary databases into `main`
# and write the merged library out.
main.entries += not_in_main
logging.info("Bibtex loaded")
writer = BibTexWriter()
with open(join(args.output, "integrated.bib"),'w') as f:
    f.write(writer.write(main))