def test_display_order(self):
    """Fields named in ``display_order`` must be written first, in that order.

    Entries lacking the ordered fields (Toto3000) keep their remaining
    fields in the writer's default (alphabetical) order.
    """
    with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)
    writer = BibTexWriter()
    writer.contents = ['entries']  # write entries only; drop file comments
    writer.display_order = ['year', 'publisher', 'title']
    result = bibtexparser.dumps(bib_database, writer)
    expected = \
"""@book{Toto3000,
 title = {A title},
 author = {Toto, A and Titi, B}
}

@article{Wigner1938,
 year = {1938},
 publisher = {The Royal Society of Chemistry},
 title = {The transition state method},
 author = {Wigner, E.},
 doi = {10.1039/TF9383400029},
 issn = {0014-7672},
 journal = {Trans. Faraday Soc.},
 owner = {fr},
 pages = {29--41},
 volume = {34}
}

@book{Yablon2005,
 year = {2005},
 publisher = {Springer},
 title = {Optical fiber fusion slicing},
 author = {Yablon, A.D.}
}
"""
    self.assertEqual(result, expected)
def export_to_bibtex_one_file(self, path: str = "all.bib"):
    """stores publications in bibtex format in one file

    Entries are appended one by one to *path*; each entry's original
    field order is preserved via the writer's ``display_order``.

    Parameters
    ----------
    path : optional
        path where the resulting file should be stored, by default "all.bib"

    Raises
    ------
    KeyError
        if the type of publication and the handle are not specified
    """
    self._create_dir(path)
    for pub in self._dep_pubs:
        meta = pub.get_bibtex_representation()
        if not meta:
            print("This pub has no meta")
            continue
        # Both fields are mandatory for a valid BibTeX entry; use .get()
        # so a missing key raises our documented KeyError, not a bare one.
        if not (meta.get("type") and meta.get("handle")):
            # Fixed: the original concatenated two literals without a
            # separating space ("...metdataare required").
            raise KeyError("the type of publication and metadata are required")
        handle = meta.pop("handle")
        pub_type = meta.pop("type")
        db = BibDatabase()
        db.entries = [meta.copy()]
        db.entries[0].update({"ID": handle, "ENTRYTYPE": pub_type})
        writer = BibTexWriter()
        # Keep the order of the fields inside the bibtex file
        # (replaces the manual key-append loop).
        writer.display_order = list(meta)
        with open(path, "a") as bibfile:
            bibfile.write(writer.write(db))
def _writer():
    """Build and return a BibTeX writer configured for this project:
    4-space field indent, entries sorted by ID, with title/author/editor
    emitted first."""
    bib_writer = BibTexWriter()
    bib_writer.indent = '    '
    bib_writer.order_entries_by = ('ID',)
    bib_writer.display_order = ['title', 'author', 'editor']
    return bib_writer
def export(self, path: str = "./pubs/") -> None:
    """overrides superclass abstract method

    For each publication will be created a new folder with the title of
    that publication as the name of the folder. The bibtex file is named
    "cite.bib" and written inside the folder previously created.

    Parameters
    ----------
    path : optional
        path where files should be saved, by default "./pubs/"

    Raises
    ------
    KeyError
        if the type of publication and the handle are not specified
    """
    self._create_dir(path)
    for pub in self._dep_pubs:
        meta = pub.get_bibtex_representation()
        if not meta:
            print("This pub has no meta")
            continue
        # Both fields are mandatory; .get() avoids a bare KeyError on a
        # missing key and raises the documented one instead.
        if not (meta.get("type") and meta.get("handle")):
            # Fixed: the original concatenated two literals without a
            # separating space ("...metdataare required").
            raise KeyError("the type of publication and metadata are required")
        handle = meta.pop("handle")
        pub_type = meta.pop("type")
        db = BibDatabase()
        db.entries = [meta.copy()]
        db.entries[0].update({"ID": handle, "ENTRYTYPE": pub_type})
        writer = BibTexWriter()
        # To keep the order of the elements inside the bibtex file
        writer.display_order = list(meta)
        # Strip characters that are illegal or awkward in directory names.
        my_dir = (meta["title"].replace("/", "_")
                  .replace(" ", "-")
                  .replace("\"", ""))
        full_path = path + my_dir
        try:
            if not os.path.exists(full_path):
                os.mkdir(full_path, 0o755)
            # Fixed: the original duplicated this identical write in both
            # branches of the exists-check.
            with open(full_path + "/" + "cite.bib", "w") as bibfile:
                bibfile.write(writer.write(db))
        except OSError:
            # Fixed: the original passed "{}" and my_dir as two separate
            # print arguments instead of formatting the placeholder.
            print("Creation of the directory failed {}".format(my_dir))
def bibfile_latex_to_unicode(bibtex_fname):
    '''
    Load a .bib file, drop each entry's 'file' field, convert every
    remaining field from LaTeX escapes to unicode, strip comments, and
    return the cleaned database serialized as a string.
    '''
    with open(bibtex_fname) as fh:
        bibdb = bibtexparser.load(fh, parser=BibTexParser(common_strings=True))
    for idx, record in enumerate(bibdb.entries):
        delete_field(bibdb, idx, 'file')
        for key in record:
            bibdb.entries[idx][key] = latex_to_unicode(record[key])
    bibdb.comments = []
    writer = BibTexWriter()
    writer.display_order = ['title', 'year', 'author', 'journal', 'booktitle']
    # Use for debug purposes:
    # with open('tmp.bib','w') as f:
    #     f.write(clean_file)
    return writer.write(bibdb)
def proc_bib(input_io: TextIOWrapper, output_io: TextIOWrapper, jdb: JournalDB,
             silent: bool = False, output_format: str = "bib",
             abbrev_type="iso4"):
    """Abbreviate journal titles in a BibTeX stream.

    Each entry's ``journaltitle`` is looked up in *jdb*; depending on
    *output_format* the entry is rewritten in place ("bib", written out at
    the end) or a sourcemap line is emitted per match ("sourcemap").
    """
    if not hasattr(Journal, abbrev_type):
        raise ValueError(f"Invalid abbreviation type `{abbrev_type}`")
    bib_db = bibtexparser.load(input_io)
    for entry in bib_db.entries:
        journaltitle = entry.get("journaltitle")
        if journaltitle is None:
            continue
        journaltitle = braces_regex.sub("", journaltitle)
        # Match the title itself, optionally followed by a subtitle.
        name_pattern = re.compile(fr"^{re.escape(journaltitle)}(:?.*)$",
                                  RegexFlag.IGNORECASE)
        # TODO: query using lambdas?
        # TODO: normalize names (just in index?).
        res = jdb.journals.query_one(Journal.names_key, name_pattern)
        if res:
            _, journal = res
            abbrev = getattr(journal, abbrev_type)
            if output_format == "bib":
                entry["journaltitle"] = f"{{{abbrev or journaltitle}}}"
            elif output_format == "sourcemap":
                gen_sourcemap_map(journal, journaltitle, abbrev, output_io)
            abbrev_msg = f"abbreviating to '{abbrev}'"
        else:
            abbrev_msg = f"no abbreviation found"
        if not silent:
            info(f"found journal name '{journaltitle}'; {abbrev_msg}.")
    if output_format == "bib":
        bib_writer = BibTexWriter()
        bib_writer.add_trailing_comma = True
        bib_writer.display_order = None
        bib_writer.indent = "\t"
        bib_writer.order_entries_by = None
        output_io.write(bib_writer.write(bib_db))
    elif output_format == "sourcemap":
        # Sourcemap lines were already emitted per entry above.
        pass
def main():
    """Main function of the script.

    Loads the bib file, does the checking on it and prints out the sorted
    and formatted database.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=argparse.FileType('r'),
                        default=sys.stdin,
                        help="Input file, default is stdin.")
    parser.add_argument("--output", type=argparse.FileType('w'),
                        default=sys.stdout,
                        help="Optional output file.")
    parser.add_argument("--try-fix", default=False, action="store_true",
                        # Fixed typo: "dtabase" -> "database".
                        help="Flag to search information to fix the database.")
    parser.add_argument("--anthologies", type=str, nargs='+',
                        # Fixed typo: "know" -> "known".
                        help="List of BibTeX files with known papers.")
    args = parser.parse_args()

    if args.anthologies is not None:
        load_anthologies(args.anthologies)

    bib_database = bibtexparser.load(args.input, get_bibparser())
    cache_journal_issn(bib_database)
    authors, journals, booktitles = check_database(bib_database, args.try_fix)

    look_for_misspellings(authors, 'Authors')
    look_for_misspellings(journals, 'Journals')
    look_for_misspellings(booktitles, 'Booktitles (proceedings)',
                          threshold=0.9)

    writer = BibTexWriter()
    writer.indent = '    '
    # Fixed: BibTexWriter has no `order_by` attribute, so the original
    # assignment was a silent no-op; `order_entries_by` is the real option.
    writer.order_entries_by = ('author', 'year', 'title')
    writer.display_order = ['author', 'title', 'booktitle', 'journal']
    writer.align_values = True
    args.output.write(writer.write(bib_database))
def main():
    """Read double-newline-separated BibTeX records from stdin, drop
    abstracts and book entries, and print the reformatted records."""
    print("Reading from stdin ...", end="", file=sys.stderr)
    input_records = sys.stdin.read().split("\n\n")
    print("done.", file=sys.stderr)

    bib_parser = BibTexParser(ignore_nonstandard_types=True,
                              homogenize_fields=True,
                              common_strings=True)

    writer = BibTexWriter()
    writer.indent = '    '
    # Fixed: BibTexWriter has no `order_by` attribute, so the original
    # assignment was a silent no-op; `order_entries_by` is the real option.
    writer.order_entries_by = ('author', 'year', 'title')
    writer.display_order = ['author', 'title', 'booktitle', 'journal']
    writer.align_values = True

    records = 0
    skipped = 0
    for record in input_records:
        if not record:
            continue
        try:
            parsed = bibtexparser.loads(record, bib_parser)
            records += 1
            if records % 1000 == 0:
                print("Processed {} records.".format(records),
                      file=sys.stderr)
        except (pyparsing.ParseException,
                bibtexparser.bibdatabase.UndefinedString):
            skipped += 1
            # Fixed: the original fell through after a parse failure and
            # reprocessed the PREVIOUS `parsed` (NameError on the first
            # record); skip the broken record instead.
            continue

        for item in parsed.get_entry_list():
            if "abstract" in item:
                del item["abstract"]
        parsed.comments = []
        parsed.entries = [e for e in parsed.entries
                          if e["ENTRYTYPE"] != "book"]
        parsed.entries = list(parsed.get_entry_dict().values())
        print(writer.write(parsed))

    print("Finished. {} records kept, {} skipped.".format(records, skipped),
          file=sys.stderr)
def generate_bib_from_arxiv(arxiv_item, value, field="id"):
    """Render a BibTeX entry string for one arXiv search result.

    Parameters
    ----------
    arxiv_item : feed entry supporting both mapping and attribute access
        (e.g. a feedparser item) — TODO confirm against caller.
    value : str
        The query value; used directly as the article id unless *field*
        is "ti".
    field : str, optional
        "id" (default) or "ti"; for "ti" the id is extracted from the
        item's abs-page URL.

    Returns
    -------
    str
        The rendered BibTeX ``@article`` entry, keyed "arxiv:<id>".
    """
    if field == "ti":
        article_id = arxiv_item["id"].split("http://arxiv.org/abs/")[1]
    else:
        article_id = value
    key = "arxiv:" + article_id

    # Fixed: when the author list was empty the original left the raw list
    # in the entry; joining unconditionally yields "" for an empty list.
    authors = " and ".join(author["name"] for author in arxiv_item.authors)

    # published is "YYYY-MM-DD..."; keep only the year when a dash exists.
    published = arxiv_item.published.split("-")
    year = published[0] if len(published) > 1 else ''

    bib = BibDatabase()
    bib.entries = [{
        "title": arxiv_item.title,
        "author": authors,
        "year": year,
        "eprinttype": "arxiv",
        "eprint": article_id,
        "keywords": "",
        "abstract": arxiv_item.summary,
        "ID": key,
        "ENTRYTYPE": "article"
    }]
    writer = BibTexWriter()
    writer.add_trailing_comma = True
    writer.display_order = [
        'title', 'author', 'year', 'eprinttype', 'eprint', 'keywords',
        'abstract'
    ]
    writer.indent = "    "
    return writer.write(bib)
############################################# # First we do Publications stuff import bibtexparser from bibtexparser.bwriter import BibTexWriter from bibtexparser.bibdatabase import BibDatabase import rfeed writer = BibTexWriter() writer.indent = ' ' writer.display_order = ('ENTRYTYPE', 'author', 'title', 'year', 'journal', 'booktitle', 'school', 'howpublished', 'editor', 'series', 'volume', 'issue', 'number', 'month', 'pages', 'numpages', 'publisher', 'organization', 'acmid', 'address', 'isbn', 'issn', 'location', 'language', 'doi', 'urldate', 'link', 'url', 'keyword', 'keywords', 'abstract') def entry_sort_key(entry): if 'urldate' in entry: return entry['urldate'] if 'link' not in entry: raise Exception("{} does not have attribute 'link'".format( entry['ID'])) return entry['year'] + "-01-01" def normalise_name(n):
def parse_bibtex_entry(entry, pub_dir="publication", featured=False,
                       overwrite=False, normalize=False, dry_run=False):
    """Parse a bibtex entry and generate corresponding publication bundle"""
    # Fixed: the docstring was originally placed AFTER this import, making
    # it a no-op string expression rather than a real docstring.
    from academic.cli import log, LINKS_HEADER, ANTHOLOGY_LINK, ARXIV_LINK

    log.info(f"Parsing entry {entry['ID']}")

    bundle_path = f"content/{pub_dir}/{slugify(entry['ID'])}"
    markdown_path = os.path.join(bundle_path, "index.md")
    cite_path = os.path.join(bundle_path, "cite.bib")
    date = datetime.utcnow()
    timestamp = date.isoformat("T") + "Z"  # RFC 3339 timestamp.

    # Do not overwrite publication bundle if it already exists.
    if not overwrite and os.path.isdir(bundle_path):
        log.warning(
            f"Skipping creation of {bundle_path} as it already exists. "
            f"To overwrite, add the `--overwrite` argument.")
        return

    # Create bundle dir.
    log.info(f"Creating folder {bundle_path}")
    if not dry_run:
        Path(bundle_path).mkdir(parents=True, exist_ok=True)

    # Prepare YAML front matter for Markdown file.
    frontmatter = ["---"]
    frontmatter.append(f'title: "{clean_bibtex_str(entry["title"])}"')

    # Resolve the publication date from 'date', 'month' and 'year' fields.
    year = ""
    month = "01"
    day = "01"
    if "date" in entry:
        dateparts = entry["date"].split("-")
        if len(dateparts) == 3:
            year, month, day = dateparts[0], dateparts[1], dateparts[2]
        elif len(dateparts) == 2:
            year, month = dateparts[0], dateparts[1]
        elif len(dateparts) == 1:
            year = dateparts[0]
    if "month" in entry and month == "01":
        month = month2number(entry["month"])
    if "year" in entry and year == "":
        year = entry["year"]
    if len(year) == 0:
        log.error(f'Invalid date for entry `{entry["ID"]}`.')

    frontmatter.append(f"date: {year}-{month}-{day}")
    frontmatter.append(f"publishDate: {timestamp}")

    # Authors fall back to editors when absent.
    authors = None
    if "author" in entry:
        authors = entry["author"]
    elif "editor" in entry:
        authors = entry["editor"]
    if authors:
        authors = clean_bibtex_authors(
            [i.strip() for i in authors.replace("\n", " ").split(" and ")])
        frontmatter.append(f"authors: [{', '.join(authors)}]")

    frontmatter.append(
        f'publication_types: ["{PUB_TYPES.get(entry["ENTRYTYPE"], 0)}"]')

    if "abstract" in entry:
        frontmatter.append(
            f'abstract: "{clean_bibtex_str(entry["abstract"])}"')
    else:
        frontmatter.append('abstract: ""')

    frontmatter.append(f"featured: {str(featured).lower()}")

    # Publication name.
    if "booktitle" in entry:
        frontmatter.append(
            f'publication: "*{clean_bibtex_str(entry["booktitle"])}*"')
    elif "journal" in entry:
        frontmatter.append(
            f'publication: "*{clean_bibtex_str(entry["journal"])}*"')
    elif "publisher" in entry:
        frontmatter.append(
            f'publication: "*{clean_bibtex_str(entry["publisher"])}*"')
    else:
        frontmatter.append('publication: ""')

    if "venue" in entry:
        frontmatter.append(
            f'publication_short: "{clean_bibtex_str(entry["venue"])}"')
        del entry["venue"]

    if "keywords" in entry:
        frontmatter.append(
            f'tags: [{clean_bibtex_tags(entry["keywords"], normalize)}]')

    # Fixed: the original condition `if "arxiv" or "anthology" in entry:`
    # is always true ("arxiv" is a truthy literal), so the links header
    # was emitted for every entry.
    if "arxiv" in entry or "anthology" in entry:
        frontmatter.append(LINKS_HEADER)
        if "anthology" in entry:
            frontmatter.append(ANTHOLOGY_LINK +
                               clean_bibtex_str(entry["anthology"]))
            del entry["anthology"]
        # 'arxiv' is intentionally NOT deleted: it is still written to
        # cite.bib via the writer's display_order below.
        if "arxiv" in entry:
            frontmatter.append(ARXIV_LINK + clean_bibtex_str(entry["arxiv"]))

    if "slides" in entry:
        frontmatter.append('url_slides: ' + entry['slides'])
    if "video" in entry:
        frontmatter.append('url_video: ' + entry['video'])
    if "doi" in entry:
        frontmatter.append(f'doi: "{entry["doi"]}"')
    if "recent" in entry:
        frontmatter.append(f'recent: {entry["recent"]}')
        del entry['recent']

    frontmatter.append('url_pdf: papers/' + entry['ID'] + '.pdf')
    if 'code' in entry:
        frontmatter.append('url_code: ' + entry['code'])
        del entry['code']

    frontmatter.append("---\n\n")

    # Save citation file.
    log.info(f"Saving citation to {cite_path}")
    db = BibDatabase()
    db.entries = [entry]
    writer = BibTexWriter()
    writer.display_order = ["title", "author", "booktitle", "month", "year",
                            "address", "publisher", "pages", "volume", "url",
                            "arxiv", "abstract"]
    if not dry_run:
        with open(cite_path, "w", encoding="utf-8") as f:
            f.write(writer.write(db))

    # Save Markdown file.
    try:
        log.info(f"Saving Markdown to '{markdown_path}'")
        if not dry_run:
            with open(markdown_path, "w", encoding="utf-8") as f:
                f.write("\n".join(frontmatter))
    except IOError:
        log.error("Could not save file.")
"volume", "series", "editor", "year", "month", "date", "publisher", "address", "isbn", "issn", "articleno", "track", "doi", "url", "urlsuppl1", "urlsuppl2", "urlsuppl3", "presentation-video", "keywords", "abstract") # bibtex entries indented by a single space FIELD_INDENT = " " # Writer object to use for writing back nime proceedings in the correct format. writer = BibTexWriter() writer.indent = FIELD_INDENT writer.display_order = FIELD_ORDER writer.common_strings = False # would like it to write month 3-letter codes, but can't seem to avoid writing them at the start of each file weirdly. writer.order_entries_by = ("articleno", "url", "ID")
print("Output file exists") sys.exit(1) # read input with open(sys.argv[1], "r") as f: a = bibtexparser.load(f, BibTexParser(common_strings=True)) with open(sys.argv[2], "r") as f: b = bibtexparser.load(f, BibTexParser(common_strings=True)) # merge data bases merged = a merged.comments.extend(b.comments) merged.preambles.extend(b.preambles) merged.strings.update(b.strings) known = set(merged.entries_dict.keys()) for key, entry in b.entries_dict.items(): if key not in known: known.add(key) merged.entries.append(entry) # write to file writer = BibTexWriter() writer.indent = " " writer.add_trailing_comma = True writer.display_order = ["author", "title"] with open(sys.argv[3], "w") as f: f.write(writer.write(merged))