def parse_bibtex_entry(entry, pub_dir="publication", featured=False, overwrite=False, normalize=False, dry_run=False):
    """Parse a bibtex entry and generate the corresponding publication bundle.

    Writes ``content/<pub_dir>/<slug>/index.md`` (Hugo YAML front matter) and
    ``content/<pub_dir>/<slug>/cite.bib`` (the citation) unless *dry_run* is set.

    Args:
        entry: A bibtexparser entry dict; must contain at least ``ID`` and
            ``ENTRYTYPE``.
        pub_dir: Subdirectory of ``content/`` that receives the bundle.
        featured: Mark the publication as featured in the front matter.
        overwrite: Replace an existing bundle instead of skipping it.
        normalize: Passed through to ``clean_bibtex_tags`` for tag cleanup.
        dry_run: Log actions without writing anything to the filesystem.
    """
    # Imported lazily (inside the function) — presumably to avoid a circular
    # import with academic.cli; confirm before hoisting to module level.
    from academic.cli import log, LINKS_HEADER, ANTHOLOGY_LINK, ARXIV_LINK

    log.info(f"Parsing entry {entry['ID']}")

    bundle_path = f"content/{pub_dir}/{slugify(entry['ID'])}"
    markdown_path = os.path.join(bundle_path, "index.md")
    cite_path = os.path.join(bundle_path, "cite.bib")
    date = datetime.utcnow()
    timestamp = date.isoformat("T") + "Z"  # RFC 3339 timestamp.

    # Do not overwrite publication bundle if it already exists.
    if not overwrite and os.path.isdir(bundle_path):
        log.warning(
            f"Skipping creation of {bundle_path} as it already exists. "
            f"To overwrite, add the `--overwrite` argument.")
        return

    # Create bundle dir.
    log.info(f"Creating folder {bundle_path}")
    if not dry_run:
        Path(bundle_path).mkdir(parents=True, exist_ok=True)

    # Prepare YAML front matter for Markdown file.
    frontmatter = ["---"]
    frontmatter.append(f'title: "{clean_bibtex_str(entry["title"])}"')

    # Resolve the publication date: prefer an ISO `date` field, fall back to
    # separate `month`/`year` bibtex fields.
    year = ""
    month = "01"
    day = "01"
    if "date" in entry:
        dateparts = entry["date"].split("-")
        if len(dateparts) == 3:
            year, month, day = dateparts[0], dateparts[1], dateparts[2]
        elif len(dateparts) == 2:
            year, month = dateparts[0], dateparts[1]
        elif len(dateparts) == 1:
            year = dateparts[0]
    if "month" in entry and month == "01":
        month = month2number(entry["month"])
    if "year" in entry and year == "":
        year = entry["year"]
    if len(year) == 0:
        log.error(f'Invalid date for entry `{entry["ID"]}`.')

    frontmatter.append(f"date: {year}-{month}-{day}")
    frontmatter.append(f"publishDate: {timestamp}")

    # Authors: fall back to editors when no authors are listed.
    authors = None
    if "author" in entry:
        authors = entry["author"]
    elif "editor" in entry:
        authors = entry["editor"]
    if authors:
        authors = clean_bibtex_authors(
            [i.strip() for i in authors.replace("\n", " ").split(" and ")])
        frontmatter.append(f"authors: [{', '.join(authors)}]")

    frontmatter.append(
        f'publication_types: ["{PUB_TYPES.get(entry["ENTRYTYPE"], 0)}"]')

    if "abstract" in entry:
        frontmatter.append(
            f'abstract: "{clean_bibtex_str(entry["abstract"])}"')
    else:
        frontmatter.append('abstract: ""')

    frontmatter.append(f"featured: {str(featured).lower()}")

    # Publication name.
    if "booktitle" in entry:
        frontmatter.append(
            f'publication: "*{clean_bibtex_str(entry["booktitle"])}*"')
    elif "journal" in entry:
        frontmatter.append(
            f'publication: "*{clean_bibtex_str(entry["journal"])}*"')
    elif "publisher" in entry:
        frontmatter.append(
            f'publication: "*{clean_bibtex_str(entry["publisher"])}*"')
    else:
        frontmatter.append('publication: ""')

    if "venue" in entry:
        frontmatter.append(
            f'publication_short: "{clean_bibtex_str(entry["venue"])}"')
        del entry["venue"]

    if "keywords" in entry:
        frontmatter.append(
            f'tags: [{clean_bibtex_tags(entry["keywords"], normalize)}]')

    # BUG FIX: the original condition was `if "arxiv" or "anthology" in entry:`
    # — the literal "arxiv" is always truthy, so the links header was emitted
    # for EVERY entry. Test membership of both keys explicitly instead.
    if "arxiv" in entry or "anthology" in entry:
        frontmatter.append(LINKS_HEADER)
        if "anthology" in entry:
            frontmatter.append(ANTHOLOGY_LINK + clean_bibtex_str(entry["anthology"]))
            del entry["anthology"]
        if "arxiv" in entry:
            frontmatter.append(ARXIV_LINK + clean_bibtex_str(entry["arxiv"]))

    if "slides" in entry:
        frontmatter.append('url_slides: ' + entry['slides'])
    if "video" in entry:
        frontmatter.append('url_video: ' + entry['video'])
    if "doi" in entry:
        frontmatter.append(f'doi: "{entry["doi"]}"')
    if "recent" in entry:
        frontmatter.append(f'recent: {entry["recent"]}')
        del entry['recent']

    # The PDF is assumed to live at papers/<ID>.pdf — this link is emitted
    # unconditionally, whether or not the file exists.
    frontmatter.append('url_pdf: papers/' + entry['ID'] + '.pdf')
    if 'code' in entry:
        frontmatter.append('url_code: ' + entry['code'])
        del entry['code']
    frontmatter.append("---\n\n")

    # Save citation file.  Fields deleted from `entry` above (venue, recent,
    # code, anthology) are intentionally excluded from the written .bib.
    log.info(f"Saving citation to {cite_path}")
    db = BibDatabase()
    db.entries = [entry]
    writer = BibTexWriter()
    writer.display_order = ["title", "author", "booktitle", "month", "year",
                            "address", "publisher", "pages", "volume", "url",
                            "arxiv", "abstract"]
    if not dry_run:
        with open(cite_path, "w", encoding="utf-8") as f:
            f.write(writer.write(db))

    # Save Markdown file.
    try:
        log.info(f"Saving Markdown to '{markdown_path}'")
        if not dry_run:
            with open(markdown_path, "w", encoding="utf-8") as f:
                f.write("\n".join(frontmatter))
    except IOError:
        log.error("Could not save file.")
def parse_bibtex_entry(
    entry,
    pub_dir="publication",
    featured=False,
    overwrite=False,
    normalize=False,
    dry_run=False,
):
    """Parse a bibtex entry and generate corresponding publication bundle"""
    # Lazy import — presumably avoids a circular import with academic.cli;
    # confirm before hoisting to module level.
    from academic.cli import log

    log.info(f"Parsing entry {entry['ID']}")

    # Bundle lives at content/<pub_dir>/<slugified entry ID>/.
    bundle_path = f"content/{pub_dir}/{slugify(entry['ID'])}"
    markdown_path = os.path.join(bundle_path, "index.md")
    cite_path = os.path.join(bundle_path, "cite.bib")
    date = datetime.utcnow()
    timestamp = date.isoformat("T") + "Z"  # RFC 3339 timestamp.

    # Do not overwrite publication bundle if it already exists.
    if not overwrite and os.path.isdir(bundle_path):
        log.warning(
            f"Skipping creation of {bundle_path} as it already exists. "
            f"To overwrite, add the `--overwrite` argument.")
        return

    # Create bundle dir.
    log.info(f"Creating folder {bundle_path}")
    if not dry_run:
        Path(bundle_path).mkdir(parents=True, exist_ok=True)

    # Save citation file (skipped on dry runs).
    log.info(f"Saving citation to {cite_path}")
    db = BibDatabase()
    db.entries = [entry]
    writer = BibTexWriter()
    if not dry_run:
        with open(cite_path, "w", encoding="utf-8") as f:
            f.write(writer.write(db))

    # Prepare YAML front matter for Markdown file.
    # NOTE(review): `hugo new` runs even when dry_run is set, so a dry run
    # still shells out and lets Hugo create index.md (which page.load below
    # depends on) — confirm this is intended.
    hugo = utils.hugo_in_docker_or_local()
    subprocess.call(f"{hugo} new {markdown_path} --kind publication", shell=True)
    if "docker-compose" in hugo:
        time.sleep(2)  # presumably lets the dockerised hugo flush the file — verify

    page = EditableFM(bundle_path)
    page.load("index.md")

    page.fm["title"] = clean_bibtex_str(entry["title"])

    # Resolve date: prefer an ISO `date` field, fall back to `month`/`year`.
    year, month, day = "", "01", "01"
    if "date" in entry:
        dateparts = entry["date"].split("-")
        if len(dateparts) == 3:
            year, month, day = dateparts[0], dateparts[1], dateparts[2]
        elif len(dateparts) == 2:
            year, month = dateparts[0], dateparts[1]
        elif len(dateparts) == 1:
            year = dateparts[0]
    if "month" in entry and month == "01":
        month = month2number(entry["month"])
    if "year" in entry and year == "":
        year = entry["year"]
    if len(year) == 0:
        log.error(f'Invalid date for entry `{entry["ID"]}`.')
    page.fm["date"] = "-".join([year, month, day])
    page.fm["publishDate"] = timestamp

    # Authors: fall back to editors when no authors are listed.
    authors = None
    if "author" in entry:
        authors = entry["author"]
    elif "editor" in entry:
        authors = entry["editor"]
    if authors:
        authors = clean_bibtex_authors(
            [i.strip() for i in authors.replace("\n", " ").split(" and ")])
        page.fm["authors"] = authors

    page.fm["publication_types"] = [PUB_TYPES.get(entry["ENTRYTYPE"], "0")]

    if "abstract" in entry:
        page.fm["abstract"] = clean_bibtex_str(entry["abstract"])
    else:
        page.fm["abstract"] = ""

    page.fm["featured"] = featured

    # Publication name.
    if "booktitle" in entry:
        publication = "*" + clean_bibtex_str(entry["booktitle"]) + "*"
    elif "journal" in entry:
        publication = "*" + clean_bibtex_str(entry["journal"]) + "*"
    elif "publisher" in entry:
        publication = "*" + clean_bibtex_str(entry["publisher"]) + "*"
    else:
        publication = ""
    page.fm["publication"] = publication

    if "keywords" in entry:
        page.fm["tags"] = clean_bibtex_tags(entry["keywords"], normalize)

    if "url" in entry:
        page.fm["url_pdf"] = clean_bibtex_str(entry["url"])

    if "doi" in entry:
        page.fm["doi"] = clean_bibtex_str(entry["doi"])

    # Save Markdown file (skipped on dry runs).
    try:
        log.info(f"Saving Markdown to '{markdown_path}'")
        if not dry_run:
            page.dump()
    except IOError:
        log.error("Could not save file.")
def parse_bibtex_entry(
    entry,
    pub_dir="publication",
    featured=False,
    overwrite=False,
    normalize=False,
    dry_run=False,
):
    """Parse a bibtex entry and generate corresponding publication bundle"""
    # Lazy import — presumably avoids a circular import with academic.cli;
    # confirm before hoisting to module level.
    from academic.cli import log

    log.info(f"Parsing entry {entry['ID']}")

    # Bundle lives at content/<pub_dir>/<slugified entry ID>/.
    bundle_path = f"content/{pub_dir}/{slugify(entry['ID'])}"
    markdown_path = os.path.join(bundle_path, "index.md")
    cite_path = os.path.join(bundle_path, "cite.bib")
    date = datetime.utcnow()
    timestamp = date.isoformat("T") + "Z"  # RFC 3339 timestamp.

    # Do not overwrite publication bundle if it already exists.
    if not overwrite and os.path.isdir(bundle_path):
        log.warning(
            f"Skipping creation of {bundle_path} as it already exists. "
            f"To overwrite, add the `--overwrite` argument.")
        return

    # Create bundle dir.
    log.info(f"Creating folder {bundle_path}")
    if not dry_run:
        Path(bundle_path).mkdir(parents=True, exist_ok=True)

    # Filter some bib fields (rebinds `entry` to a copy, so the caller's
    # dict is not mutated here).
    entry_fields_filter = ["file"]
    entry = {k: v for k, v in entry.items() if k not in entry_fields_filter}
    # for key in entry:
    #     print(key+": "+entry[key])
    #     pass

    # Save citation file (skipped on dry runs).
    log.info(f"Saving citation to {cite_path}")
    db = BibDatabase()
    db.entries = [entry]
    writer = BibTexWriter()
    if not dry_run:
        with open(cite_path, "w", encoding="utf-8") as f:
            f.write(writer.write(db))

    # Prepare YAML front matter for Markdown file.
    # NOTE(review): the command interpolates markdown_path into a shell
    # string (shell=True); entry IDs are slugified above, which presumably
    # keeps this safe — confirm.
    hugo = utils.hugo_in_docker_or_local()
    if not dry_run:
        subprocess.call(f"{hugo} new {markdown_path} --kind publication", shell=True)
        if "docker-compose" in hugo:
            time.sleep(2)  # presumably lets the dockerised hugo flush the file — verify

    page = EditableFM(Path(bundle_path), dry_run=dry_run)
    page.load(Path("index.md"))

    page.fm["title"] = clean_bibtex_str(entry["title"])
    if 'shorttitle' in entry:
        page.fm["shorttitle"] = clean_bibtex_str(entry["shorttitle"])

    # Resolve date: prefer an ISO `date` field (for ranges like
    # "2020-01/2020-02" only the part before "/" is used), then fall back
    # to separate `month`/`year` fields.
    year, month, day = "", "01", "01"
    if "date" in entry:
        dateparts = entry["date"].split("/")[0].split("-")
        if len(dateparts) == 3:
            year, month, day = dateparts[0], dateparts[1], dateparts[2]
        elif len(dateparts) == 2:
            year, month = dateparts[0], dateparts[1]
        elif len(dateparts) == 1:
            year = dateparts[0]
    if "month" in entry and month == "01":
        month = month2number(entry["month"])
    if "year" in entry and year == "":
        year = entry["year"]
    if len(year) == 0:
        log.error(f'Invalid date for entry `{entry["ID"]}`.')
    page.fm["date"] = "-".join([year, month, day])
    page.fm["publishDate"] = timestamp

    # Allow for pubstate (in press, submitted, ...) to appear — unless
    # 'unpublished'.
    if "howpublished" in entry:
        if "unpublished" not in entry["howpublished"].lower():
            page.fm["weight"] = 1
            page.fm["publication_status"] = entry["howpublished"]

    # Event metadata (biblatex `eventtitle`/`venue`, custom `eventurl`/`type`).
    if "eventtitle" in entry:
        page.fm["event"] = clean_bibtex_str(entry["eventtitle"])
    if "eventurl" in entry:
        page.fm["event_url"] = entry["eventurl"]
    if "type" in entry:
        if entry["type"].lower() != "talk":
            page.fm["eventtype"] = entry["type"]
    if "venue" in entry:
        page.fm["location"] = entry["venue"]

    # Authors: fall back to editors when no authors are listed.
    authors = None
    if "author" in entry:
        authors = entry["author"]
    elif "editor" in entry:
        authors = entry["editor"]
    if authors:
        authors = clean_bibtex_authors(
            [i.strip() for i in authors.replace("\n", " ").split(" and ")])
        page.fm["authors"] = authors

    # Map the bibtex entry type to a PublicationType; newspaper articles are
    # a special-cased `entrysubtype` of `article`.
    pubtype = PUB_TYPES.get(entry["ENTRYTYPE"], PublicationType.Uncategorized)
    if "entrysubtype" in entry:
        if (entry["ENTRYTYPE"] == "article"
                and entry["entrysubtype"].lower() == "newspaper"):
            pubtype = PublicationType.NewspaperArticle

    # For talks we want
    # - NO: 'publication_types'
    # - YES: 'all_day: true'
    # lazy check: are we writing in a directory containing 'talk' in its name?
    if "talk" in pub_dir:
        page.fm["all_day"] = True
    else:
        page.fm["publication_types"] = [str(pubtype.value)]

    if "abstract" in entry:
        page.fm["abstract"] = clean_bibtex_str(entry["abstract"])
    else:
        page.fm["abstract"] = ""

    page.fm["featured"] = featured

    # Publication name (booktitle/journaltitle).
    if "booktitle" in entry:
        publication = "*" + clean_bibtex_str(entry["booktitle"]) + "*"
    elif 'journaltitle' in entry:
        publication = "*" + clean_bibtex_str(entry["journaltitle"]) + "*"
    elif "journal" in entry:
        publication = "*" + clean_bibtex_str(entry["journal"]) + "*"
    elif "publisher" in entry:
        publication = "*" + clean_bibtex_str(entry["publisher"]) + "*"
    else:
        publication = ""
    page.fm["publication"] = publication

    # Publication short name (shortjournal).
    if "shortjournal" in entry:
        page.fm["publication_short"] = "*" + clean_bibtex_str(
            entry["shortjournal"]) + "*"
    else:
        page.fm["publication_short"] = ""

    if "keywords" in entry:
        page.fm["tags"] = clean_bibtex_tags(entry["keywords"], normalize)

    # A bare `url` maps to url_pdf unless `urltype` overrides the suffix.
    if "url" in entry:
        url_type = "url_pdf"
        if "urltype" in entry:
            url_type = "url_" + entry["urltype"]
        page.fm[url_type] = clean_bibtex_str(entry["url"])

    # General url_*: should work for url_{video,slides,code,etc.}
    for key in entry:
        if key.startswith("url") and key != "urltype" and len(key) > 3:
            page.fm["url_" + key[3:]] = clean_bibtex_str(entry[key])

    if "doi" in entry:
        page.fm["doi"] = clean_bibtex_str(entry["doi"])

    # Save Markdown file (skipped on dry runs).
    try:
        log.info(f"Saving Markdown to '{markdown_path}'")
        if not dry_run:
            page.dump()
    except IOError:
        log.error("Could not save file.")
    return page