def __init__(self, anthology=None, anthology_dir=None):
    # counts of how often each name appears
    self.first_count = defaultdict(lambda: 0)  # "Maria" "Victoria"
    self.first_full_count = defaultdict(lambda: 0)  # "Maria Victoria"
    self.last_count = defaultdict(lambda: 0)  # "van" "den" "Bosch"
    self.last_full_count = defaultdict(lambda: 0)  # "van den Bosch"
    self.first_total = 0
    self.last_total = 0

    if os.path.exists("names.cache"):
        self.load_cache()
    else:
        if anthology is None and anthology_dir is not None:
            anthology = Anthology(os.path.join(anthology_dir, "data"))
        self.count_names(anthology)
        self.dump_cache()
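# The load_cache() and dump_cache() methods referenced above are not shown in
# this excerpt. A minimal sketch of how they might look on the same class,
# assuming the counts are pickled to the "names.cache" file (the real script's
# serialization format may differ):
import pickle

def dump_cache(self):
    # defaultdicts built from lambdas cannot be pickled directly,
    # so convert them to plain dicts first
    state = {
        "first_count": dict(self.first_count),
        "first_full_count": dict(self.first_full_count),
        "last_count": dict(self.last_count),
        "last_full_count": dict(self.last_full_count),
        "first_total": self.first_total,
        "last_total": self.last_total,
    }
    with open("names.cache", "wb") as f:
        pickle.dump(state, f)

def load_cache(self):
    with open("names.cache", "rb") as f:
        state = pickle.load(f)
    # update() keeps the defaultdict behavior for names not in the cache
    self.first_count.update(state["first_count"])
    self.first_full_count.update(state["first_full_count"])
    self.last_count.update(state["last_count"])
    self.last_full_count.update(state["last_full_count"])
    self.first_total = state["first_total"]
    self.last_total = state["last_total"]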
    for first_letter, people_list in people.items():
        with open("{}/people/{}.yaml".format(outdir, first_letter), "w") as f:
            yaml.dump(people_list, Dumper=Dumper, stream=f)
        progress.update()
    progress.close()


if __name__ == "__main__":
    args = docopt(__doc__)
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    if "{scriptdir}" in args["--importdir"]:
        args["--importdir"] = os.path.abspath(
            args["--importdir"].format(scriptdir=scriptdir)
        )
    if "{scriptdir}" in args["--exportdir"]:
        args["--exportdir"] = os.path.abspath(
            args["--exportdir"].format(scriptdir=scriptdir)
        )

    log_level = log.DEBUG if args["--debug"] else log.INFO
    log.basicConfig(format="%(levelname)-8s %(message)s", level=log_level)
    tracker = SeverityTracker()
    log.getLogger().addHandler(tracker)

    log.info("Reading the Anthology data...")
    anthology = Anthology(importdir=args["--importdir"])
    log.info("Exporting to YAML...")
    export_anthology(anthology, args["--exportdir"], dryrun=args["--dry-run"])

    if tracker.highest >= log.ERROR:
        exit(1)
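# SeverityTracker comes from the anthology helper modules and is not shown in
# this excerpt. Conceptually it is a logging handler that remembers the most
# severe level it has seen, which lets these scripts exit non-zero whenever an
# error was logged. A minimal sketch of that idea (the real implementation may
# differ in detail):
import logging

class SeverityTracker(logging.Handler):
    def __init__(self, level=logging.NOTSET):
        super().__init__(level=level)
        self.highest = logging.NOTSET

    def emit(self, record):
        # only the maximum severity is kept; the records themselves
        # are not stored
        if record.levelno > self.highest:
            self.highest = record.levelno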
def main(args):
    scriptdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "data")
    anthology = Anthology(importdir=scriptdir)

    attachments = defaultdict(list)
    revisions = []
    errata = []
    for line in sys.stdin:
        if not line.startswith("+"):
            continue
        line = line[1:].strip()

        if line.startswith("<attachment"):
            match_str = rf'<attachment type="(\w+)">({ANTHOLOGY_ID_REGEX}).*'
            match = re.match(match_str, line)
            if match is None:
                # skip unparseable lines instead of crashing on match.groups()
                print(f"* Couldn't match '{match_str}' to '{line}'", file=sys.stderr)
                continue
            attach_type, anthology_id = match.groups()
            attachments[attach_type].append(
                (
                    anthology.papers[anthology_id].get_title("plain"),
                    ANTHOLOGY_URL.format(anthology_id),
                )
            )
        elif line.startswith("<revision"):
            match_str = rf'<revision.*href="({ANTHOLOGY_ID_REGEX}).*>.*'
            match = re.match(match_str, line)
            if match is None:
                print(f"* Couldn't match '{match_str}' to '{line}'", file=sys.stderr)
                continue
            anthology_id = match.group(1)
            paper = anthology.papers[anthology_id]
            explanation = paper.attrib["revision"][-1]["explanation"]
            revisions.append(
                (
                    paper.get_title("plain"),
                    ANTHOLOGY_URL.format(anthology_id),
                    explanation,
                )
            )
        elif line.startswith("<errat"):
            match_str = rf"<errat.*?>({ANTHOLOGY_ID_REGEX}).*"
            match = re.match(match_str, line)
            if match is None:
                print(f"* Couldn't match '{match_str}' to '{line}'", file=sys.stderr)
                continue
            anthology_id = match.group(1)
            errata.append(
                (
                    anthology.papers[anthology_id].get_title("plain"),
                    ANTHOLOGY_URL.format(anthology_id),
                )
            )

    inflector = inflect.engine()
    # use a distinct loop variable so the "attachments" dict is not shadowed
    for attach_type, attached in attachments.items():
        phrase = inflector.a(attach_type)
        print(f"\nAdded {phrase}:")
        for title, url in attached:
            print("-", title, "\n ", url, "\n")

    if revisions:
        print("\nRevisions:")
        for title, url, explanation in revisions:
            print("-", title, "\n ", url, "\n ", explanation, "\n")

    if errata:
        print("\nErrata:")
        for title, url in errata:
            print("-", title, "\n ", url, "\n")
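# The module-level constants used above are not shown in this excerpt. A
# plausible sketch, with the caveat that the pattern and URL template here are
# assumptions rather than the repository's exact definitions:
ANTHOLOGY_ID_REGEX = r"[A-Z]\d{2}-\d{4}"  # old-style IDs, e.g. "P19-1001"
ANTHOLOGY_URL = "https://www.aclanthology.org/{}"

# The script reads a diff on stdin and only inspects added ("+") lines, so a
# typical invocation would pipe in the most recent data commit, e.g.
# (filenames here are illustrative):
#
#     git show HEAD -- data/xml | python bin/changes.py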
def checkVideo(paper):
    for elem in paper.attachments:
        if elem["type"] == "video":
            return True
    return False


args = docopt(__doc__)
fromYear = int(args["--from-year"])
cacheVimeo = args["--cache-vimeo"]
cacheMatchings = args["--cache-matchings"]

v = vimeo.VimeoClient(token=personalAccessToken, key=clientId, secret=apiSecret)

allpapers = Anthology(importdir="../data/").papers
print("number of papers in anthology: ", len(allpapers))
# keep only papers newer than fromYear that do not already have a video
# attachment, matching the description printed below; use "p" as the loop
# variable so the Vimeo client "v" is not shadowed
papers = {
    k: p
    for k, p in allpapers.items()
    if int(p.attrib["year"]) > fromYear and not checkVideo(p)
}
print(
    "number of papers in anthology without video after " + str(fromYear) + ": ",
    len(papers),
)

requestUrl = "/users/46432367/videos?per_page=100"
cont = True
nameUrls = {}
numRequests = 0
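# The excerpt ends just before the paging loop that uses the variables set up
# above. A minimal sketch of how it might proceed, assuming the PyVimeo
# client's get() returns a requests-style response and the JSON follows
# Vimeo's documented paging format ("data", "paging.next", and per-video
# "name"/"link" fields):
while cont:
    response = v.get(requestUrl).json()
    numRequests += 1
    for video in response["data"]:
        # remember each video's title so it can later be matched to papers
        nameUrls[video["name"]] = video["link"]
    nextPage = response["paging"]["next"]
    if nextPage is None:
        cont = False
    else:
        requestUrl = nextPage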
            contents = paper.as_bibtex()
            print(contents, file=file_paper)
            print(contents, file=file_anthology_with_abstracts)

            concise_contents = paper.as_bibtex(concise=True)
            print(concise_contents, file=file_volume)
            print(concise_contents, file=file_anthology)
            print(concise_contents, file=file_anthology_raw)


if __name__ == "__main__":
    args = docopt(__doc__)
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    if "{scriptdir}" in args["--importdir"]:
        args["--importdir"] = os.path.abspath(
            args["--importdir"].format(scriptdir=scriptdir)
        )
    if "{scriptdir}" in args["--exportdir"]:
        args["--exportdir"] = os.path.abspath(
            args["--exportdir"].format(scriptdir=scriptdir)
        )

    log_level = log.DEBUG if args["--debug"] else log.INFO
    log.basicConfig(format="%(levelname)-8s %(message)s", level=log_level)
    tracker = SeverityTracker()
    log.getLogger().addHandler(tracker)

    anthology = Anthology(importdir=args["--importdir"], fast_load=True)
    create_bibtex(anthology, args["--exportdir"], clean=args["--clean"])

    if tracker.highest >= log.ERROR:
        exit(1)
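# A plausible invocation, assuming the docopt usage string accepts the same
# {scriptdir} placeholder that is expanded above (the script name and paths
# here are illustrative):
#
#     python bin/create_bibtex.py --importdir={scriptdir}/../data \
#         --exportdir={scriptdir}/../build --clean
#
# Full entries (with abstracts) go to the per-paper and with-abstracts files,
# while the concise form feeds the per-volume and combined anthology files.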
import os
import sys

from anthology import Anthology
from anthology.people import PersonName
from anthology.utils import deconstruct_anthology_id


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("collections", nargs="+")
    args = parser.parse_args()

    anthology = Anthology(
        importdir=os.path.join(os.path.dirname(sys.argv[0]), "..", "data")
    )

    # header
    print("name", "id", "title", sep="\t")

    for id_, paper in anthology.papers.items():
        collection_id, volume_name, paper_id = deconstruct_anthology_id(id_)
        if collection_id in args.collections:
            authors = paper.attrib.get("author", [])
            if len(authors) > 0:
                # "authors" is a list of ("last name || first name", name-id or None) tuples
                first_author = authors[0][0]
                authors_papers = list(
                    anthology.people.name_to_papers[first_author].values()
                )
                authors_papers = authors_papers[0] + authors_papers[1]
                if len(authors_papers) == 1:
                    # the excerpt is truncated here; presumably the matching
                    # row is printed to complete the tab-separated output
                    print(first_author, id_, paper.get_title("plain"), sep="\t")
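# An example invocation, printing one tab-separated row for each paper in the
# given collections whose first author has exactly one publication (the script
# name and collection IDs here are illustrative):
#
#     python bin/first_time_authors.py P19 W19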
if __name__ == "__main__":
    args = docopt(__doc__)
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    if "{scriptdir}" in args["--importdir"]:
        args["--importdir"] = os.path.abspath(
            args["--importdir"].format(scriptdir=scriptdir)
        )

    log_level = log.DEBUG if args["--debug"] else log.INFO
    log.basicConfig(format="%(levelname)-8s %(message)s", level=log_level)
    tracker = SeverityTracker()
    log.getLogger().addHandler(tracker)

    log.info("Instantiating the Anthology...")
    anthology = Anthology(importdir=args["--importdir"], require_bibkeys=False)

    log.info("Scanning for papers without <bibkey> tags...")
    write_bibkeys(anthology, args["--importdir"], commit=bool(args["--commit"]))

    if not args["--commit"]:
        if tracker.highest >= log.ERROR:
            log.warning(
                "There were errors! Please check them carefully before re-running this script with -c/--commit."
            )
        else:
            log.warning(
                "Re-run this script with -c/--commit to save these changes to the XML files."
            )