Example #1
    def __init__(self, anthology=None, anthology_dir=None):
        # counts of how often each name appears
        self.first_count = defaultdict(lambda: 0)  # "Maria" "Victoria"
        self.first_full_count = defaultdict(lambda: 0)  # "Maria Victoria"
        self.last_count = defaultdict(lambda: 0)  # "van" "den" "Bosch"
        self.last_full_count = defaultdict(lambda: 0)  # "van den Bosch"
        self.first_total = 0
        self.last_total = 0

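        # reuse cached name counts if "names.cache" exists; otherwise count names from the Anthology and cache them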
        if os.path.exists("names.cache"):
            self.load_cache()
        else:
            if anthology is None and anthology_dir is not None:
                anthology = Anthology(os.path.join(anthology_dir, "data"))
            self.count_names(anthology)
            self.dump_cache()
        for first_letter, people_list in people.items():
            with open("{}/people/{}.yaml".format(outdir, first_letter),
                      "w") as f:
                yaml.dump(people_list, Dumper=Dumper, stream=f)
            progress.update()
        progress.close()


if __name__ == "__main__":
    args = docopt(__doc__)
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    if "{scriptdir}" in args["--importdir"]:
        args["--importdir"] = os.path.abspath(
            args["--importdir"].format(scriptdir=scriptdir))
    if "{scriptdir}" in args["--exportdir"]:
        args["--exportdir"] = os.path.abspath(
            args["--exportdir"].format(scriptdir=scriptdir))

    log_level = log.DEBUG if args["--debug"] else log.INFO
    log.basicConfig(format="%(levelname)-8s %(message)s", level=log_level)
    tracker = SeverityTracker()
    log.getLogger().addHandler(tracker)

    log.info("Reading the Anthology data...")
    anthology = Anthology(importdir=args["--importdir"])
    log.info("Exporting to YAML...")
    export_anthology(anthology, args["--exportdir"], dryrun=args["--dry-run"])

    if tracker.highest >= log.ERROR:
        exit(1)


def main(args):
    scriptdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..',
                             'data')
    anthology = Anthology(importdir=scriptdir)

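    # collect newly added attachments, revisions, and errata from a diff piped in on stdin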
    attachments = defaultdict(list)
    revisions = []
    errata = []
    for line in sys.stdin:
        if not line.startswith("+"):
            continue

        line = line[1:].strip()
        if line.startswith("<attachment"):
            try:
                match_str = rf'<attachment type="(\w+)">({ANTHOLOGY_ID_REGEX}).*'
                match = re.match(match_str, line)
                attach_type, anthology_id = match.groups()
            except AttributeError:
                print(f"* Couldn't match '{match_str}' to '{line}'",
                      file=sys.stderr)
                continue

            attachments[attach_type].append((
                anthology.papers[anthology_id].get_title('plain'),
                ANTHOLOGY_URL.format(anthology_id),
            ))

        elif line.startswith("<revision"):
            try:
                match_str = rf'<revision.*href="({ANTHOLOGY_ID_REGEX}).*>.*'
                match = re.match(match_str, line)
                anthology_id = match.group(1)
            except AttributeError:
                print(f"* Couldn't match '{match_str}' to '{line}'",
                      file=sys.stderr)
                continue

            paper = anthology.papers[anthology_id]
            explanation = paper.attrib["revision"][-1]["explanation"]

            revisions.append((
                paper.get_title("plain"),
                ANTHOLOGY_URL.format(anthology_id),
                explanation,
            ))

        elif line.startswith("<errat"):
            try:
                match_str = rf"<errat.*?>({ANTHOLOGY_ID_REGEX}).*"
                match = re.match(match_str, line)
                anthology_id = match.group(1)
            except AttributeError:
                print(f"* Couldn't match '{match_str}' to '{line}'",
                      file=sys.stderr)
                continue

            errata.append((
                anthology.papers[anthology_id].get_title('plain'),
                ANTHOLOGY_URL.format(anthology_id),
            ))

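    # print a human-readable summary; inflect supplies "a"/"an" for each attachment type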
    inflector = inflect.engine()
    for attach_type, attachment_list in attachments.items():
        phrase = inflector.a(attach_type)
        print(f"\nAdded {phrase}:")
        for title, url in attachment_list:
            print("-", title, "\n ", url, "\n")

    if revisions:
        print("\nRevisions:")
        for title, url, explanation in revisions:
            print("-", title, "\n ", url, "\n ", explanation, "\n")

    if errata:
        print("\nErrata:")
        for title, url in errata:
            print("-", title, "\n ", url, "\n")
Example #4
def checkVideo(paper):
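    # True if the paper already has a video attachment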
    for elem in paper.attachments:
        if elem["type"] == "video":
            return True
    return False


args = docopt(__doc__)
fromYear = int(args["--from-year"])
cacheVimeo = args["--cache-vimeo"]
cacheMatchings = args["--cache-matchings"]

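# Vimeo API client; personalAccessToken, clientId, and apiSecret are assumed to be defined elsewhere in the script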
v = vimeo.VimeoClient(token=personalAccessToken, key=clientId, secret=apiSecret)

allpapers = Anthology(importdir="../data/").papers

print("number of papers in anthology: ", len(allpapers))

papers = {k: v for k, v in allpapers.items()
          if int(v.attrib["year"]) > fromYear and not checkVideo(v)}
print(f"number of papers in anthology without video after {fromYear}:",
      len(papers))


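# state for paging through the Vimeo account's uploaded videos, 100 per request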
requestUrl = "/users/46432367/videos?per_page=100"
cont = True
nameUrls = {}
numRequests = 0
Example #5
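                        # write the full BibTeX entry to the per-paper file and the with-abstracts export, and concise entries to the volume/anthology exports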
                        contents = paper.as_bibtex()
                        print(contents, file=file_paper)
                        print(contents, file=file_anthology_with_abstracts)

                        concise_contents = paper.as_bibtex(concise=True)
                        print(concise_contents, file=file_volume)
                        print(concise_contents, file=file_anthology)
                        print(concise_contents, file=file_anthology_raw)


if __name__ == "__main__":
    args = docopt(__doc__)
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    if "{scriptdir}" in args["--importdir"]:
        args["--importdir"] = os.path.abspath(
            args["--importdir"].format(scriptdir=scriptdir))
    if "{scriptdir}" in args["--exportdir"]:
        args["--exportdir"] = os.path.abspath(
            args["--exportdir"].format(scriptdir=scriptdir))

    log_level = log.DEBUG if args["--debug"] else log.INFO
    log.basicConfig(format="%(levelname)-8s %(message)s", level=log_level)
    tracker = SeverityTracker()
    log.getLogger().addHandler(tracker)

    anthology = Anthology(importdir=args["--importdir"], fast_load=True)
    create_bibtex(anthology, args["--exportdir"], clean=args["--clean"])

    if tracker.highest >= log.ERROR:
        exit(1)
Example #6
import os
import sys

from anthology import Anthology
from anthology.people import PersonName
from anthology.utils import deconstruct_anthology_id

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("collections", nargs="+")
    args = parser.parse_args()

    anthology = Anthology(
        importdir=os.path.join(os.path.dirname(sys.argv[0]), "..", "data"))

    # header
    print("name", "id", "title", sep="\t")

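    # for each paper in the requested collections, check whether its first author has any other Anthology papers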
    for id_, paper in anthology.papers.items():
        collection_id, volume_name, paper_id = deconstruct_anthology_id(id_)
        if collection_id in args.collections:
            authors = paper.attrib.get("author", [])
            if len(authors) > 0:
                # "authors" is a list of ("last name || first name", name-id or None) tuples
                first_author = authors[0][0]
                authors_papers = list(
                    anthology.people.name_to_papers[first_author].values())
                authors_papers = authors_papers[0] + authors_papers[1]
                if len(authors_papers) == 1:
                    # first-time author: print a row matching the header above
                    print(first_author, id_, paper.get_title("plain"), sep="\t")

if __name__ == "__main__":
    args = docopt(__doc__)
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    if "{scriptdir}" in args["--importdir"]:
        args["--importdir"] = os.path.abspath(
            args["--importdir"].format(scriptdir=scriptdir))

    log_level = log.DEBUG if args["--debug"] else log.INFO
    log.basicConfig(format="%(levelname)-8s %(message)s", level=log_level)
    tracker = SeverityTracker()
    log.getLogger().addHandler(tracker)

    log.info("Instantiating the Anthology...")
    anthology = Anthology(importdir=args["--importdir"], require_bibkeys=False)
    log.info("Scanning for papers without <bibkey> tags...")
    write_bibkeys(anthology,
                  args["--importdir"],
                  commit=bool(args["--commit"]))

    if not args["--commit"]:
        if tracker.highest >= log.ERROR:
            log.warning(
                "There were errors! Please check them carefully before re-running this script with -c/--commit."
            )
        else:
            log.warning(
                "Re-run this script with -c/--commit to save these changes to the XML files."
            )