# Example #1
def scrape_past_papers():
    """Scrape past-paper PDFs for every course and year listed on SITE.

    Builds a mapping ``{course acronym: {year: [urls]}}`` by visiting each
    year's page and probing every course alias, then writes each collected
    URL out via ``write_out``.
    """
    driver = load_site(SITE)

    # NOTE(review): find_elements_by_xpath was removed in Selenium 4 —
    # confirm the pinned selenium version, or migrate to
    # driver.find_elements(By.XPATH, ...).
    years_data = driver.find_elements_by_xpath(
        "//div[@class='field-item even']/ul/li/a")

    # Year links appear newest-first on the page, so the last element is
    # the earliest year and the first is the latest.
    earliest = int(years_data[-1].get_attribute("textContent"))
    latest = int(years_data[0].get_attribute("textContent"))

    # Reversed so `years` runs newest-first, matching year_urls' page order.
    years = list(reversed(range(earliest, latest + 1)))
    links = {course[0]: {year: [] for year in years} for course in COURSES}

    year_urls = [year_data.get_attribute("href") for year_data in years_data]

    for year, year_url in zip(years, year_urls):
        driver.get(year_url)

        # course_tup is (acronym, alias, alias, ...); try every alias on
        # this year's page and collect whatever links turn up.
        for course_tup in COURSES:
            for course in course_tup[1:]:
                link = get_link(driver, course)
                if link is not None:
                    links[course_tup[0]][year].append(link)

    driver.quit()

    for course in COURSES:
        acronym = course[0]
        for year in years:
            urls = links[acronym][year]
            # Only disambiguate filenames with "_n" when a course/year
            # actually has more than one paper.
            many = len(urls) > 1
            for n, url in enumerate(urls):
                mod = f"_{n}" if many else ""
                write_out(acronym, f"Past_papers/{acronym}-{year}{mod}", url)

    print("All done!")
# Example #2
def scrape_sft():
    """Download the SFT lecture-note and example-sheet PDF links from SITE."""
    driver = load_site(SITE)

    notes_xpath = (
        "//div[@id='content-primary']/strong/center/a[contains(@href,'.pdf')]")
    sheets_xpath = (
        "//div[@id='content-primary']/ul/li/b/a[contains(@href,'.pdf')]")

    note_urls = get_urls(driver, notes_xpath)
    # Skip the first four list links — presumably not example sheets;
    # TODO(review): confirm against the live page.
    sheet_urls = get_urls(driver, sheets_xpath)[4:]

    driver.quit()

    # Notes are numbered from 0, example sheets from 1.
    write_out_list(COURSE, "n", enumerate(note_urls))
    write_out_list(COURSE, "e", enumerate(sheet_urls, 1))

    print("All done!")
# Example #3
def scrape_exam_rubrics():
    """Scrape the exam-rubric link for each course and write it out.

    Courses for which no alias yields a link are skipped (with a notice)
    instead of raising KeyError, so one missing rubric doesn't abort the
    remaining downloads.
    """
    driver = load_site(SITE)

    links = {}

    # course_tup is (acronym, alias, alias, ...); the last alias that
    # yields a link wins.
    for course_tup in COURSES:
        for course in course_tup[1:]:
            link = get_link(driver, course)
            if link is not None:
                links[course_tup[0]] = link

    driver.quit()

    for course in COURSES:
        acronym = course[0]
        link = links.get(acronym)
        if link is None:
            # Previously this was links[acronym] and crashed with KeyError
            # whenever a course had no rubric on the page.
            print(f"No exam rubric found for {acronym}; skipping.")
            continue
        write_out(acronym, f"{acronym}-exam_rubric", link)

    print("All done!")
# Example #4
def scrape_sm():
    """Scrape lecture-note and example-sheet links for the SM course."""
    driver = load_site(SITE)
    entries = get_named_urls(driver, "//div[@class='main']/ul/li/a")
    driver.quit()

    notes = []
    sheets = []
    note_index = 0
    for title, url in entries:
        if "Example" in title:
            # Sheet number is whatever digits appear in the link text.
            sheet_number = "".join(ch for ch in title if ch.isdigit())
            sheets.append((sheet_number, url))
        else:
            # Everything that isn't an example sheet is a lecture note,
            # numbered in page order starting at 0.
            notes.append((note_index, url))
            note_index += 1

    write_out_list(COURSE, "n", notes)
    write_out_list(COURSE, "e", sheets)

    print("All done!")
# Example #5
def scrape_aqft():
    """Scrape AQFT lecture notes, example sheets, and Kai Roehrig's solutions.

    SITE_0 lists the lecture notes followed by the example sheets; SITE_1
    hosts the solution PDFs in a table with their display names.
    """
    driver = load_site(SITE_0)
    primary = get_urls(driver, "//div[@id='content-primary']/ul/li/b/a")

    driver.get(SITE_1)
    solutions = get_named_urls(
        driver, "//body/table/tbody/tr/td/a[contains(@href,'.pdf')]")

    driver.quit()

    # The last four links on SITE_0 are the example sheets — TODO(review):
    # confirm this still matches the page layout.
    lecture_notes = primary[:-4]
    problem_sheets = primary[-4:]

    write_out_list(COURSE, "n", enumerate(lecture_notes, 1))
    write_out_list(COURSE, "e", enumerate(problem_sheets, 1))

    for name, url in solutions:
        # Drop a trailing ".pdf" from the display name before using it in
        # the output path (was name[-4:] == ".pdf"; endswith is the idiom).
        if name.endswith(".pdf"):
            name = name[:-4]
        write_out(COURSE, f"Example_sheets/Kai_Roehrig_solutions/{name}", url)

    print("All done!")