def filter_course_links(filter_array):
    """Remove filtered courses from the link list in ``links_courses.json``.

    Reads the link list with ``scrape.read_from_file``, drops every link
    whose course code (the first ``REGEX_COURSE_CODE`` match in the link) is
    contained in ``filter_array``, prints each dropped code, and writes the
    remaining links back to the same file with ``scrape.write_to_file``.

    Args:
        filter_array: Container of course codes to drop. Only membership
            tests (``in``) are performed on it.
    """
    course_links = scrape.read_from_file("links_courses.json")
    filtered_links = []

    for link in course_links:
        match = re.search(REGEX_COURSE_CODE, link)
        if match is None:
            # re.search returns None when the link holds no course code.
            # Such a link cannot be matched against the filter, so keep it
            # instead of crashing on ``None.group`` before anything is saved.
            filtered_links.append(link)
            continue

        code = match.group(0)
        if code in filter_array:
            # Echo the codes that are being dropped.
            print(code)
        else:
            filtered_links.append(link)

    scrape.write_to_file("links_courses.json", filtered_links)
Пример #2
0
def fix_conditions():
    """Re-process the raw conditions of every course and save the result.

    Loads ``courses.json`` and, for each course whose
    ``["conditions"]["raw"]`` value is not ``None``, replaces the whole
    ``"conditions"`` entry with ``process_course_conditions(raw, code)``.
    The updated mapping is written to ``courses_better.json``, so the input
    file itself is not overwritten.
    """
    courses = scrape.read_from_file("courses.json")

    for code in courses:
        raw = courses[code]["conditions"]["raw"]
        # Identity check: None is a singleton, and ``==`` could be hijacked
        # by a custom __eq__.
        if raw is None:
            continue

        # Only the value under an existing key is replaced, so iterating the
        # mapping while assigning is safe.
        courses[code]["conditions"] = process_course_conditions(raw, code)

    scrape.write_to_file("courses_better.json", courses)


# fix_conditions()
Пример #3
0
import scrape
import re

COURSES = scrape.read_from_file("courses.json")
FILTER_COURSE_CODES = scrape.read_from_file("filter_course_codes.json")

# Raw string so that "\d" reaches the regex engine verbatim instead of being
# parsed as an invalid string escape (a warning on current Python versions).
# Matches four uppercase letters followed by four digits.
REGEX_COURSE_CODE = r"[A-Z]{4}\d{4}"

# Term labels that are stripped from each course before reporting.
_STANDARD_TERMS = ("Summer Term", "Term 1", "Term 2", "Term 3")

# Print every course that still lists a term after the standard labels have
# been removed.
for code in COURSES:
    terms = COURSES[code]["terms"]
    if terms is None:
        continue

    # This mutates the list stored in COURSES in place. list.remove drops
    # only the first occurrence of each label.
    for standard_term in _STANDARD_TERMS:
        if standard_term in terms:
            terms.remove(standard_term)

    if terms == []:
        continue

    print(code, terms)

# scrape.write_to_file("courses.json", COURSES)
Пример #4
0
def get_all_course_codes():
    """Return the course code found in each link of ``links_courses.json``.

    Each link read via ``scrape.read_from_file`` is searched with
    ``REGEX_COURSE_CODE`` and the first match is collected, in file order.

    Returns:
        list[str]: One code per link that contains a match. Links without a
        course code are skipped, so the result can be shorter than the link
        list.
    """
    all_course_links = scrape.read_from_file("links_courses.json")
    matches = (re.search(REGEX_COURSE_CODE, link) for link in all_course_links)
    # re.search yields None for a link with no code; filter those out rather
    # than raising AttributeError on ``None.group``.
    return [match.group(0) for match in matches if match is not None]
Пример #5
0
from selenium import webdriver
from bs4 import BeautifulSoup
import random
import scrape
import json
import time
import re

LINKS = scrape.read_from_file("links_degrees.json")

PROGRAMS = scrape.read_from_file("programs.json")

# NOTE(review): presumably a wait time in seconds - confirm at the use site.
WAIT = 20

# Regex patterns are raw strings so that "\d" is passed to the regex engine
# verbatim instead of being parsed as an invalid string escape sequence.
# Four uppercase letters followed by four digits.
REGEX_COURSE_CODE = r"[A-Z]{4}\d{4}"
# Anchored: five uppercase letters followed by "H" or one digit.
REGEX_SPECIALISATION_CODE = r"^[A-Z]{5}[H\d]$"
# Four consecutive digits.
REGEX_PROGRAM_CODE = r"\d{4}"


@scrape.return_null_on_failure
def get_program_name(html):
    """Return the text of the program-name ``<h2>`` heading in ``html``.

    The heading is located by tag name and by the page's generated CSS class
    string. If no such element exists, ``find`` gives ``None`` and the
    attribute access raises.
    NOTE(review): the decorator presumably turns that failure into a null
    result - confirm in ``scrape.return_null_on_failure``.
    """
    heading_class = "css-1b7bj3d-Heading-ComponentHeading-Heading-css-css ezav15i5"
    heading = html.find("h2", class_=heading_class)
    return heading.text


@scrape.return_null_on_failure
def get_program_code(html):
    return html.find(
        "h5",
        class_="introDetails__sub_heading css-ciwu9x-Subheading-css ezav15i1"
from selenium import webdriver
from bs4 import BeautifulSoup
import random
import scrape
import json
import time
import re

SUBJECT_AREAS = scrape.read_from_file("subject_areas.json")

SPECIALISATIONS = scrape.read_from_file("specialisations.json")
# SPECIALISATIONS = {}

LINKS = scrape.read_from_file("links_degrees.json")

# NOTE(review): presumably a wait time in seconds - confirm at the use site.
WAIT = 20

# Regex patterns are raw strings so that "\d" is passed to the regex engine
# verbatim instead of being parsed as an invalid string escape sequence.
# Four uppercase letters followed by four digits.
REGEX_COURSE_CODE = r"[A-Z]{4}\d{4}"
# Five uppercase letters followed by "H" or one digit (unanchored).
REGEX_SPECIALISATION_CODE = r"[A-Z]{5}[H\d]"


@scrape.return_null_on_failure
def get_degree_name(html):
    """Return the text of the degree-name ``<h2>`` heading in ``html``.

    The heading is located by tag name and by the page's generated CSS class
    string. If no such element exists, ``find`` gives ``None`` and the
    attribute access raises.
    NOTE(review): the decorator presumably turns that failure into a null
    result - confirm in ``scrape.return_null_on_failure``.
    """
    heading_class = "css-1b7bj3d-Heading-ComponentHeading-Heading-css-css ezav15i5"
    heading = html.find("h2", class_=heading_class)
    return heading.text


@scrape.return_null_on_failure
def get_degree_code(html):
    return html.find(