def term(page):
    """Parse a term's schedule page into a dictionary of courses.

    The page's "datadisplaytable" tables are consumed in pairs: a head table
    (caption plus summary cells) followed by a body table (detail rows).

    Args:
        page: Page content handed to ``__parse``.

    Returns:
        A ``{course key: course data}`` dictionary. When a course key shows
        up again, the later entry is stored under the ``"lab"`` key of the
        entry that was stored first.
    """
    data = {}
    # One shared iterator makes zip() pull the tables two at a time
    tables = iter(__parse(page).findall(".//table[@class='datadisplaytable']"))
    for head, body in zip(tables, tables):
        # Caption reads "title - key - section"; split from the right so a
        # title that itself contains " - " stays in one piece instead of
        # breaking the three-way unpacking
        title, course_key, section = head.find("caption").text.rsplit(" - ", 2)
        # Course keys are stored without spaces
        course_key = course_key.replace(" ", "")
        cells, rows = head.findall(".//td"), body.findall("tr")
        course_data = {
            "title": title,
            "section": section,
            "crn": int(cells[1].text),
            # Only the first character of the credit hours cell is used
            "ch": int(cells[5].text.strip()[0]),
            # Merge in the lecture/lab details
            **__get_data(rows, title),
        }
        if course_key not in data:
            # First time this course key is seen in the term
            data[course_key] = course_data
        else:
            # Same key again: attach it as the lab of the stored course
            data[course_key]["lab"] = course_data
    return data
def course(page):
    """Parse a single course's page into a one-entry dictionary.

    Args:
        page: Page content handed to ``__parse``.

    Returns:
        A ``{course key: course data}`` dictionary with exactly one entry;
        the course key has its spaces removed.
    """
    # The first "datadisplaytable" must yield exactly two direct rows:
    # the caption row and the body row
    caption, body = __parse(page).find(
        ".//table[@class='datadisplaytable']").findall("tr")
    # Caption reads "title - crn - key - section"; split from the right so a
    # title that itself contains " - " does not break the unpacking
    title, crn, course_key, section = (
        caption.text_content().strip().rsplit(" - ", 3))
    # Credit hours have no dedicated cell, so take the first text node of
    # the body row's cells that mentions "Credits"
    credits_text = body.xpath("td/text()[contains(., 'Credits')]")[0]
    return {
        course_key.replace(" ", ""): {
            "title": title,
            "section": section,
            "crn": int(crn),
            # Only the first character of the credits text is used
            "ch": int(credits_text.strip()[0]),
            # Merge in the lecture/lab details
            **__get_data(body.findall(".//tr"), title),
        }
    }
def registered_terms(page):
    """Collect the terms listed on the page.

    Args:
        page: Page content handed to ``__parse``.

    Returns:
        A ``{term code: term name}`` dictionary built from every
        ``span`` element whose class is "fieldOrangetextbold".
    """
    terms = {}
    spans = __parse(page).findall(".//span[@class='fieldOrangetextbold']")
    for span in spans:
        # The span's text is the term name; its code comes from the helper
        terms[__get_term_code(span.text)] = span.text
    return terms
def all_terms(response):
    """Collect every term found in the academic calendar.

    Args:
        response: Calendar content handed to ``__parse``.

    Returns:
        A ``{term name: term code}`` dictionary, where the name is
        "<season> <year with '/' replaced by '-'>" and the code is the first
        four characters of the year followed by ``seasons_codes[season]``.
    """
    terms = {}
    calendars = __parse(response).findall(".//div[@class='pageTurn']/div/div")
    for calendar in calendars:
        # The label must split into exactly three whitespace-separated
        # parts; the middle one is ignored
        season, _, year = calendar.find("label").text.split()
        code = year[:4] + seasons_codes[season]
        terms[f"{season} {year.replace('/', '-')}"] = code
    return terms
def student_details(page):
    """Extract the student's registration term, college and major.

    Args:
        page: Page content handed to ``__parse``.

    Returns:
        A dictionary with the keys ``"term"``, ``"college"`` and ``"major"``.
    """
    # All cells that sit directly in rows of the "datadisplaytable" tables
    cells = __parse(page).findall(".//table[@class='datadisplaytable']/tr/td")
    term_code = __get_term_code(cells[2].text)
    college = cells[4].text
    # Join cells 3 and 5 with " in ", then drop the "Undergraduate in " part
    major = "{} in {}".format(cells[3].text, cells[5].text)
    major = major.replace("Undergraduate in ", "")
    return {"term": term_code, "college": college, "major": major}
def holds(page):
    """List the holds shown on the page.

    Args:
        page: Page content handed to ``__parse``.

    Returns:
        A list with one dictionary per hold row, holding the keys
        ``"type"``, ``"start"``, ``"end"`` and ``"reason"``.
    """
    rows = __parse(page).findall(".//table[@class='datadisplaytable']/tr")
    # The first row is skipped; every remaining row is read as a hold
    return [
        {
            "type": cells[0].text,
            "start": cells[1].text,
            "end": cells[2].text,
            "reason": cells[4].text,
        }
        for cells in (row.findall("td") for row in rows[1:])
    ]
def final_exams(page):
    """List the final exams whose date has been announced.

    Args:
        page: Page content handed to ``__parse``.

    Returns:
        A list with one dictionary per announced final, holding the keys
        ``"course"``, ``"date"``, ``"start"``, ``"end"`` and ``"location"``.
    """
    exams = []
    rows = __parse(page).findall(".//table[@class='datadisplaytable'][2]/tr")
    # The first row is skipped
    for row in rows[1:]:
        cells = row.findall("td")
        # Rows without any data cell carry no final
        if not cells:
            continue
        date = cells[2].text
        # A date made only of asterisks (or empty) is not announced yet
        if all(char == "*" for char in date):
            continue
        exams.append({
            "course": cells[0].text,
            "date": date,
            "start": cells[3].text,
            "end": cells[4].text,
            "location": __extract_location(cells[5].text.split()),
        })
    return exams
def term_events(response, term_code):
    """List the calendar events of the requested term.

    Args:
        response: Calendar content handed to ``__parse``.
        term_code: Term code; its first four characters are the year and the
            rest is looked up in ``seasons_codes``.

    Returns:
        A list of ``{"date": ..., "text": ...}`` dictionaries, one per event
        row of every calendar whose label equals the requested term name.
    """
    def tidy(text):
        # Trim the ends and squeeze runs of spaces into a single space
        return re.sub(" +", " ", text.strip())

    year = term_code[:4]
    # Label to look for: "<seasons_codes entry> <year>/<year + 1>"
    term_name = f"{seasons_codes[term_code[4:]]} {year}/{int(year) + 1}"
    events = []
    for calendar in __parse(response).findall(".//div[@class='pageTurn']/div/div"):
        # Skip calendars that belong to other terms
        if calendar.find("label").text != term_name:
            continue
        # The first two table rows are skipped; the rest are events
        for row in calendar.findall(".//tbody/tr")[2:]:
            cells = row.findall("td")
            # Normalise range separators to " - " before cleaning, then keep
            # only what precedes the first comma
            date_text = re.sub("(Till|-|–)", " - ", cells[1].text_content())
            events.append({
                "date": tidy(date_text).split(',')[0],
                "text": tidy(cells[3].text_content()),
            })
    return events