def parse(self):
    """Build the courses described by the html document, one list per table.

    The document at ``self.url`` is loaded with ``make_tree`` and passed
    through ``clean_html``. For every ``<table>`` in the result, the
    ``cusistabledata`` cells are cut into rows of eight and each row is
    turned into one ``Course``.

    Cell index -> ``Course`` attribute within a row:
        0 -> datetime_day
        1 -> time
        2, 3 -> joined into the course name (ex: COMP + 352 / 1) that is
                handed to ``self.same_course``
        4 -> section
        5 -> room
        6 -> campus
        7 -> professor

    Returns:
        A list with one entry per table; each entry is the value returned
        by ``recurent_event_factor`` for the courses built from that table.
    """
    cells_per_row = 8

    raw_tree = make_tree(self.url)
    summer = self.semester == 'summer'
    document = clean_html(raw_tree, summer)

    schedule = []
    for table in document.xpath('//table'):
        cells = table.xpath("td[contains(@class, 'cusistabledata')]")
        table_courses = []
        known_courses = {}

        # The tr elements cannot be found using lxml, so the flat list of
        # td elements is walked in fixed-size slices, each slice standing
        # for one row of the table.
        for start in xrange(0, len(cells), cells_per_row):
            row = cells[start:start + cells_per_row]
            entry = Course()

            # Course name ex: COMP + 352 / 1
            name = '{} {}'.format(row[2].text, row[3].text)

            # Group same course together: same_course hands back the
            # (colorid, summary) pair for this name plus the updated
            # mapping of courses seen so far in this table.
            grouping, known_courses = self.same_course(name, known_courses)
            entry.colorid, entry.summary = grouping

            entry.datetime_day = row[0].text
            entry.time = row[1].text
            entry.room = row[5].text
            entry.campus = row[6].text
            entry.professor = row[7].text
            entry.section = row[4].text
            entry.semester = self.semester

            if summer:
                # Append the summer section to the semester; it is derived
                # from the first character of the second space-separated
                # token of the section.
                section_code = entry.section.split(' ')[1][0]
                entry.semester += get_summer_section(section_code)

            # Append the buildings address of a specific course and format
            # the data.
            entry.format_data(self.buildings)
            table_courses.append(entry)

        # Make sure not to have 2 instances of the same course.
        schedule.append(recurent_event_factor(table_courses))

    return schedule