示例#1
0
def update_classes(force_update=False):
    '''Re-scrape every department listed on the classutil main page.

    Each department row that has a link for CURRENT_TERM is handed to
    scrape_dept. Departments that are in the database but were not seen
    with such a link are deleted, then the session is committed.

    Nothing happens unless force_update is true or wait_until_updated()
    returns a truthy value.
    '''
    if not force_update and not wait_until_updated():
        return

    # every stored department starts as a deletion candidate and is taken
    # out of this dict once it is seen running in the current term
    depts_to_delete = {
        dept.dept_id: dept for dept in db_session.query(Dept).all()}

    # requests has no default timeout, so without one a stalled connection
    # would block the update forever
    r = requests.get('http://classutil.unsw.edu.au/', timeout=30)
    main_page = BeautifulSoup(r.text, 'html.parser')

    # loop through all the departments on the main page
    for row in main_page.find_all('table')[1].find_all('tr'):
        data = row.find_all('td')
        if not data:
            # a row without <td> cells carries no department data
            continue

        # tolerate cells that have no class attribute at all
        if data[0].get('class', [''])[0] != 'data':
            # eg. 'cutabhead' rows, which describe the campus of the
            # departments below them
            continue

        # row describes a department: four link cells (indexed by
        # CURRENT_TERM below) followed by the id and name cells
        links = [d.a['href'] if d.a is not None else None
                 for d in data[:4]]
        dept_id, name = (d.get_text() for d in data[4:])
        link = links[CURRENT_TERM]
        # check if the department runs in the current semester
        if link is not None:
            depts_to_delete.pop(dept_id, None)
            scrape_dept(dept_id, name, link)

    for dept in depts_to_delete.values():
        db_session.delete(dept)

    db_session.commit()
示例#2
0
def save_alerts():
    '''Create one alert per selected class for the posted contact.

    Reads a JSON body. The first non-empty field of 'email', 'phonenumber'
    and 'yoname' (in that order) is used as the contact, and 'classids'
    lists the classes to watch. Renders 'error.html' with a message when
    the body, the contact or the class selection is unusable; otherwise
    stores the alerts, commits, and renders 'alert.html' as a success page.
    '''
    # if something is invalid or they haven't given a contact or chosen
    # classes just show an error page because they've gotten past the
    # javascript error handling somehow and repopulating the chosen classes
    # list would be super annoying
    # I guess it's still TODO worthy (I'll probably never do it though)
    post_data = request.get_json()

    # the body is untrusted: any JSON value other than an object (a list, a
    # number, ...) has no .get() and must be rejected here
    if not post_data or not isinstance(post_data, dict):
        return render_template('error.html', error='Something went wrong')

    # JSON values can be of any type; only text can be a contact.
    # type(u'') keeps this check correct on both Python 2 and Python 3.
    text_types = (str, type(u''))

    if post_data.get('email'):
        contact = post_data['email']
        contact_type = CONTACT_TYPE_EMAIL
        if (not isinstance(contact, text_types)
                or not re.match(r'^[^@]+@[^@]+\.[^@]+$', contact)):
            return render_template('error.html',
                                   error='Please enter a valid email address')
    elif post_data.get('phonenumber'):
        raw_number = post_data['phonenumber']
        contact_type = CONTACT_TYPE_SMS
        # strip everything but digits and '+'; a non-text value becomes ''
        # and therefore fails the format check below
        contact = (re.sub(r'[^0-9+]', '', raw_number)
                   if isinstance(raw_number, text_types) else '')
        if not re.match(r'^(04|\+?614)\d{8}$', contact):
            return render_template('error.html',
                                   error='Please enter a valid Australian ' +
                                   'phone number')
    elif post_data.get('yoname'):
        raw_name = post_data['yoname']
        contact_type = CONTACT_TYPE_YO
        # a non-text value becomes '' and fails the format check below
        # before is_valid_yo_name is ever called
        contact = raw_name.upper() if isinstance(raw_name, text_types) else ''
        if (not re.match(r'^(\d|\w)+$', contact)
                or not is_valid_yo_name(contact)):
            return render_template('error.html',
                                   error='Please enter a valid YO username')
    else:
        return render_template('error.html',
                               error='Please enter some contact info before ' +
                               'submitting.')

    # get course info from db
    klass_ids = post_data.get('classids', [])
    if not isinstance(klass_ids, list):
        # anything but a list is treated as "no classes selected"
        klass_ids = []
    klasses = []
    if klass_ids:
        klasses = db_session.query(Klass).filter(
            Klass.klass_id.in_(klass_ids)).all()
    if not klasses:
        return render_template('error.html',
                               error='Please select at least one class ' +
                               'before submitting.')

    for klass in klasses:
        alert = Alert(klass_id=klass.klass_id,
                      contact_type=contact_type,
                      contact=contact)
        db_session.add(alert)
    db_session.commit()
    courses = klasses_to_template_courses(klasses)

    return render_template('alert.html',
                           contact_type=contact_type_description(contact_type),
                           contact=contact,
                           courses=courses,
                           success_page=True)
示例#3
0
def check_alerts():
    '''Send every alert that is due and drop the ones that went out.

    Alerts whose should_alert() is truthy are passed to send_alerts; each
    alert it returns is deleted. A summary message is reported before the
    session is committed.
    '''
    due = [alert for alert in db_session.query(Alert)
           if alert.should_alert()]

    sent = send_alerts(due)
    for alert in sent:
        db_session.delete(alert)

    summary = 'Tried to send %d alerts, %d succeeded' % (len(due), len(sent))
    capture_message(summary, level='info')

    db_session.commit()
示例#4
0
def scrape_dept(dept_id, name, page):
    '''scrape all the courses in a department

    dept_id and name describe the department and are merged into the db.
    page is the department page's path, appended to the classutil root url.

    Every course found on the page is handed to scrape_course_and_classes.
    Courses stored for this department that were not scraped are deleted,
    then the session is committed. If the page has fewer than three tables
    the function returns early without deleting or committing anything.
    '''
    # add the dept to the db
    dept = Dept(dept_id=dept_id, name=name)
    db_session.merge(dept)

    # courses are removed from this dict as they are scraped; whatever is
    # left at the end is deleted
    courses_to_delete = {course.compound_id_tuple: course for course in
                         db_session.query(Course).filter_by(dept_id=dept_id)
                         .all()}

    # requests has no default timeout, so without one a stalled connection
    # would block the scrape forever
    r = requests.get('http://classutil.unsw.edu.au/' + page, timeout=30)
    dept_page = BeautifulSoup(r.text, 'html.parser')
    tables = dept_page.find_all('table')
    if len(tables) < 3:
        return

    klasses = []
    course_id = ''
    # kept separate from the department's name so the two cannot be mixed up
    course_name = name
    for row in tables[2].find_all('tr'):
        data = row.find_all('td')
        if not data:
            # a row without <td> cells is neither a course nor a class row
            continue

        if data[0].get('class', [''])[0] == 'cucourse':
            # row is the code and name of a course
            row_course_id = data[0].b.get_text()[4:8]
            if row_course_id == course_id:
                # every now and again we get multiple title rows for the same
                # course
                continue
            if klasses and course_id:
                # scrape all the classes from the previous course
                courses_to_delete.pop((dept_id, course_id), None)
                scrape_course_and_classes(course_id, dept_id, course_name,
                                          klasses)
            # start collecting afresh; class rows seen before any course
            # header belong to no course and are dropped here
            klasses = []
            course_id = row_course_id
            course_name = data[1].get_text()
        elif row.get('class', [''])[0] in ('rowHighlight', 'rowLowlight'):
            # row is info about a class
            klasses.append(data)

    # scrape the classes from the last course - but only if a course header
    # was actually seen, otherwise a course with an empty id and the
    # department's name would be written to the db
    if course_id:
        courses_to_delete.pop((dept_id, course_id), None)
        scrape_course_and_classes(course_id, dept_id, course_name, klasses)

    for course in courses_to_delete.values():
        db_session.delete(course)

    db_session.commit()
示例#5
0
def show_alert():
    '''Render the alert page for the classes named in the query string.

    The 'classids' query argument is a comma separated list of class ids.
    Ids that are not plain decimal numbers are ignored and the remaining
    ones are looked up in the db. The response is marked no-store.
    '''
    klass_ids = request.args.get('classids', '')
    courses = []

    if klass_ids:
        # filter out all non-numeric ids
        # [0-9] rather than \d so non-ASCII digits are rejected, and \Z
        # rather than $ so an id with a trailing newline is rejected too
        klass_ids = [
            klass_id for klass_id in klass_ids.split(',')
            if re.match(r'^[0-9]+\Z', klass_id)
        ]
        # get course info from db
        klasses = db_session.query(Klass).filter(
            Klass.klass_id.in_(klass_ids)).all()
        courses = klasses_to_template_courses(klasses)

    @after_this_request
    def add_header(response):
        # tell caches not to store this response
        response.cache_control.no_store = True
        return response

    return render_template('alert.html', courses=courses)
示例#6
0
def validate_klass_id(klass_id):
    '''Return klass_id as an int if a class with that id is in the db.

    Propagates whatever int() raises for a value it cannot convert, and
    raises KeyError when no Klass row has the id.
    '''
    numeric_id = int(klass_id)
    klass_exists = db_session.query(
        exists().where(Klass.klass_id == numeric_id)).scalar()
    if klass_exists:
        return numeric_id
    raise KeyError
示例#7
0
def scrape_course_and_classes(course_id, dept_id, name, klasses):
    '''scrape all the classes in a course

    course_id, dept_id and name describe the course, which is merged into
    the db. klasses is a list of class rows, each an iterable of eight
    cells whose text is read with get_text().

    Every row is merged into the db as a Klass. Its timeslots are
    (re)created only when the class is new or the raw time/place text has
    changed since the stored hash was written. Classes stored for this
    course that no longer appear in klasses are deleted. Nothing is
    committed here.
    '''
    # function-scope import so this block does not depend on the file's
    # import section
    import zlib

    # add the course to the db
    course = Course(course_id=course_id, dept_id=dept_id, name=name)
    db_session.merge(course)

    # remove from this as we find classes, eventually only the classes that are
    # no longer on classutil will appear here
    klasses_to_delete = {klass.klass_id:
                         klass for klass in db_session.query(Klass)
                         .filter_by(course_id=course_id, dept_id=dept_id)
                         .all()}

    # Crappy hack because some course decided to list two classes with the
    # same id...
    klasses_added = set()

    # compiled once instead of on every timeslot part
    timeslot_pattern = re.compile(
        r'(\w+) +(\d+(?::\d+)?(?:-\d+(?::\d+)?)?) *#? ' +
        r'*(?: *\((?:.*, *)*(.*?)\))?')

    for row in klasses:
        klass_type, _, klass_id, _, status, enrollment, _, time_and_place = (
            d.get_text() for d in row)
        status = web_status_to_db_status(status)
        klass_id = int(klass_id)
        m = re.search(r'(\d+)/(\d+).*', enrollment)
        enrolled = int(m.group(1))
        capacity = int(m.group(2))

        if klass_id in klasses_added:
            continue

        # The builtin hash() of a str is salted per process on Python 3, so
        # a hash stored by one run never matches the next run and every
        # timeslot would be rebuilt after each restart. crc32 is stable
        # across processes; the mask gives the same unsigned value on every
        # Python version.
        raw_hash = ((zlib.crc32(time_and_place.encode('utf-8')) & 0xffffffff)
                    % POSTGRES_MAX_INT)

        existing = klasses_to_delete.get(klass_id)

        # if this is a new class or the timeslot raw text has changed since we
        # last saw it
        if existing is None or raw_hash != existing.timeslot_raw_string_hash:
            # if the klass already existed, delete all existing timeslots and
            # recreate them
            if existing is not None:
                for timeslot in existing.timeslots:
                    db_session.delete(timeslot)

            mentioned_times = set()

            for time_and_place_part in time_and_place.split(';'):
                part_match = timeslot_pattern.search(time_and_place_part)
                if not part_match:
                    continue

                day = web_day_to_int_day(part_match.group(1))

                time_text = part_match.group(2)
                if '-' in time_text:
                    start_time, end_time = map(
                        hour_of_day_to_seconds_since_midnight,
                        time_text.split('-'))
                else:
                    # no end given: the slot is taken to last one hour
                    start_time = hour_of_day_to_seconds_since_midnight(
                        time_text)
                    end_time = start_time + 60 * 60

                # only add a timeslot for the first time a specific day/time
                # is mentioned to avoid situations where
                # the location changes throughout the semester - we'll only
                # list the first location
                if (day, time_text) in mentioned_times:
                    continue
                mentioned_times.add((day, time_text))

                location = part_match.group(3)
                # as a last resort, filter out any locations we've
                # extracted that don't have a letter in them
                # also filter out anything that looks like a range of
                # weeks (eg. w1-12)
                if location is not None and (
                        not re.search(r'[a-zA-Z]', location) or
                        re.match(r'^w\d+(?:-\d+)?$', location)):
                    location = None

                timeslot = Timeslot(klass_id=klass_id,
                                    day=day,
                                    start_time=start_time,
                                    end_time=end_time,
                                    location=location)
                db_session.add(timeslot)

        klass = Klass(klass_id=klass_id,
                      course_id=course_id,
                      dept_id=dept_id,
                      klass_type=klass_type,
                      status=status,
                      enrolled=enrolled,
                      capacity=capacity,
                      timeslot_raw_string_hash=raw_hash)
        db_session.merge(klass)
        klasses_to_delete.pop(klass_id, None)
        klasses_added.add(klass_id)

    for klass in klasses_to_delete.values():
        db_session.delete(klass)
示例#8
0
def course_info(course_id):
    course_id = course_id.upper()
    dept_id, course_id = validate_course_id(course_id)
    course = db_session.query(Course).filter_by(dept_id=dept_id,
                                                course_id=course_id).one()
    return json.dumps(course.to_dict(with_classes=True))