Example #1
def fetch_ftp_files(out_dir):
	"""Downloads ftp files from UT Austin FTP server

	Args:
		out_dir (str): directory to download files to
	"""
	__url = 'reg-it.austin.utexas.edu'
	__username = '******'

	logger.info(f"Downloading FTP data files to {out_dir}")

	cur_dir = getcwd()

	ftp = FTP(__url)
	ftp.login(user=__username)

	chdir(out_dir)
	for filename in (filename_current, filename_next, filename_future):

		logger.debug(f'FTP: downloading {filename}')
		with open(filename, 'wb') as local_file:
			ftp.retrbinary('RETR ' + filename, local_file.write, 1024)
	
	ftp.quit()

	chdir(cur_dir)
Example #2
def refresh_ecis():
	"""
	Set course and prof ecis_avg and ecis_students by iterating through ecis_scores
	"""

	logger.info("Refreshing course and professor ecis fields with respective data")
	query_tuple = (Course.query.all(), Prof.query.all())

	# iterate over both the Course and Prof query results since the logic is identical
	for queries in query_tuple:
		for query in queries:

			if type(query) is Course:
				logger.debug(f"Refreshing ecis for Course: {query.dept} {query.num}")
			elif type(query) is Prof:
				logger.debug(f"Refreshing ecis for Prof: {query.first_name} {query.last_name}")

			ecis = 0
			students = 0

			# iterate through ecis scores specific to the course/prof
			for prof_course in query.prof_course:
				for prof_course_sem in prof_course.prof_course_sem:
					for ecis_child in prof_course_sem.ecis:
						ecis += ecis_child.course_avg * ecis_child.num_students
						students += ecis_child.num_students

			# average will be None if there are no students
			query.ecis_avg = (ecis / students) if students > 0 else None
			query.ecis_students = students
			db.session.commit()
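The averages above are student-weighted rather than a plain mean of the per-record averages. A minimal arithmetic sketch of the same computation on made-up numbers:

# hypothetical ECIS records as (course_avg, num_students) pairs
records = [(4.0, 30), (3.0, 10)]
ecis = sum(avg * n for avg, n in records)       # 150.0
students = sum(n for _, n in records)           # 40
ecis_avg = (ecis / students) if students > 0 else None
print(ecis_avg)                                 # 3.75, not the unweighted 3.5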
Example #3
def populate_dept(dept_info, override=False):
	"""
	Populate the database with departments
	:param dept_info: list of tuples with: (abbreviation, name)
	:type dept_info: list[tuple(str, str)]
	:param override: override current department with same abbreviation if found in database
	:type override: bool
	"""

	logger.info("Populating database with departments")
	for abr, name in dept_info:

		cur_dept = Dept.query.filter_by(abr=abr).first()
		if cur_dept is None:
			# add department to database
			abr = abr.strip()
			name = name.strip()

			logger.debug(f"Adding dept {name} ({abr}) to database")
			dept = Dept(abr=abr, name=name)
			db.session.add(dept)

		elif override:
			# override current department
			logger.debug(f"Overriding dept {name} ({abr}) to database")
			cur_dept.abr = abr
			cur_dept.name = name

		else:
			# department already exists and not overriding
			logger.debug(f"Already exists: dept {name} ({abr})")

		db.session.commit()
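A minimal usage sketch, assuming the function above is in scope and a Flask-SQLAlchemy application context is active; the department tuples are illustrative only:

dept_info = [("C S", "Computer Science"), ("M", "Mathematics")]  # hypothetical data
populate_dept(dept_info)                  # adds departments not yet in the database
populate_dept(dept_info, override=True)   # also overwrites existing entries with matching abbreviations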
Example #4
def parse_prof_csv(file_path):
    """
    Parse .csv file containing prof data
    :param file_path: path to prof file
    :type file_path: str
    :return: list of prof data sorted by semester
    :rtype: list[tuple(int, str, str)]
    """

    __key_sem = 'CCYYS'
    __key_prof_name = 'INSTR_NAME'
    __key_prof_eid = 'INSTR_EID'

    logger.info(f'Parsing prof csv file: {file_path}')
    df = pd.read_csv(file_path)
    profs = set()
    for index, row in df.iterrows():

        semester, name, eid = row[__key_sem], row[__key_prof_name], row[__key_prof_eid]
        try:
            semester = int(semester)
        except ValueError:
            logger.debug(
                f'Unable to parse semester {semester}. Defaulting to 0...')
            semester = 0

        profs.add((semester, name.lower(), eid.lower()))

    profs = sorted(list(profs), key=lambda x: x[0])
    return profs
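A sketch of the expected input and output, assuming a CSV with the CCYYS, INSTR_NAME, and INSTR_EID columns; the rows below are made up:

# hypothetical prof.csv:
#   CCYYS,INSTR_NAME,INSTR_EID
#   20199,"DOE, JANE",jd12345
#   20192,"SMITH, JOHN",js54321
profs = parse_prof_csv("prof.csv")
# -> [(20192, 'smith, john', 'js54321'), (20199, 'doe, jane', 'jd12345')]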
Example #5
def populate_sem(start_yr=2010, end_yr=2020):
	"""
	Populate database with semesters for the given year range. Will populate for spring, summer, fall semesters.
	:param start_yr: starting year for the populate
	:type start_yr: int
	:param end_yr: ending year for the populate (exclusive)
	:type end_yr: int
	"""

	logger.info(f"Populating database with semesters from {start_yr} to {end_yr}")
	for yr in range(start_yr, end_yr):
		for sem in (2, 6, 9):
			if Semester.query.filter_by(year=yr, semester=sem).first() is None:
				check_or_add_semester(yr, sem)
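The semester codes 2, 6, and 9 correspond to spring, summer, and fall, and the year range is end-exclusive. A minimal usage sketch, assuming an application context is active:

populate_sem(start_yr=2018, end_yr=2021)  # spring/summer/fall semesters for 2018, 2019, and 2020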
Example #6
def automate_backend(run_once):
    """
    Function used to automate backend tasks:
        1. fetch ftp files and update scheduled course info
        2. read maintenance.txt and perform tasks as necessary
        3. organize log files
    :param run_once: if True, run the tasks once immediately before entering the daily wait cycle
    :type run_once: bool
    """
    while True:

        logger.info("Automation task in backend")

        dt_today = datetime.datetime.now(pytz.timezone('America/Chicago'))
        dt_tmr = dt_today + datetime.timedelta(days=1)
        dt_tmr = dt_tmr.replace(hour=1, minute=0)

        if run_once:
            run_once = False
            logger.info('Running once for automation')
        else:
            __a_day_secs = 24 * 3600
            until_start = int((dt_tmr - dt_today).total_seconds())
            if until_start > __a_day_secs:
                until_start -= __a_day_secs

            logger.info(f"Waiting {until_start} seconds until start time for automation")
            for _ in range(until_start):
                time.sleep(1)

        # task 1: fetch ftp files and update scheduled course info
        logger.info("Fetching new ftp files")
        fetch_ftp_files('input_data')
        semester_path = fetch_sem_values("input_data", "input_data")
        update_sem_vals(semester_path)

        # logger.info("Updating scheduled course database info")
        ftp_info = parse_ftp("input_data")
        reset_scheduled_info()
        populate_scheduled_course(ftp_info)

        # task 2: read maintenance.txt and perform task as necessary
        run_maintenance()

        # task 3: organize log files
        organize_log_files()
        logger.info("Finished automation")
Example #7
def populate_dept_info(dept_info):
	"""
	Populate department with additional information (college and department name)
	:param dept_info: list of tuples containing: (abbreviation, department name, college name)
	:type dept_info: list[tuple(str, str, str)]
	"""
	logger.info('Populating departments with additional info')

	for abr, dept, college in dept_info:

		cur_dept = Dept.query.filter_by(abr=abr).first()
		if cur_dept is None:
			logger.debug(f"Cannot find dept {abr}")
		else:
			logger.debug(f"Updating dept: {abr} with dept={dept}, college={college}")
			cur_dept.dept = dept
			cur_dept.college = college

		db.session.commit()
Example #8
def maintenance_course_task(path, pages):
    """
    Run maintenance task for course request
    Will:
        1. update department information with respect to the given file
        2. update course information with respect to the given file
    :param path: path to the input file
    :type path: str
    :param pages: pages of the file to parse
    :type pages: list[int]
    """

    logger.info("Updating department info")
    departments = fetch_depts()
    populate_dept(departments, override=True)

    dept_info = fetch_dept_info(path, pages)
    populate_dept_info(dept_info)

    logger.info("Updating course info")
    courses = fetch_courses(path, pages)
    populate_course(courses, cur_sem=int(sem_current))
Example #9
def organize_log_files():
    """
    Function tasked with organizing the log files into the following folder structure:
    1. /log
        2. /year_<year num>
            3. /week_<week start date>_to_<week end date>
                4. <log files corresponding to the week>
    Assumption: files without extensions after .log will not be organized (e.g. file.log vs file.log.20200830),
        where the extension marks the date of the log
    """

    logger.info("Organizing log files")
    files = [f for f in os.listdir(DEFAULT_LOG_FOLDER) if os.path.isfile(os.path.join(DEFAULT_LOG_FOLDER, f))]

    for f in files:
        log_path = get_log_file_path(f)
        if log_path is not None:
            dir_path = os.path.split(log_path)[0]
            if not os.path.exists(dir_path):
                os.makedirs(dir_path)
            original_path = os.path.join(DEFAULT_LOG_FOLDER, f)
            shutil.move(original_path, log_path)
Example #10
def parse_ftp(in_dir):
	"""Parse FTP files from the UT Austin FTP server

	Args:
		in_dir (str): directory containinig the ftp files
	"""

	logger.info(f"Parsing FTP files from {in_dir}")
	courses = []

	for filename in (filename_current, filename_next, filename_future):

		filepath = join(in_dir, filename)

		if isfile(filepath):

			logger.debug(f'FTP: parsing {filename}')
			with open(filepath) as f:

				lines = f.readlines()
				categories, lines = __parse_categories(lines)
				
				if categories is not None:

					for line in lines:
						# standardizing the lines
						line = line.lower()
						data = line.split("\t")
						data = [d.strip() for d in data]

						if len(line) > 0 and len(data) >= len(categories):
							# separating data by category list
							course = {categories[i]: data[i] for i in range(len(categories))}
							courses.append(course)
		else:
			logger.debug(f'FTP: {filename} does not exist in {in_dir}')

	return courses
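Each data line is tab-delimited and zipped against the category names returned by __parse_categories, so a hypothetical two-column row would be parsed as follows:

# illustrative only: the real category names come from the FTP file header
categories = ["dept", "unique"]
line = "C S\t12345\n".lower()
data = [d.strip() for d in line.split("\t")]
course = {categories[i]: data[i] for i in range(len(categories))}
print(course)  # {'dept': 'c s', 'unique': '12345'}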
Example #11
def fetch_sem_values(ftp_dir, out_dir):
	"""
	Fetch semester values from the FTP data files in the given directory
	:param ftp_dir: the directory containing the ftp data files
	:param out_dir: the directory to output a file containing the semester data
	:return: path to the output file containing the semester data
	"""

	files = (filename_current, filename_next, filename_future)
	keys = (key_current, key_next, key_future)

	out_path = join(out_dir, sem_file)
	sem_dict = {}
	logger.info(f"Fetching semester values from dir={ftp_dir}, to file={out_path}")

	for i in range(len(files)):

		sem = None
		m_file = files[i]
		filepath = join(ftp_dir, m_file)

		if isfile(filepath):

			with open(filepath, 'r') as f:
				lines = f.readlines()				
				
			for line in lines:
				if __sem_label in line:
					m = re.search(r'[A-Za-z ]+(\d{5}) (.*)?', line)
					if m is not None:
						sem = m.group(1)
		else:
			logger.debug(f"Fetch Sem: cannot find file: {m_file} in {ftp_dir}")

		sem_dict[keys[i]] = sem

	with open(out_path, 'w') as f:
		json.dump(sem_dict, f)
	
	return out_path
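The result is a small JSON file mapping the current/next/future keys to five-digit CCYYS codes (or null when a source file is missing). An illustrative run, with made-up semester values and assuming key_current/key_next/key_future are the strings shown:

semester_path = fetch_sem_values("input_data", "input_data")
# hypothetical contents of the written file:
# {"current": "20192", "next": "20196", "future": "20199"}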
Example #12
def run_maintenance():
    """
    Check maintenance txt file (default="maintenance.txt") for maintenance tasks
    Potential tasks:
        1. 'course <insert path to file> <insert comma separated page numbers>'
            update Course rows reading in Excel file (semester basis)
        2. 'ecis <insert path to file> <insert comma separated page numbers>'
            update ECIS info (semester basis)
        3. 'prof_course <insert path to file> <insert comma separated page numbers>'
            update ProfCourse relationships (should receive most of NEW info from FTP)
        4. 'prof <insert path to file>'
            update Professor info (unlikely)
    """
    __maintenance_txt_file = "maintenance.txt"
    logger.info(f"Initiating {__maintenance_txt_file}")

    if os.path.isfile(__maintenance_txt_file):
        with open(__maintenance_txt_file, 'r') as f:
            commands = f.readlines()

        while len(commands) > 0:

            command = commands[0]
            command_parts = command.split(' ')

            if len(command_parts) >= 2:
                cmd, path = command_parts[0].strip(), command_parts[1].strip()
                logger.info(f"Executing {cmd} {path}")

                if len(command_parts) >= 3:
                    pages = [int(page.strip().replace('\'', "").replace("\"", "")) for page in command_parts[2].split(',')]

                    if cmd == 'course':
                        maintenance_course_task(path, pages)
                    elif cmd == 'ecis':
                        populate_ecis(path, pages)
                else:
                    if cmd == 'prof_course':
                        populate_prof_course(path)
                    elif cmd == 'prof':
                        profs = parse_prof_csv(path)
                        populate_prof_eid(profs)
                    elif cmd == 'ftp':
                        logger.info("Updating scheduled course database info")
                        ftp_info = parse_ftp("input_data")
                        reset_scheduled_info()
                        populate_scheduled_course(ftp_info)
            
            commands = commands[1:]
            with open(__maintenance_txt_file, 'w') as f:
                f.writelines(commands)
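An illustrative maintenance.txt, with hypothetical paths and page numbers; each command line is removed from the file after it is executed (note that every line needs at least two tokens, so the ftp command still takes a placeholder path):

course input_data/course_schedule.xlsx 1,2,3
ecis input_data/ecis_scores.xlsx 2
prof_course input_data/prof_course.txt
prof input_data/prof.csv
ftp input_data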
Example #13
def reset_scheduled_info():
	"""
	Reset the mark_deletion flag to False for every ScheduledCourse entry
	"""
	logger.info("Resetting scheduled info")
	for scheduled_course in ScheduledCourse.query.all():
		scheduled_course.mark_deletion = False
		db.session.commit()
Example #14
def populate_course(course_info, cur_sem=None):
	"""
	Populate database with courses
	:param course_info: list of dictionaries containing course data
	:type course_info: list[dict]
	:param cur_sem: the current semester. if set to None, existing courses will always be replaced with the new data
	:type cur_sem: int or None
	"""

	__inherit = "(See Base Topic for inherited information.)"
	null_depts = set()

	logger.info("Populating database with courses")

	for course in course_info:

		# fetch values from dictionary
		semester = course[KEY_SEM]
		dept = course[KEY_DEPT]
		num = course[KEY_NUM]
		title = course[KEY_TITLE]
		cs_title = course[KEY_CS_TITLE]
		description = course[KEY_DESCRIPTION]
		restrictions = course[KEY_RESTRICTION]
		t_num = course[KEY_TOPIC_NUM]
		pre_req = course[KEY_PRE_REQ]

		# check to see if dept exists --> else skip
		dept_obj = Dept.query.filter_by(abr=dept).first()

		if dept_obj is None:
			null_depts.add(dept)
			continue

		# if topic number > 0, then title = modified cs title
		if t_num > 0:
			cs_title = __parse_title(cs_title)
			title = title if cs_title is None else cs_title

		# None if course doesn't currently exist
		old_course = None
		# define new base course variable
		new_course = Course(
			num=num,
			title=title,
			description=description,
			restrictions=restrictions,
			pre_req=pre_req,
			dept_id=dept_obj.id,
			topic_num=t_num
		)

		# condition on topic number
		if t_num >= 0:

			# all courses with same topic number --> should be unique topics
			# if len 0 --> new topic
			topic_courses_flask = Course.query.filter_by(dept_id=dept_obj.id, num=num)
			topic_courses = topic_courses_flask.all()

			# set topic number --> will create new topic if it doesn't exist
			new_course.topic_id = __check_new_topic(topic_courses_flask)

			# assumption: unique based on topic number
			t_course_flask = topic_courses_flask.filter_by(topic_num=t_num)

			if t_num == 0:

				if len(t_course_flask.all()) > 0:
					old_course = t_course_flask.first()

				__populate_child_topics(new_course, topic_courses, __inherit)
			else:

				topic_zero = __get_topic_zero(topic_courses)

				if len(t_course_flask.all()) > 0:
					old_course = t_course_flask.first()

				__populate_child_topics(topic_zero, [new_course], __inherit)

		else:
			# course doesn't have topic number
			old_course = Course.query.filter_by(dept_id=dept_obj.id, num=num).first()

		# create new or replace old
		if old_course is None:
			# new course
			logger.debug(f"Creating new course {dept_obj.abr} {new_course.num}")
			db.session.add(new_course)
		elif cur_sem is None or semester == cur_sem:
			# course existed but replacing
			logger.debug(f"Replacing previous {old_course.dept.abr} {old_course.num}")
			__replace_course(old_course, new_course)
		else:
			# course existed and skipping
			logger.debug(f"Already existed: {old_course.dept.abr} {old_course.num}")

		db.session.commit()

	null_depts = list(null_depts)
	null_depts.sort()
	for dept in null_depts:
		logger.debug(f"Unexpected Error: department {dept} cannot be found in the database")
Example #15
def populate_ecis(file_path, pages):
	"""
	Populate database with ECIS information
	:param file_path: path to file containing data
	:type file_path: str
	:param pages: pages of file to parse
	:type pages: list[int] or list[str]
	"""

	# FOR FUTURE UPDATES, PLEASE READ:
	# remember to update Course and Prof ECIS fields when inputting new ECIS scores: ecis_avg and ecis_students

	logger.info(f'Populating ecis database with data from: {file_path}')
	ecis_lst = parse_ecis_excel(file_path, pages)

	for ecis in ecis_lst:

		# separate values from dictionary
		unique, c_avg, p_avg, students, yr, sem = (
			ecis[KEY_UNIQUE_NUM],
			ecis[KEY_COURSE_AVG],
			ecis[KEY_PROF_AVG],
			ecis[KEY_NUM_STUDENTS],
			ecis[KEY_YR],
			ecis[KEY_SEMESTER]
		)

		# check for existence of specified Semester, ProfCourseSemester in database
		logger.debug(f'Adding ecis for: unique={unique}, sem={yr}{sem}')
		sem_obj = Semester.query.filter_by(year=yr, semester=sem).first()
		if sem_obj is None:
			logger.debug(f"Cannot find semester for: {yr}{sem}. Skipping...")
			continue

		pcs_obj = ProfCourseSemester.query.filter_by(unique_num=unique, sem_id=sem_obj.id).first()
		if pcs_obj is None:
			logger.debug(
				f"Failed to find ProfCourseSemester for: unique={unique}, sem={yr}{sem}. Skipping..."
			)
			continue

		# assumption: only one ecis score per prof_course_semester instance -> else skip
		existing_ecis = pcs_obj.ecis
		if len(existing_ecis) >= 1:
			# ecis already exists
			continue

		# creating the ecis object
		ecis_obj = EcisScore(
			course_avg=c_avg,
			prof_avg=p_avg,
			num_students=students,
			prof_course_sem_id=pcs_obj.id)
		db.session.add(ecis_obj)
		db.session.commit()

		# updating course and prof ecis fields
		logger.debug("Updating prof and course ecis fields")
		pc_obj = pcs_obj.prof_course
		course_obj = pc_obj.course
		prof_obj = pc_obj.prof

		queries = ((course_obj, c_avg), (prof_obj, p_avg))
		for query, avg in queries:
			total_students = query.ecis_students + students
			total_avg = ((query.ecis_avg * query.ecis_students) if query.ecis_avg is not None else 0) + \
				((avg * students) if avg is not None else 0)
			query.ecis_avg = (total_avg / total_students) if total_students > 0 else None
			query.ecis_students = total_students

		db.session.commit()
Example #16
def update_scheduled_courses(s_course_queue):
	"""
	Update scheduled_course data in the database with the new information listed in the queue.
	NOTE: minimize add/delete operations to prevent database updates from taking too long.
	Details: this function will set "extra" scheduled_course entries' mark_deletion to True.
			DO NOT use scheduled_course entries with mark_deletion set to True (no need to actually delete them)
	:param s_course_queue: data to populate the database with
	:type s_course_queue: list(
		dict{
			"scheduled": ScheduledCourse object containing the data,
			"prof": Prof object containing related prof data,
			"course": Course object containing related course data,
			"semester": Semester object containing related semester data,
			"unique": str or int containing the unique number for the scheduled_course,
			"xlist": list of str or int of the unique numbers for schedule_course entries that are crosslisted.
		}
	)
	"""

	logger.info("Updating scheduled course information")
	semesters = {
		'current': {
			'courses': {},
			'profs': {}
		},
		'next': {
			'courses': {},
			'profs': {}
		},
		'future': {
			'courses': {},
			'profs': {}
		}
	}
	cur_s_courses = ScheduledCourse.query.all()

	for i in range(min(len(cur_s_courses), len(s_course_queue))):

		cur_s_course = cur_s_courses[i]
		s_course = s_course_queue[i]

		cur_schedule = s_course['scheduled']
		cur_prof = s_course['prof']
		cur_course = s_course['course']
		semester = s_course['semester']
		unique_no = s_course['unique']
		xlist_str = s_course['xlist']
		
		# check to see if cross_listings exist else create new
		x_list = check_or_add_xlist(xlist_str, semester)

		update_scheduled_course(cur_s_course, cur_schedule, x_list)
		db.session.commit()

		# add prof course and prof course semester relationship if doesnt exist
		if cur_prof:
			_, prof_course = check_or_add_prof_course(cur_prof, cur_course)
			check_or_add_prof_course_semester(unique_no, prof_course, semester)	

		# update course and prof semester fields (whether they are teaching the respective semesters)
		full_semester = int(str(semester.year) + str(semester.semester))

		if full_semester == sem_current:
			if cur_course:
				semesters['current']['courses'][cur_course.id] = True
			if cur_prof:
				semesters['current']['profs'][cur_prof.id] = True
		elif full_semester == sem_next:
			if cur_course:
				semesters['next']['courses'][cur_course.id] = True
			if cur_prof:
				semesters['next']['profs'][cur_prof.id] = True
		elif full_semester == sem_future:
			if cur_course:
				semesters['future']['courses'][cur_course.id] = True
			if cur_prof:
				semesters['future']['profs'][cur_prof.id] = True

	logger.info("Checking scheduled data for uneven sizings")
	if len(s_course_queue) > len(cur_s_courses):
		logger.info("Have additional new schedueled courses")
		for s_course in s_course_queue[len(cur_s_courses):]:

			cur_schedule = s_course['scheduled']
			cur_prof = s_course['prof']
			cur_course = s_course['course']
			semester = s_course['semester']
			unique_no = s_course['unique']
			xlist_str = s_course['xlist']
		
			# check to see if cross_listings exist else create new
			x_list = check_or_add_xlist(xlist_str, semester)
			cur_schedule.cross_listed = x_list.id

			db.session.add(cur_schedule)
			db.session.commit()

			# add prof course and prof course semester relationship if doesnt exist
			if cur_prof:
				_, prof_course = check_or_add_prof_course(cur_prof, cur_course)
				check_or_add_prof_course_semester(unique_no, prof_course, semester)	

			# update course and prof semester fields (whether they are teaching the respective semesters)
			full_semester = int(str(semester.year) + str(semester.semester))

			if full_semester == sem_current:
				if cur_course:
					semesters['current']['courses'][cur_course.id] = True
				if cur_prof:
					semesters['current']['profs'][cur_prof.id] = True
			elif full_semester == sem_next:
				if cur_course:
					semesters['next']['courses'][cur_course.id] = True
				if cur_prof:
					semesters['next']['profs'][cur_prof.id] = True
			elif full_semester == sem_future:
				if cur_course:
					semesters['future']['courses'][cur_course.id] = True
				if cur_prof:
					semesters['future']['profs'][cur_prof.id] = True
	
	for s_course in ScheduledCourse.query.all():
		if s_course.mark_deletion is not None:
			s_course.mark_deletion = True
			db.session.commit()

	logger.info("Updating course and professor semesters")
	all_profs = Prof.query.all()
	all_courses = Course.query.all()

	for prof in all_profs:
		if (
			(prof.current_sem != semesters['current']['profs'].get(prof.id, False)) or 
			(prof.next_sem != semesters['next']['profs'].get(prof.id, False)) or 
			(prof.future_sem != semesters['future']['profs'].get(prof.id, False))
		):
			prof.current_sem = semesters['current']['profs'].get(prof.id, False)
			prof.next_sem = semesters['next']['profs'].get(prof.id, False)
			prof.future_sem = semesters['future']['profs'].get(prof.id, False)
			db.session.commit()

	for course in all_courses:
		if (
			(course.current_sem != semesters['current']['courses'].get(course.id, False)) or
			(course.next_sem != semesters['next']['courses'].get(course.id, False)) or 
			(course.future_sem != semesters['future']['courses'].get(course.id, False))
		):
			course.current_sem = semesters['current']['courses'].get(course.id, False)
			course.next_sem = semesters['next']['courses'].get(course.id, False)
			course.future_sem = semesters['future']['courses'].get(course.id, False)
			db.session.commit()
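The full_semester comparison relies on the CCYYS convention used throughout: the year and one-digit semester code are concatenated before being compared against sem_current/sem_next/sem_future. A quick illustration:

year, semester = 2020, 9                       # hypothetical Semester fields (9 = fall)
full_semester = int(str(year) + str(semester))
print(full_semester)                           # 20209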
Example #17
def parse_ecis_excel(file_path, sheet_lst):
    """Parse the ecis excel document for ecis information on courses and professors

	Args:
		file_path (str): file path to ecis excel documents
		sheet_lst (list[str]): list of sheet names to parse

	Returns:
		list[dict]: list of dictionaries containing course and prof ecis information
			Structure: [
				{
					KEY_UNIQUE_NUM: int,
					KEY_COURSE_AVG: float,
					KEY_PROF_AVG: float,
					KEY_NUM_STUDENTS: int,
					KEY_YR: int,
					KEY_SEMESTER: int,
				}, ...
			]
	"""

    __unique_num_digits = 5
    __sem_key = 'SEMESTER_CCYYS'
    __unique_key = 'UNIQUE'
    __num_students_key = 'NBR_SURVEY_FORMS_RETURNED'
    __course_avg_key = 'AVG_COURSE_RATING'
    __prof_avg_key = 'AVG_INSTRUCTOR_RATING'

    ecis_lst = []

    for sheet in sheet_lst:

        rows_skipped = 0
        ecis_df = pd.read_excel(file_path, sheet_name=sheet)

        for index, row in ecis_df.iterrows():

            # check for valid year semester string. If invalid, skip
            yr_sem = str(row[__sem_key])
            if len(yr_sem) < 5:
                rows_skipped += 1
                continue

            yr_sem = yr_sem[0:5]
            yr = yr_sem[0:-1]
            sem = yr_sem[-1]

            # convert everything to int or float--> if N/A then fail and skip
            try:

                unique_str = str(row[__unique_key])
                unique_str = unique_str.split(
                    '.')[0] if '.' in unique_str else unique_str

                num_students_str = str(row[__num_students_key])
                num_students_str = num_students_str.split(
                    '.')[0] if '.' in num_students_str else num_students_str

                yr = int(yr)
                sem = int(sem)
                unique_num = int(unique_str)
                num_students = int(num_students_str)
                course_avg = float(row[__course_avg_key])
                prof_avg = float(row[__prof_avg_key])
            except (ValueError, IndexError):
                rows_skipped += 1
                continue

            # TODO: add course and prof relationship once available
            # create ecis dictionary
            ecis = {
                KEY_UNIQUE_NUM: unique_num,
                KEY_COURSE_AVG: course_avg,
                KEY_PROF_AVG: prof_avg,
                KEY_NUM_STUDENTS: num_students,
                KEY_YR: yr,
                KEY_SEMESTER: sem
            }

            ecis_lst.append(ecis)

        logger.info(
            f'Finished parsing {sheet} sheet: num_rows_skipped={rows_skipped}')

    return ecis_lst
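The SEMESTER_CCYYS column is split so that the first four digits are the year and the trailing digit is the semester code, for example:

yr_sem = "20196"                     # hypothetical cell value
yr, sem = int(yr_sem[:-1]), int(yr_sem[-1])
print(yr, sem)                       # 2019 6 (summer)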
Example #18
def populate_prof_course(in_file):
	"""
	Populate database with Professor and Course relationship using data fetched from the web
	(utreview.services.fetch_web.fetch_prof_course_info only)
	:param in_file: file the data was fetched to
	:type in_file: str
	"""

	__sem_fall = "Fall"
	__sem_spring = "Spring"
	__sem_summer = "Summer"

	logger.info(f"Populating database with prof_course info using {in_file}")

	# creating list of prof-course relationships from the given file
	prof_courses = []
	with open(in_file, 'r') as f:
		for line in f:
			prof_courses.append(json.loads(line))
	cur_profs = Prof.query.all()

	# add each prof-course relationship to the database if appropriate
	for prof_course in prof_courses:

		# check for existence of professor -> add if does not exist
		prof_name = [name.strip() for name in prof_course[KEY_PROF].lower().split(",")]
		last, first = prof_name[0].strip(), prof_name[1].strip()
		last_words = [word.strip() for word in last.split(' ') if len(word.strip()) > 0]
		first_words = [word.strip() for word in first.split(' ') if len(word.strip()) > 0]

		target_prof = None
		for cur_prof in cur_profs:
			found = True

			cur_last, cur_first = cur_prof.last_name.lower(), cur_prof.first_name.lower()
			cur_last_words = [word.strip() for word in cur_last.split(' ') if len(word.strip()) > 0]
			cur_first_words = [word.strip() for word in cur_first.split(' ') if len(word.strip()) > 0]

			for word in last_words:
				if word not in cur_last_words:
					found = False
					break
			
			if found:
				for word in first_words:
					if word not in cur_first_words:
						found = False
						break
			
			if found:
				target_prof = cur_prof
				break
		
		if target_prof is None:
			logger.debug(f"Cannot find prof: {prof_course[KEY_PROF]}. Skipping...")
			continue

		# check for existence of department -> skip if does not exist
		abr = prof_course[KEY_DEPT].strip().upper()
		dept = Dept.query.filter_by(abr=abr).first()
		if dept is None:
			logger.debug(f"Cannot find dept: {abr}. Skipping...")
			continue

		# check if course exists -> add if does not exist
		# TODO: choosing topic 0 by default. Update when topic info available.
		num_results, course = check_or_add_course(dept, prof_course[KEY_CNUM], prof_course[KEY_TITLE])
		if num_results > 1:
			courses = Course.query.filter_by(dept_id=dept.id, num=prof_course[KEY_CNUM])
			for c in courses:
				if c.topic_num <= 0:
					course = c
		db.session.commit()

		# check if prof_course exists -> add if it doesn't
		_, prof_course_obj = check_or_add_prof_course(target_prof, course)
		db.session.commit()

		# parse semester to integer representation
		sem_lst = [s.strip() for s in prof_course[KEY_SEM].split(",")]
		if sem_lst[1] == __sem_spring:
			sem = SPRING_SEM
		elif sem_lst[1] == __sem_summer:
			sem = SUMMER_SEM
		elif sem_lst[1] == __sem_fall:
			sem = FALL_SEM
		else:
			logger.debug(f"Invalid semester: {sem_lst[1]}. Skipping...")
			continue

		yr = int(sem_lst[0].strip())

		# check for semester existence -> if it doesn't, add to database
		_, sem_obj = check_or_add_semester(yr, sem)

		# check for prof_course_semester existence -> if it doesn't add to database
		check_or_add_prof_course_semester(prof_course[KEY_UNIQUE], prof_course_obj, sem_obj)
		db.session.commit()
Example #19
def fetch_prof_course_info(out_file, sems, depts):
    """
    Parse prof course info from the site -> for the relationship
    :param out_file: file to output the relationships/data
    :type out_file: str
    :param sems: semesters to fetch data for
    :type sems: list[str]
    :param depts: departments to fetch data for
    :type depts: list[str]
    """
    __sem_header = 'SEMESTER'
    __dept_header = 'DEPT'
    __title_header = 'TITLE'
    __course_num_header = 'COURSENUMBER'
    __unique_header = 'UNIQUE'
    __instr_header = 'INSTRUCTOR(S)*'

    logger.info(f"Fetching prof_course info. Output={out_file}. Semesters={sems}. Departments={depts}")

    for sem in sems:
        for dept in depts:

            # get BeautifulSoup object for the parameters
            html = fetch_html(get_prof_course_url(sem, dept))
            html_soup = BSoup(html, "html.parser")

            # look for headers on page -> headers for the table
            headers = html_soup.find("tr", {"class": "tbh header"})
            if headers is None:
                logger.debug("Cannot find headers for prof_course search: "
                             f"Semester={sem}, Department={dept}. Skipping...")
                continue
            headers = [header.text.replace("\n", "").strip() for header in headers.findAll("th")]
            logger.debug(f"Fetched headers from profcourse site with headers: {headers}")

            # parse out indices for each of the headers
            sem_index, dept_index, title_index, cnum_index, unique_index, instr_index = get_header_indices(
                headers,
                __sem_header,
                __dept_header,
                __title_header,
                __course_num_header,
                __unique_header,
                __instr_header
            )

            # iterate through each row in the web table and parse out data
            rows = html_soup.findAll("tr", {"class": ["tboff", "tbon"]})
            for row in rows:
                cols = row.findAll("td")
                cols = [col.text.replace("\n", "").strip() for col in cols]

                # get data via the indices for the headers
                for i in range(len(cols)):
                    if 'CV' in cols[i]:
                        cols[i] = cols[i].split('CV')[0].strip()

                # create dictionary containing the data
                prof_course = {
                    KEY_SEM: cols[sem_index] if sem_index is not None else None,
                    KEY_DEPT: cols[dept_index] if dept_index is not None else None,
                    KEY_TITLE: cols[title_index].strip()[:-1] if title_index is not None else None,
                    KEY_CNUM: cols[cnum_index] if cnum_index is not None else None,
                    KEY_UNIQUE: cols[unique_index] if unique_index is not None else None,
                    KEY_PROF: cols[instr_index] if instr_index is not None else None
                }

                # write dictionary to file
                with open(out_file, "a") as f:
                    json.dump(prof_course, f)
                    f.write("\n")
Example #20
def populate_scheduled_course(course_info):
	"""
	Populate the database with scheduled course info as parsed from FTP
	:param course_info: list of course data
	:type course_info: list[dict]
	"""

	logger.info("Populating database with scheduled course info")
	s_course_queue = []

	for s_course in course_info:

		# create ScheduledCourseInfo object using the s_course dictionary
		try:
			scheduled = ScheduledCourseInfo(s_course)
		except ValueError as err:
			logger.warn(f"Populate scheduled course error: {err}. Skipping...")
			continue

		# check to see if dept exists
		dept_obj = Dept.query.filter_by(abr=scheduled.dept).first()
		if dept_obj is None:
			logger.debug(f"Populate scheduled course: cannot find department {scheduled.dept}. Skipping...")
			continue

		# check to see if course exists
		cur_courses = Course.query.filter_by(dept_id=dept_obj.id, num=scheduled.c_num)
		if len(cur_courses.all()) > 1:
			cur_courses = cur_courses.filter_by(topic_num=scheduled.topic)
		cur_course = cur_courses.first()

		if cur_course is None:
			course_log_description = f"{scheduled.dept} {scheduled.c_num} w/ topic num {scheduled.topic}"
			logger.debug(f"Populate scheduled course: cannot find course {course_log_description}. Skipping...")
			continue

		# check to see if prof exists --> if not then leave empty
		cur_prof = Prof.query.filter_by(eid=scheduled.prof_eid).first()

		if cur_prof is None:
			logger.warn(f"Could not find professor w/ EID={scheduled.prof_eid}. Leaving empty...")

		# check to see if semester exists else add semester
		_, semester = check_or_add_semester(yr=scheduled.yr, sem=scheduled.sem)

		# check to see if scheduled course exists else create new
		num_results, cur_schedule = check_or_add_scheduled_course(scheduled, cur_course, cur_prof, None, semester, add=False)
		if num_results > 0:
			logger.debug(f"""Updating scheduled course. Unique = {scheduled.unique_no}
					semester={repr(semester)}
					course={repr(cur_course)}
					prof={repr(cur_prof)}""")
			cur_schedule = scheduled.to_scheduled_course(cur_schedule, semester, cur_course, cur_prof, None)
		
		s_course_queue.append({
			'scheduled': cur_schedule,
			'prof': cur_prof,
			'course': cur_course,
			'semester': semester, 
			'unique': scheduled.unique_no,
			'xlist': scheduled.x_listings,
		})
	update_scheduled_courses(s_course_queue)