def parse_exam_schedule(exam_file_name):
    """Rebuild the exams collection from a whitespace-delimited schedule file.

    The existing ``m.Exam`` collection is dropped first, then one ``m.Exam``
    document is saved for every line that yields a course ID. Lines whose
    date/time cannot be parsed are still saved, with ``start_date`` and
    ``end_date`` set to None and ``info_known`` set to False.

    Token layout of a line, as consumed below:
        tokens 0-1      course subject and number (joined and lower-cased)
        following       section tokens, up to the first day-of-week token
        next 4 tokens   the date, e.g. "Tuesday December 11, 2012"
        next 2 tokens   start time and AM/PM marker
        next 2 tokens   end time and AM/PM marker
        remainder       location

    Args:
        exam_file_name: Path of the schedule text file to read.
    """
    # NOTE(review): this function is defined a second time later in this
    # file; the later definition replaces this one when the module loads.
    m.Exam.objects._collection.drop()

    # E.g. Tuesday December 11, 2012 7:30PM
    #      Tuesday December 11, 2012 10:00PM
    date_format = "%A %B %d, %Y %I:%M%p"

    # The context manager closes the file even if a line fails to parse or
    # save part-way through.
    with open(exam_file_name) as exam_file:
        for line in exam_file:
            tokens = re.split(r'\s+', line)

            # Get the course ID
            course_id = safe_list_get(tokens, 0) + safe_list_get(tokens, 1)
            course_id = course_id.lower()
            if not course_id:
                # A single parenthesised argument prints identically under
                # the Python 2 print statement.
                print("Skipping line '%s'" % ' '.join(tokens))
                continue

            # Get the sections: every token before the day of the week.
            # The bound on `index` stops the scan at the end of the line
            # when no day-of-week token is present; such a line then fails
            # date parsing below and is saved with info_known = False.
            index = 2
            section_tokens = []
            while (index < len(tokens) and
                   not is_day_of_week(safe_list_get(tokens, index))):
                section_tokens.append(safe_list_get(tokens, index))
                index += 1
            section_string = ' '.join(section_tokens).strip()

            # Get the date. Next 4 tokens is Tuesday December 11, 2012
            exam_date_string = ''
            for i in range(index, index + 4):
                exam_date_string += safe_list_get(tokens, i) + ' '
            index += 4

            # Time and AM/PM marker are separate tokens; they are glued
            # together without a space to match "%I:%M%p".
            start_date_string = (exam_date_string +
                                 safe_list_get(tokens, index) +
                                 safe_list_get(tokens, index + 1))
            index += 2

            end_date_string = (exam_date_string +
                               safe_list_get(tokens, index) +
                               safe_list_get(tokens, index + 1))
            index += 2

            # TODO(sandy): do timezones better
            try:
                # time.mktime treats the parsed struct as machine-local
                # time; the result is then passed to eastern_to_utc.
                start_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(
                        time.mktime(
                            time.strptime(start_date_string, date_format))))
                end_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(
                        time.mktime(
                            time.strptime(end_date_string, date_format))))
            except Exception as exp:
                # Deliberately broad: the original author did not remember
                # exactly which exception this was meant to catch.
                print("Could not get date for line '%s'" % ' '.join(tokens))
                print(exp)
                start_date = None
                end_date = None

            # Get the location: whatever tokens remain on the line.
            location = ' '.join(tokens[index:]).strip()

            exam_slot = m.Exam(
                course_id=course_id,
                sections=section_string,
                start_date=start_date,
                end_date=end_date,
                location=location,
                info_known=bool(start_date and end_date),
            )
            exam_slot.save()
def import_opendata_exam_schedules():
    """Import exam schedules data from the OpenData API

    Reads today's JSON dump ``uw_exams_<YYYY_MM_DD>.txt`` under
    ``c.EXAMS_DATA_DIR`` (joined onto this module's directory), builds one
    ``m.Exam`` per grouped section and, only if enough exams were parsed,
    replaces the stored exams with the new ones.

    Returns:
        A list of strings, one per section that was skipped, describing why.

    Raises:
        ValueError: If fewer exams than the seasonal threshold were parsed.
            This is raised before the existing exams are deleted.
    """
    # NOTE(review): this function is defined a second time later in this
    # file; the later definition replaces this one when the module loads.
    today = datetime.today()
    file_name = os.path.join(
        os.path.dirname(__file__),
        '%s/uw_exams_%s.txt' % (c.EXAMS_DATA_DIR,
                                today.strftime('%Y_%m_%d')))

    processed_exams = []
    errors = []

    # E.g. 2014-04-17 7:30 PM
    #      2014-04-17 10:00 PM
    date_format = "%Y-%m-%d %I:%M %p"

    def parse_exam_datetime(date_string):
        # time.mktime treats the parsed struct as machine-local time; the
        # result is then passed to eastern_to_utc.
        return rmc_util.eastern_to_utc(
            datetime.fromtimestamp(
                time.mktime(time.strptime(date_string, date_format))))

    with open(file_name, 'r') as f:
        data = json.load(f)

    # Data will contain something like this:
    #
    #   [{
    #       "course": "AFM 131",
    #       "sections": [
    #           {
    #               "date": "2014-04-17",
    #               "day": "Thursday",
    #               "end_time": "10:00 PM",
    #               "location": "DC 1350",
    #               "notes": "",
    #               "section": "001",
    #               "start_time": "7:30 PM"
    #           },
    #           {
    #               "date": "",
    #               "day": "",
    #               "end_time": "",
    #               "location": "",
    #               "notes": "See blah blah blah",
    #               "section": "081 Online",
    #               "start_time": ""
    #           }
    #       ]
    #   }, ...]
    #
    # TODO(jlfwong): Refactor this to separate concerns of file IO, db
    # storage, and data processing so that the data processing step can be
    # tested, and this example can be moved into tests.
    for exam_data in data:
        course_id = m.Course.code_to_id(exam_data.get('course'))
        grouped_sections = group_similar_exam_sections(
            exam_data.get('sections', []))

        for section_data in grouped_sections:
            # Fall back to '' so a missing or null field is reported through
            # the date-parsing error below instead of raising on
            # None.endswith / `in None` and aborting the whole import.
            section = section_data.get('section') or ''
            day = section_data.get('day') or ''

            # Catch these to be more detailed in our errors
            if section.endswith('Online'):
                errors.append("Skipping online course: %s %s"
                              % (course_id, section))
                continue
            if 'Exam removed' in day:
                errors.append("Skipping removed course: %s" % (course_id))
                continue
            if 'See http:' in day:
                errors.append("Skipping url for course: %s" % (course_id))
                continue

            # E.g. 2014-04-17
            date = section_data.get('date')
            # E.g. 11:30 AM
            start_time = section_data.get('start_time')
            end_time = section_data.get('end_time')

            start_date_string = "%s %s" % (date, start_time)
            end_date_string = "%s %s" % (date, end_time)

            try:
                start_date = parse_exam_datetime(start_date_string)
                end_date = parse_exam_datetime(end_date_string)
            except Exception as exp:
                # Deliberately broad: any failure here only skips this
                # section and is reported to the caller.
                errors.append("Could not get date (%s)\n%s"
                              % (section_data, exp))
                continue

            exam = m.Exam(
                course_id=course_id,
                sections=section,
                start_date=start_date,
                end_date=end_date,
                location=section_data.get('location'),
                info_known=bool(start_date and end_date),
            )
            processed_exams.append(exam)

    # Do some sanity checks to make sure OpenData is being reasonable.
    # This number is arbitrary and just reminds us to double-check
    # TODO(Sandy): This ranges from 775 (Fall & Winter) to 325 (Spring)
    season = m.Term.get_season_from_id(m.Term.get_current_term_id())
    exam_items_threshold = 325 if season == 'Spring' else 775
    if len(processed_exams) < exam_items_threshold:
        raise ValueError("processor.py: too few exam items %d (< %d)"
                         % (len(processed_exams), exam_items_threshold))

    # Everything should be fine by here, drop the old exams collection
    m.Exam.objects.delete()
    for exam in processed_exams:
        exam.save()

    return errors
def import_opendata_exam_schedules():
    """Import exam schedules data from the OpenData API

    Reads today's JSON dump ``uw_exams_<YYYY_MM_DD>.txt`` under
    ``c.EXAMS_DATA_DIR`` (joined onto this module's directory), builds one
    ``m.Exam`` per grouped section and, only if enough exams were parsed,
    replaces the stored exams with the new ones.

    Returns:
        A list of strings, one per section that was skipped, describing why.

    Raises:
        ValueError: If fewer exams than the seasonal threshold were parsed.
            This is raised before the existing exams are deleted.
    """
    # NOTE(review): an earlier definition of this function exists in this
    # file; this later one is the definition that takes effect.
    today = datetime.today()
    file_name = os.path.join(
        os.path.dirname(__file__),
        "%s/uw_exams_%s.txt" % (c.EXAMS_DATA_DIR, today.strftime("%Y_%m_%d")),
    )

    processed_exams = []
    errors = []

    # E.g. 2014-04-17 7:30 PM
    #      2014-04-17 10:00 PM
    date_format = "%Y-%m-%d %I:%M %p"

    def parse_exam_datetime(date_string):
        # time.mktime treats the parsed struct as machine-local time; the
        # result is then passed to eastern_to_utc.
        local_timestamp = time.mktime(time.strptime(date_string, date_format))
        return rmc_util.eastern_to_utc(datetime.fromtimestamp(local_timestamp))

    with open(file_name, "r") as f:
        data = json.load(f)

    # Data will contain something like this:
    #
    #   [{
    #       "course": "AFM 131",
    #       "sections": [
    #           {
    #               "date": "2014-04-17",
    #               "day": "Thursday",
    #               "end_time": "10:00 PM",
    #               "location": "DC 1350",
    #               "notes": "",
    #               "section": "001",
    #               "start_time": "7:30 PM"
    #           },
    #           {
    #               "date": "",
    #               "day": "",
    #               "end_time": "",
    #               "location": "",
    #               "notes": "See blah blah blah",
    #               "section": "081 Online",
    #               "start_time": ""
    #           }
    #       ]
    #   }, ...]
    #
    # TODO(jlfwong): Refactor this to separate concerns of file IO, db
    # storage, and data processing so that the data processing step can be
    # tested, and this example can be moved into tests.
    for exam_data in data:
        course_id = m.Course.code_to_id(exam_data.get("course"))
        grouped_sections = group_similar_exam_sections(exam_data.get("sections", []))

        for section_data in grouped_sections:
            # Fall back to "" so a missing or null field is reported through
            # the date-parsing error below instead of raising on
            # None.endswith / `in None` and aborting the whole import.
            section = section_data.get("section") or ""
            day = section_data.get("day") or ""

            # Catch these to be more detailed in our errors
            if section.endswith("Online"):
                errors.append("Skipping online course: %s %s" % (course_id, section))
                continue
            if "Exam removed" in day:
                errors.append("Skipping removed course: %s" % (course_id))
                continue
            if "See http:" in day:
                errors.append("Skipping url for course: %s" % (course_id))
                continue

            # E.g. 2014-04-17
            date = section_data.get("date")
            # E.g. 11:30 AM
            start_time = section_data.get("start_time")
            end_time = section_data.get("end_time")

            start_date_string = "%s %s" % (date, start_time)
            end_date_string = "%s %s" % (date, end_time)

            try:
                start_date = parse_exam_datetime(start_date_string)
                end_date = parse_exam_datetime(end_date_string)
            except Exception as exp:
                # Deliberately broad: any failure here only skips this
                # section and is reported to the caller.
                errors.append("Could not get date (%s)\n%s" % (section_data, exp))
                continue

            exam = m.Exam(
                course_id=course_id,
                sections=section,
                start_date=start_date,
                end_date=end_date,
                location=section_data.get("location"),
                info_known=bool(start_date and end_date),
            )
            processed_exams.append(exam)

    # Do some sanity checks to make sure OpenData is being reasonable.
    # This number is arbitrary and just reminds us to double-check
    # TODO(Sandy): This ranges from 775 (Fall & Winter) to 325 (Spring)
    season = m.Term.get_season_from_id(m.Term.get_current_term_id())
    exam_items_threshold = 325 if season == "Spring" else 775
    if len(processed_exams) < exam_items_threshold:
        raise ValueError(
            "processor.py: too few exam items %d (< %d)"
            % (len(processed_exams), exam_items_threshold)
        )

    # Everything should be fine by here, drop the old exams collection
    m.Exam.objects.delete()
    for exam in processed_exams:
        exam.save()

    return errors
def parse_exam_schedule(exam_file_name):
    """Rebuild the exams collection from a whitespace-delimited schedule file.

    The existing ``m.Exam`` collection is dropped first, then one ``m.Exam``
    document is saved for every line that yields a course ID. Lines whose
    date/time cannot be parsed are still saved, with ``start_date`` and
    ``end_date`` set to None and ``info_known`` set to False.

    Token layout of a line, as consumed below:
        tokens 0-1      course subject and number (joined and lower-cased)
        following       section tokens, up to the first day-of-week token
        next 4 tokens   the date, e.g. "Tuesday December 11, 2012"
        next 2 tokens   start time and AM/PM marker
        next 2 tokens   end time and AM/PM marker
        remainder       location

    Args:
        exam_file_name: Path of the schedule text file to read.
    """
    # NOTE(review): an earlier definition of this function exists in this
    # file; this later one is the definition that takes effect.
    m.Exam.objects._collection.drop()

    # E.g. Tuesday December 11, 2012 7:30PM
    #      Tuesday December 11, 2012 10:00PM
    date_format = "%A %B %d, %Y %I:%M%p"

    # The context manager closes the file even if a line fails to parse or
    # save part-way through.
    with open(exam_file_name) as exam_file:
        for line in exam_file:
            tokens = re.split(r'\s+', line)

            # Get the course ID
            course_id = safe_list_get(tokens, 0) + safe_list_get(tokens, 1)
            course_id = course_id.lower()
            if not course_id:
                # A single parenthesised argument prints identically under
                # the Python 2 print statement.
                print("Skipping line '%s'" % ' '.join(tokens))
                continue

            # Get the sections: every token before the day of the week.
            # The bound on `index` stops the scan at the end of the line
            # when no day-of-week token is present; such a line then fails
            # date parsing below and is saved with info_known = False.
            index = 2
            section_tokens = []
            while (index < len(tokens) and
                   not is_day_of_week(safe_list_get(tokens, index))):
                section_tokens.append(safe_list_get(tokens, index))
                index += 1
            section_string = ' '.join(section_tokens).strip()

            # Get the date. Next 4 tokens is Tuesday December 11, 2012
            exam_date_string = ''
            for i in range(index, index + 4):
                exam_date_string += safe_list_get(tokens, i) + ' '
            index += 4

            # Time and AM/PM marker are separate tokens; they are glued
            # together without a space to match "%I:%M%p".
            start_date_string = (exam_date_string +
                                 safe_list_get(tokens, index) +
                                 safe_list_get(tokens, index + 1))
            index += 2

            end_date_string = (exam_date_string +
                               safe_list_get(tokens, index) +
                               safe_list_get(tokens, index + 1))
            index += 2

            # TODO(sandy): do timezones better
            try:
                # time.mktime treats the parsed struct as machine-local
                # time; the result is then passed to eastern_to_utc.
                start_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(
                        time.mktime(
                            time.strptime(start_date_string, date_format))))
                end_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(
                        time.mktime(
                            time.strptime(end_date_string, date_format))))
            except Exception as exp:
                # Deliberately broad: the original author did not remember
                # exactly which exception this was meant to catch.
                print("Could not get date for line '%s'" % ' '.join(tokens))
                print(exp)
                start_date = None
                end_date = None

            # Get the location: whatever tokens remain on the line.
            location = ' '.join(tokens[index:]).strip()

            exam_slot = m.Exam(
                course_id=course_id,
                sections=section_string,
                start_date=start_date,
                end_date=end_date,
                location=location,
                info_known=bool(start_date and end_date),
            )
            exam_slot.save()