def get_semesters(semesters=None, verbose=False):
    """
    Validate a given string semesters argument, and return a list of the
    individual string semesters specified by the argument.

    :param semesters: `None` to use only the current semester, the string "all"
        to use every semester returned by `all_semesters()`, or a
        comma-separated string of semesters (e.g. "2019C,2020A"). Whitespace
        around each comma-separated entry is ignored.
    :param verbose: If True, print a note describing how the updates are
        applied atomically per semester.
    :return: A list of individual string semesters.
    :raises ValueError: If an explicitly listed semester is not one of the
        semesters returned by `all_semesters()`.
    """
    possible_semesters = all_semesters()
    if semesters is None:
        semesters = [get_current_semester()]
    elif semesters == "all":
        semesters = list(possible_semesters)
    else:
        # Strip each entry individually so that inputs such as "2019C, 2020A"
        # validate; empty entries are kept so they still fail validation below.
        semesters = [s.strip() for s in semesters.split(",")]
        for s in semesters:
            if s not in possible_semesters:
                raise ValueError(f"Provided semester {s} was not found in the db.")
    if verbose:
        if len(semesters) > 1:
            print(
                "This script's updates for each semester are atomic, i.e. either all the "
                "updates for a certain semester are accepted by the database, or none of them are "
                "(if an error is encountered). If an error is encountered during the "
                "processing of a certain semester, any correctly completed updates for previously "
                "processed semesters will have already been accepted by the database."
            )
        else:
            print(
                "This script's updates for the given semester are atomic, i.e. either all the "
                "updates will be accepted by the database, or none of them will be "
                "(if an error is encountered)."
            )
    return semesters
def registrar_import(semester=None, query=""):
    """
    Import courses from the registrar for one semester and refresh the data
    derived from them.

    Steps, in order: upsert every course returned by the registrar, mark the
    semester's leftover sections as status "X", refresh department names, load
    section statuses, then recompute stats, topics and the crosswalk.

    :param semester: The semester to import; defaults to the current semester.
    :param query: Passed to `registrar.get_courses` to restrict the import
        (reported to the user as a course prefix).
    """
    if semester is None:
        semester = get_current_semester()

    print(f"Loading in courses with prefix {query} from {semester}...")
    results = registrar.get_courses(query, semester)

    # Start from every section code currently stored for this semester.
    # NOTE(review): presumably upsert_course_from_opendata removes each code it
    # processes from this set, leaving only sections absent from the registrar.
    missing_sections = set(
        Section.objects.filter(course__semester=semester).values_list("full_code", flat=True)
    )
    for info in tqdm(results):
        upsert_course_from_opendata(info, semester, missing_sections)

    # Restrict the update to this semester: the codes were collected from this
    # semester only, and filtering on the code alone would also match
    # same-coded sections stored for other semesters.
    Section.objects.filter(
        course__semester=semester, full_code__in=missing_sections
    ).update(status="X")

    print("Updating department names...")
    departments = registrar.get_departments()
    for dept_code, dept_name in tqdm(departments.items()):
        dept, _ = Department.objects.get_or_create(code=dept_code)
        dept.name = dept_name
        dept.save()

    print("Loading course statuses from registrar...")
    set_all_status(semester=semester)

    recompute_stats(semesters=semester, verbose=True)
    fill_topics(verbose=True)
    load_crosswalk(print_missing=False, verbose=True)
def best_recommendations(
    cluster,
    curr_course_vectors_dict,
    user_vector,
    exclude: Optional[Set[str]] = None,
    n_recommendations=5,
):
    """
    Pick the courses in `cluster` most similar to `user_vector`.

    :param cluster: Iterable of course codes to consider.
    :param curr_course_vectors_dict: Maps each course code in `cluster` to its vector.
    :param user_vector: The vector each course vector is compared against.
    :param exclude: Optional set of course codes that must not be recommended.
    :param n_recommendations: Maximum number of course codes to return.
    :return: Up to `n_recommendations` course codes, highest cosine similarity first.
    """
    # Score every candidate that is not explicitly excluded.
    score_by_code = {
        code: cosine_similarity(curr_course_vectors_dict[code], user_vector)
        for code in cluster
        if exclude is None or code not in exclude
    }

    # only recommend currently offered courses
    offered = Course.objects.filter(
        semester=get_current_semester(), full_code__in=list(score_by_code)
    )
    scored = [(offering.full_code, score_by_code[offering.full_code]) for offering in offered]

    limit = min(n_recommendations, len(scored))
    best = heapq.nlargest(limit, scored, lambda pair: pair[1])
    return [code for code, _score in best]
def get_semester(self):
    """
    Resolve the semester this view should operate on.

    Reads the "semester" entry of `self.kwargs` (defaulting to "current").
    The value "current" is replaced by the current semester, or by "all" when
    no current semester is found; any other value is returned unchanged.
    """
    requested = self.kwargs.get("semester", "current")
    if requested != "current":
        return requested
    resolved = get_current_semester(allow_not_found=True)
    if resolved is None:
        return "all"
    return resolved
def handle(self, *args, **kwargs):
    """
    Fetch every course status for the current semester from the registrar and
    send alerts for the ones that qualify.

    A tally of what happened to each status entry is printed at the end.
    """
    logging.getLogger("").setLevel(logging.DEBUG)

    semester = get_current_semester()
    statuses = registrar.get_all_course_status(semester)
    # Key order is preserved because the tally is printed as a dict.
    tally = dict.fromkeys(
        (
            "missing_data",
            "section_not_found",
            "duplicate_updates",
            "sent",
            "parse_error",
            "error",
            "skipped",
        ),
        0,
    )

    for entry in tqdm(statuses):
        section_code = entry.get("section_id_normalized")
        course_status = entry.get("status")
        course_term = entry.get("term")
        if section_code is None or course_status is None or course_term is None:
            tally["missing_data"] += 1
            continue

        # Ignore sections not in db
        try:
            _, section = get_course_and_section(section_code, semester)
        except (Section.DoesNotExist, Course.DoesNotExist):
            tally["section_not_found"] += 1
            continue

        # Ignore duplicate updates
        previous = section.last_status_update
        if previous and previous.new_status == course_status:
            tally["duplicate_updates"] += 1
            continue

        if not should_send_pca_alert(course_term, course_status):
            tally["skipped"] += 1
            continue

        try:
            alert_for_course(
                section_code,
                semester=course_term,
                sent_by="WEB",
                course_status=course_status,
            )
            tally["sent"] += 1
        except ValueError:
            tally["parse_error"] += 1

    print(tally)
def should_send_pca_alert(course_term, course_status):
    """
    Decide whether a status update should trigger a Penn Course Alert.

    :param course_term: The semester the status update belongs to.
    :param course_status: The new status of the section.
    :return: False when `course_term` is not the current semester. Otherwise a
        truthy value only if the "SEND_FROM_WEBHOOK" option is enabled, the
        status is "O" or "C", and the add/drop period has no end date or has
        not ended yet.
    """
    if get_current_semester() != course_term:
        return False
    add_drop_period = get_or_create_add_drop_period(course_term)
    # Ask for the current instant directly in the target zone. Attaching a
    # non-UTC tzinfo to utcnow() would relabel the UTC wall-clock time as local
    # time and shift "now" by the zone's UTC offset.
    now = datetime.now(tz=gettz(TIME_ZONE))
    return (
        get_bool("SEND_FROM_WEBHOOK", False)
        and course_status in ("O", "C")
        and (add_drop_period.end is None or now < add_drop_period.end)
    )
def pca_registration_open():
    """
    Returns True iff PCA should be accepting new registrations.

    Registrations are accepted when the "REGISTRATION_OPEN" option is enabled
    (the default) and the current semester's add/drop period either has no end
    date or has not ended yet.
    """
    current_adp = get_or_create_add_drop_period(semester=get_current_semester())
    # Ask for the current instant directly in the target zone. Attaching a
    # non-UTC tzinfo to utcnow() would relabel the UTC wall-clock time as local
    # time and shift "now" by the zone's UTC offset.
    now = datetime.now(tz=gettz(TIME_ZONE))
    return get_bool("REGISTRATION_OPEN", True) and (
        current_adp.end is None or now < current_adp.end
    )
def create(self, request, *args, **kwargs):
    """
    Create a schedule from the request payload.

    If the payload's "id" already belongs to an existing schedule, the request
    is delegated to `update`. Responds with 400 when a section is missing or an
    IntegrityError occurs, with whatever `check_semester` returns when that is
    not None, and with 201 plus the new schedule id on success.
    """
    payload = request.data
    requested_id = payload.get("id")
    if Schedule.objects.filter(id=requested_id).exists():
        return self.update(request, requested_id)

    try:
        sections = self.get_sections(payload)
    except ObjectDoesNotExist:
        return Response(
            {"detail": "One or more sections not found in database."},
            status=status.HTTP_400_BAD_REQUEST,
        )

    semester_problem = self.check_semester(payload, sections)
    if semester_problem is not None:
        return semester_problem

    try:
        fields = {
            "person": request.user,
            "semester": payload.get("semester", get_current_semester()),
            "name": payload.get("name"),
        }
        if "id" in payload:
            # Also from above we know that this id does not conflict with existing schedules.
            fields["id"] = payload.get("id")
        schedule = self.get_queryset().create(**fields)
        schedule.sections.set(sections)
        return Response(
            {"message": "success", "id": schedule.id}, status=status.HTTP_201_CREATED
        )
    except IntegrityError as e:
        return Response(
            {
                "detail": "IntegrityError encountered while trying to create: "
                + str(e.__cause__)
            },
            status=status.HTTP_400_BAD_REQUEST,
        )
def send_course_alerts(course_code, course_status, semester=None, sent_by=""):
    """
    Queue an alert for every registration that should hear about a status change.

    :param course_code: Code of the section whose status changed.
    :param course_status: The new status; "C" makes the alerts close notifications.
    :param semester: Semester of the section; defaults to the current semester.
    :param sent_by: Passed through to the `send_alert` task.
    """
    target_semester = get_current_semester() if semester is None else semester
    registrations = get_registrations_for_alerts(
        course_code, target_semester, course_status=course_status
    )
    for registration in registrations:
        send_alert.delay(
            registration.id,
            close_notification=(course_status == "C"),
            sent_by=sent_by,
        )
def register_for_course(
    course_code,
    email_address=None,
    phone=None,
    source=SOURCE_PCA,
    api_key=None,
    user=None,
    auto_resub=False,
    close_notification=False,
):
    """
    Register a user (or a bare email address / phone number) for alerts on a
    section of the current semester.

    This method is for the PCA 3rd party API (originally planned to service
    Penn Course Notify). It may still be used in the future, so the code is kept.

    :return: A 3-tuple `(RegStatus.<STATUS>, section.full_code, registration)`.
        The last element is None unless the status is `RegStatus.SUCCESS`; the
        section code is also None when no contact info was given or the course
        could not be found.
    """
    # Some way of reaching the registrant is required.
    if user:
        profile = user.profile
        reachable = profile.email or profile.phone or profile.push_notifications
    else:
        reachable = email_address or phone
    if not reachable:
        return RegStatus.NO_CONTACT_INFO, None, None

    try:
        _, section = get_course_and_section(course_code, get_current_semester())
    except (Course.DoesNotExist, Section.DoesNotExist, ValueError):
        return RegStatus.COURSE_NOT_FOUND, None, None

    if user is not None:
        already_registered = section.registrations.filter(
            user=user, **Registration.is_active_filter()
        ).exists()
        if already_registered:
            return RegStatus.OPEN_REG_EXISTS, section.full_code, None
        # Close notifications need an email address or push notifications.
        if close_notification and not (
            user.profile.email or user.profile.push_notifications
        ):
            return RegStatus.TEXT_CLOSE_NOTIFICATION, section.full_code, None
        registration = Registration(section=section, user=user, source=source)
    else:
        registration = Registration(
            section=section, email=email_address, phone=phone, source=source
        )
        registration.validate_phone()
        already_registered = section.registrations.filter(
            email=email_address,
            phone=registration.phone,
            **Registration.is_active_filter(),
        ).exists()
        if already_registered:
            return RegStatus.OPEN_REG_EXISTS, section.full_code, None

    registration.auto_resubscribe = auto_resub
    registration.close_notification = close_notification
    registration.api_key = api_key
    registration.save()
    return RegStatus.SUCCESS, section.full_code, registration
def get_queryset(self):
    """
    Return the requesting user's schedules for the current semester, with the
    sections and their related objects prefetched.
    """
    current = get_current_semester()
    own_schedules = Schedule.objects.filter(person=self.request.user, semester=current)
    prefetches = (
        Prefetch("sections", Section.with_reviews.all()),
        "sections__associated_sections",
        "sections__instructors",
        "sections__meetings",
        "sections__meetings__room",
    )
    return own_schedules.prefetch_related(*prefetches)
def pre_ngss_requirement_filter(queryset, req_ids):
    """
    Restrict `queryset` to courses satisfying every listed pre-NGSS requirement.

    :param queryset: The queryset to filter.
    :param req_ids: Comma-separated requirement ids, each of the form
        "<code>@<school>". A falsy value leaves the queryset untouched.
    :return: The filtered queryset. Ids that are malformed, or that match no
        requirement in the current semester, are ignored.
    """
    if not req_ids:
        return queryset
    semester = get_current_semester()  # loop-invariant, so look it up once
    query = Q()
    for req_id in req_ids.split(","):
        code, separator, school = req_id.partition("@")
        if not separator or "@" in school:
            # Not exactly one "@": treat it like an unknown requirement
            # instead of failing on the unpacking.
            continue
        try:
            requirement = PreNGSSRequirement.objects.get(
                code=code, school=school, semester=semester
            )
        except PreNGSSRequirement.DoesNotExist:
            continue
        query &= Q(id__in=requirement.satisfying_courses.all())
    return queryset.filter(query)
def set_all_status(semester=None):
    """
    Copy the registrar's status for every known section of a semester into the db.

    Entries without a normalized section id, and entries whose course or
    section is not in the db, are skipped.

    :param semester: The semester to update; defaults to the current semester.
    """
    target_semester = get_current_semester() if semester is None else semester
    for entry in tqdm(registrar.get_all_course_status(target_semester)):
        code = entry.get("section_id_normalized")
        if code is None:
            continue

        try:
            _, section = get_course_and_section(code, target_semester)
        except (Section.DoesNotExist, Course.DoesNotExist):
            continue

        section.status = entry["status"]
        section.save()
def handle(self, *args, **options):
    """
    Report Penn Course Alert usage statistics for the last `options["days"]`
    days, either by printing them or by posting them to the Slack webhook
    configured in `settings.STATS_WEBHOOK` (when `options["slack"]` is truthy).
    """
    days = options["days"]
    send_to_slack = options["slack"]
    # Beginning of the reporting window.
    start = timezone.now() - timezone.timedelta(days=days)
    # Every registration-based count below is limited to the current semester.
    qs = Registration.objects.filter(
        section__course__semester=get_current_semester())
    # Registrations created in the window that are not resubscriptions.
    num_registrations = qs.filter(created_at__gte=start, resubscribed_from__isnull=True).count()
    num_alerts_sent = qs.filter(notification_sent=True, notification_sent_at__gte=start).count()
    # Resubscriptions created in the window without auto-resubscribe.
    num_resubscribe = qs.filter(resubscribed_from__isnull=False, created_at__gte=start, auto_resubscribe=False).count()
    # Status updates are counted across all semesters (no semester filter here).
    num_status_updates = StatusUpdate.objects.filter(
        created_at__gte=start).count()
    # The two auto-resubscribe counts are not limited to the reporting window.
    num_active_perpetual = qs.filter(
        resubscribed_to__isnull=True,
        auto_resubscribe=True,
        deleted=False,
        cancelled=False,
        notification_sent=False,
    ).count()
    num_cancelled_perpetual = (qs.filter(
        resubscribed_to__isnull=True,
        auto_resubscribe=True,
    ).filter(Q(deleted=True) | Q(cancelled=True)).count())
    message = dedent(f"""
    {f'Penn Course Alert stats in the past {days} day(s)' f' since {start.strftime("%H:%M on %d %B, %Y")}'}:
    New registrations: {num_registrations}
    Alerts sent: {num_alerts_sent}
    Manual resubscribes: {num_resubscribe}
    Active auto-resubscribe requests: {num_active_perpetual}
    Cancelled auto-resubscribe requests: {num_cancelled_perpetual}
    Status Updates from Penn InTouch: {num_status_updates}
    """)
    if send_to_slack:
        url = settings.STATS_WEBHOOK
        print("sending to Slack...")
        # NOTE(review): no timeout is passed, so this call can block indefinitely.
        requests.post(url, data=json.dumps({"text": message}))
    else:
        print(message)
def vectorize_user_by_courses(curr_courses, past_courses, curr_course_vectors_dict, past_course_vectors_dict):
    """
    Build a normalized user vector from explicit lists of current and past courses.

    :param curr_courses: Course codes the user is taking in the current semester.
    :param past_courses: Course codes the user took earlier; must not overlap
        `curr_courses`, and neither list may contain repeats.
    :param curr_course_vectors_dict: Maps course codes to vectors for current
        courses; must be non-empty, since it determines the vector length.
    :param past_course_vectors_dict: Maps course codes to vectors for past courses.
    :return: A tuple `(vector, all_courses)`: the unit-length (or all-zero)
        user vector, and the set of every course code that was passed in.
    :raises ValueError: If courses are repeated, a current course is not
        offered this semester, or a past course does not exist.
    """
    dimension = len(next(iter(curr_course_vectors_dict.values())))

    # Input validation
    all_courses = set(curr_courses).union(past_courses)
    if len(curr_courses) + len(past_courses) != len(all_courses):
        raise ValueError(
            "Repeated courses given in curr_courses and/or past_courses. "
            f"curr_courses: {str(curr_courses)}. past_courses: {str(past_courses)}"
        )

    offered_now = {
        course.full_code
        for course in Course.objects.filter(
            semester=get_current_semester(), full_code__in=curr_courses
        )
    }
    invalid_curr_courses = set(curr_courses).difference(offered_now)
    if invalid_curr_courses:
        raise ValueError(
            "The following courses in curr_courses are invalid or not offered this semester: "
            f"{str(invalid_curr_courses)}"
        )

    ever_offered = {
        course.full_code for course in Course.objects.filter(full_code__in=past_courses)
    }
    invalid_past_courses = set(past_courses).difference(ever_offered)
    if invalid_past_courses:
        raise ValueError(
            f"The following courses in past_courses are invalid: {str(invalid_past_courses)}"
        )

    def total_vector(codes, vectors_by_code):
        # Eliminate courses not in the model, then add up what is left.
        known = [code for code in codes if code in vectors_by_code]
        if not known:
            return np.zeros(dimension)
        return sum(vectors_by_code[code] for code in known)

    curr_total = total_vector(curr_courses, curr_course_vectors_dict)
    past_total = total_vector(past_courses, past_course_vectors_dict)

    combined = curr_total * CURR_COURSES_BIAS + past_total
    length = np.linalg.norm(combined)
    if length > 0:
        combined = combined / length
    return combined, all_courses
def vectorize_user(user, curr_course_vectors_dict, past_course_vectors_dict):
    """
    Aggregates a vector over all the courses in the user's schedules.

    Courses from schedules of the current semester count as current courses.
    Courses from schedules of earlier semesters count as past courses, unless
    they are also current.

    :return: Whatever `vectorize_user_by_courses` returns for those two lists.
    """
    this_semester = get_current_semester()

    def course_codes(schedules):
        # Distinct, non-null course codes reachable from the given schedules.
        codes = schedules.values_list("sections__course__full_code", flat=True)
        return {code for code in codes if code is not None}

    current_codes = course_codes(
        Schedule.objects.filter(person=user, semester=this_semester)
    )
    earlier_codes = course_codes(
        Schedule.objects.filter(person=user, semester__lt=this_semester)
    )
    earlier_codes -= current_codes

    return vectorize_user_by_courses(
        list(current_codes),
        list(earlier_codes),
        curr_course_vectors_dict,
        past_course_vectors_dict,
    )
def current_percent_open(self):
    """
    The percentage (expressed as a decimal number between 0 and 1) of the period
    between the beginning of its add/drop period and min[the current time, the
    end of its registration period] that this section was open. If this
    section's registration period hasn't started yet, this property is null
    (None in Python).

    For sections outside the current semester, the stored `percent_open` is
    returned unchanged.
    """
    from courses.util import get_current_semester, get_or_create_add_drop_period
    # ^ imported here to avoid circular imports

    if self.semester != get_current_semester():
        return self.percent_open

    add_drop = get_or_create_add_drop_period(self.semester)
    add_drop_start = add_drop.estimated_start
    add_drop_end = add_drop.estimated_end
    current_time = timezone.now()
    if current_time <= add_drop_start:
        return None

    # The measured period never extends past the end of add/drop. Both the
    # numerator and the denominator below use this clamped end, so the result
    # stays within [0, 1] even after add/drop has ended.
    period_end = min(current_time, add_drop_end)

    try:
        last_status_update = StatusUpdate.objects.filter(
            section=self,
            created_at__gt=add_drop_start,
            created_at__lt=add_drop_end,
        ).latest("created_at")
    except StatusUpdate.DoesNotExist:
        last_status_update = None
    last_update_dt = last_status_update.created_at if last_status_update else add_drop_start

    period_seconds = float((period_end - add_drop_start).total_seconds())
    # Share of the period since the last status update, counted only if the
    # section is open now.
    percent_after_update = (
        float(self.is_open)
        * float((period_end - last_update_dt).total_seconds())
        / period_seconds
    )
    if last_status_update is None:
        return percent_after_update
    # Share of the period before the last status update, weighted by the
    # stored percent_open.
    percent_before_update = (
        float(self.percent_open)
        * float((last_update_dt - add_drop_start).total_seconds())
        / period_seconds
    )
    return percent_before_update + percent_after_update
def update(self, request, pk=None):
    """
    Overwrite the schedule with id `pk` using the request payload.

    Responds with 404 when no schedule has that id, 403 when it is not in this
    view's queryset, 400 when a section is missing or an IntegrityError occurs,
    whatever `check_semester` returns when that is not None, and 200 plus the
    schedule id on success.
    """
    if not Schedule.objects.filter(id=pk).exists():
        return Response({"detail": "Not found."}, status=status.HTTP_404_NOT_FOUND)

    try:
        schedule = self.get_queryset().get(id=pk)
    except Schedule.DoesNotExist:
        return Response(
            {"detail": "You do not have access to the specified schedule."},
            status=status.HTTP_403_FORBIDDEN,
        )

    payload = request.data
    try:
        sections = self.get_sections(payload)
    except ObjectDoesNotExist:
        return Response(
            {"detail": "One or more sections not found in database."},
            status=status.HTTP_400_BAD_REQUEST,
        )

    semester_problem = self.check_semester(payload, sections)
    if semester_problem is not None:
        return semester_problem

    try:
        new_values = (
            ("person", request.user),
            ("semester", payload.get("semester", get_current_semester())),
            ("name", payload.get("name")),
        )
        for field_name, value in new_values:
            setattr(schedule, field_name, value)
        schedule.save()
        schedule.sections.set(sections)
        return Response({"message": "success", "id": schedule.id}, status=status.HTTP_200_OK)
    except IntegrityError as e:
        return Response(
            {
                "detail": "IntegrityError encountered while trying to update: "
                + str(e.__cause__)
            },
            status=status.HTTP_400_BAD_REQUEST,
        )
def load_add_drop_dates(verbose=False):
    """
    Scrape the Penn Almanac academic calendar and store add/drop dates for the
    current semester.

    The add/drop start is only estimated: it is set to 7:00 on the day after
    the advance registration period (held during the preceding semester) ends,
    and only when the add/drop period has no explicit start. The end is taken
    from the "course selection period ends" row of the current semester.

    :param verbose: If True, print progress messages.
    :raises ValueError: If the current semester is neither fall ("C") nor spring ("A").
    """
    semester = get_current_semester()
    validate_add_drop_semester(semester)

    if verbose:
        print(f"Loading course selection period dates for semester {semester} from the Almanac")

    with transaction.atomic():
        adp = get_or_create_add_drop_period(semester)
        start_date = adp.start
        end_date = adp.end
        html = requests.get("https://almanac.upenn.edu/penn-academic-calendar").content
        soup = BeautifulSoup(html, "html.parser")

        # Advance registration happens in the semester before the one being
        # loaded, so the start is looked up under that earlier heading.
        if semester[4] == "C":
            start_sem = semester[:4] + " spring"
            end_sem = semester[:4] + " fall"
        elif semester[4] == "A":
            start_sem = str(int(semester[:4]) - 1) + " fall"
            end_sem = semester[:4] + " spring"
        else:
            raise ValueError(
                "This script currently only supports fall or spring semesters; "
                f"{semester} is invalid"
            )
        tz = gettz(TIME_ZONE)

        s_year, s_month, s_day, e_year, e_month, e_day = (None,) * 6
        start_mode = 0  # 0 if start semester hasn't been found, 1 if it has, 2 if finished sem
        end_mode = 0  # 0 if end semester hasn't been found, 1 if it has, 2 if finished sem
        all_th_parents = {el.parent for el in soup.find_all("th")}
        months = [
            "january",
            "february",
            "march",
            "april",
            "may",
            "june",
            "july",
            "august",
            "september",
            "october",
            "november",
            "december",
        ]

        def find_month(text):
            # Return the latest month (in calendar order) named in `text`, or None.
            lowered = text.lower()
            found = None
            for month in months:
                if month in lowered:
                    found = month
            return found

        for tr_el in soup.find_all("tr"):
            if tr_el in all_th_parents:
                # Heading row: track which semester's rows we are walking through.
                sem_name = tr_el.th.get_text().lower()
                if start_sem in sem_name:
                    start_mode = 1
                elif start_mode == 1:
                    start_mode = 2
                if end_sem in sem_name:
                    end_mode = 1
                elif end_mode == 1:
                    end_mode = 2
                continue

            children = list(tr_el.findChildren("td", recursive=False))
            if len(children) < 2:
                # Not a "title | date" row; nothing to read from it.
                continue
            title_text = children[0].get_text().lower()
            date_string = children[1].get_text()

            if start_mode == 1 and "advance registration" in title_text:
                dates = date_string.split("-")
                # Without a "-" the single part is both the begin and the end.
                end_part = dates[1] if len(dates) > 1 else dates[0]
                ar_begin_month = find_month(dates[0])
                # The end month is named in the second part of the range; when
                # it is not (e.g. "March 20-31"), it equals the begin month.
                ar_end_month = find_month(end_part)
                if ar_end_month is None:
                    ar_end_month = ar_begin_month
                s_year = int(start_sem[:4])
                if ar_end_month is not None:
                    s_month = months.index(ar_end_month) + 1
                day_candidates = [int(s) for s in end_part.split() if s.isdigit()]
                if day_candidates:
                    s_day = day_candidates[0]

            if end_mode == 1 and "course selection period ends" in title_text:
                course_sel_end_month = find_month(date_string)
                e_year = int(end_sem[:4])
                if course_sel_end_month is not None:
                    e_month = months.index(course_sel_end_month) + 1
                day_candidates = [int(s) for s in date_string.split() if s.isdigit()]
                if day_candidates:
                    e_day = day_candidates[0]

        if None not in [s_year, s_month, s_day] and start_date is None:
            start_date = make_aware(
                datetime.strptime(f"{s_year}-{s_month}-{s_day} 07:00", "%Y-%m-%d %H:%M")
                + timedelta(days=1),
                timezone=tz,
            )
            if verbose:
                print(
                    "NOTE: Add/drop date start was estimated as the end of the advanced "
                    "registration period. Replace this date with the actual start of the "
                    "add/drop period through the Django admin console when it is announced "
                    "to students each semester."
                )
        if None not in [e_year, e_month, e_day]:
            end_date = make_aware(
                datetime.strptime(f"{e_year}-{e_month}-{e_day} 11:59", "%Y-%m-%d %H:%M"),
                timezone=tz,
            )
        adp.estimated_start, adp.end = start_date, end_date
        adp.save()

    if verbose:
        print("Done!")
def get_demand_data(semesters, section_query="", verbose=False):
    """
    Replay registrations and status updates for the given semesters and
    collect snapshots of section demand.

    :param semesters: Iterable of string semesters to process.
    :param section_query: Only sections whose full code starts with this
        string are considered.
    :param verbose: If True, print progress and wrap the long loops in tqdm.
    :return: A dict mapping each processed semester to a list of dicts with
        the keys "percent_through" and "demands". "demands" holds the demand
        values of the sections tracked as closed ("C") when the snapshot was
        taken. Semesters rejected by `validate_add_drop_semester` are absent
        from the result; a current semester whose add/drop period is 0%
        through maps to an empty list.
    """
    current_semester = get_current_semester()
    output_dict = dict()
    # Note: this call is always verbose, regardless of the `verbose` argument.
    recompute_precomputed_fields(verbose=True)
    if verbose:
        print(f"Computing demand data for semesters {str(semesters)}...")
    for semester_num, semester in enumerate(semesters):
        try:
            validate_add_drop_semester(semester)
        except ValidationError:
            if verbose:
                print(
                    f"Skipping semester {semester} (unsupported kind for stats)."
                )
            continue
        add_drop_period = get_or_create_add_drop_period(semester)
        if verbose:
            print(f"Processing semester {semester}, "
                  f"{(semester_num+1)}/{len(semesters)}.\n")
        output_dict[semester] = []  # list of demand data dicts
        section_id_to_object = dict()  # maps section id to section object (for this semester)
        volume_changes_map = dict()  # maps section id to list of volume changes
        status_updates_map = dict()  # maps section id to list of status updates
        # Show progress bars only in verbose mode; otherwise iterate directly.
        iterator_wrapper = tqdm if verbose else (lambda x: x)
        if verbose:
            print("Indexing relevant sections...")
        for section in iterator_wrapper(
                Section.objects.filter(
                    extra_metrics_section_filters,
                    full_code__startswith=section_query,
                    course__semester=semester,
                ).annotate(
                    efficient_semester=F("course__semester"),
                ).distinct()):
            section_id_to_object[section.id] = section
            volume_changes_map[section.id] = []
            status_updates_map[section.id] = []
        if verbose:
            print(
                "Computing registration volume changes over time for each section..."
            )
        # Each registration adds +1 when created and -1 when deactivated (if ever).
        # The section_capacity annotation is not read within this function.
        for registration in iterator_wrapper(
                Registration.objects.filter(
                    section_id__in=section_id_to_object.keys()).annotate(
                        section_capacity=F("section__capacity"))):
            section_id = registration.section_id
            volume_changes_map[section_id].append({
                "date": registration.created_at,
                "volume_change": 1
            })
            deactivated_at = registration.deactivated_at
            if deactivated_at is not None:
                volume_changes_map[section_id].append({
                    "date": deactivated_at,
                    "volume_change": -1
                })
        if verbose:
            print("Collecting status updates over time for each section...")
        for status_update in iterator_wrapper(
                StatusUpdate.objects.filter(
                    section_id__in=section_id_to_object.keys(),
                    in_add_drop_period=True)):
            section_id = status_update.section_id
            status_updates_map[section_id].append({
                "date": status_update.created_at,
                "old_status": status_update.old_status,
                "new_status": status_update.new_status,
            })
        if verbose:
            print("Joining updates for each section and sorting...")
        # Merge both kinds of events into one chronological timeline.
        all_changes = sorted(
            [{
                "type": "status_update",
                "section_id": section_id,
                **update
            } for section_id, status_updates_list in status_updates_map.items()
             for update in status_updates_list] + [{
                 "type": "volume_change",
                 "section_id": section_id,
                 **change
             } for section_id, changes_list in volume_changes_map.items()
               for change in changes_list],
            key=lambda x: (x["date"], int(x["type"] != "status_update")),
            # put status updates first on matching dates
        )
        # Initialize variables to be maintained in our main all_changes loop
        latest_popularity_dist_estimate = None
        registration_volumes = {
            section_id: 0
            for section_id in section_id_to_object.keys()
        }
        demands = {section_id: 0 for section_id in section_id_to_object.keys()}
        # Initialize section statuses
        section_status = {
            section_id: None
            for section_id in section_id_to_object.keys()
        }
        # Seed each section's status with the old_status of its earliest status
        # update; sections without any status update stay None for now.
        for change in all_changes:
            section_id = change["section_id"]
            if change["type"] == "status_update":
                if section_status[section_id] is None:
                    section_status[section_id] = change["old_status"]
        # Semesters other than the current one are treated as fully elapsed.
        percent_through = (add_drop_period.get_percent_through_add_drop(
            timezone.now()) if semester == current_semester else 1)
        if percent_through == 0:
            if verbose:
                print(
                    f"Skipping semester {semester} because the add/drop period "
                    f"hasn't started yet.")
            continue
        # Maximum number of volume changes allowed to pass without recording a
        # new snapshot.
        distribution_estimate_threshold = sum(
            len(changes_list)
            for changes_list in volume_changes_map.values()) // (
                ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES * percent_through)
        num_changes_without_estimate = 0
        if verbose:
            print(f"Compiling demand data for semester {semester}...")
        for change in iterator_wrapper(all_changes):
            section_id = change["section_id"]
            if section_status[section_id] is None:
                # No status update seeded this section: guess its status from
                # its stored percent_open.
                section_status[section_id] = (
                    "O" if section_id_to_object[section_id].percent_open > 0.5
                    else "C")
            if change["type"] == "status_update":
                section_status[section_id] = change["new_status"]
                continue
            date = change["date"]
            volume_change = change["volume_change"]
            registration_volumes[section_id] += volume_change
            # Demand is registration volume relative to capacity.
            # NOTE(review): assumes capacity is non-zero for every indexed section
            # (presumably ensured by extra_metrics_section_filters) - confirm.
            demands[section_id] = (registration_volumes[section_id] /
                                   section_id_to_object[section_id].capacity)
            max_id = max(demands.keys(), key=lambda x: demands[x])
            min_id = min(demands.keys(), key=lambda x: demands[x])
            # Record a new snapshot when there is none yet, when this change
            # touches the recorded extreme sections, when the extremes moved to
            # other sections, or when too many changes passed without one.
            if (latest_popularity_dist_estimate is None
                    or section_id == latest_popularity_dist_estimate[
                        "highest_demand_section"].id
                    or section_id ==
                    latest_popularity_dist_estimate["lowest_demand_section"].id
                    or latest_popularity_dist_estimate["highest_demand_section"].
                    id != max_id
                    or latest_popularity_dist_estimate["lowest_demand_section"].id
                    != min_id
                    or num_changes_without_estimate >=
                    distribution_estimate_threshold):
                num_changes_without_estimate = 0
                output_dict[semester].append({
                    "percent_through": percent_through,
                    "demands": [
                        val for sec_id, val in demands.items()
                        if section_status[sec_id] == "C"
                    ],
                })
                latest_popularity_dist_estimate = {
                    "created_at": date,
                    "semester": semester,
                    "highest_demand_section": section_id_to_object[max_id],
                    "highest_demand_section_volume": registration_volumes[max_id],
                    "lowest_demand_section": section_id_to_object[min_id],
                    "lowest_demand_section_volume": registration_volumes[min_id],
                }
            else:
                num_changes_without_estimate += 1
    return output_dict
def get_queryset(self):
    """
    Return the add/drop-period status updates, oldest first, of the
    current-semester section whose full code is given in the URL kwargs.
    """
    criteria = {
        "section__full_code": self.kwargs["full_code"],
        "section__course__semester": get_current_semester(),
        "in_add_drop_period": True,
    }
    matching_updates = StatusUpdate.objects.filter(**criteria)
    return matching_updates.order_by("created_at")
def save(self, load_script=False, *args, **kwargs):
    """
    This save method enforces the following invariants on the registration:
      - The `phone` field is converted to E164 format, or set to `None` if unparseable.
      - If the `user` field is not `None`, but either of the legacy `phone`
        or `email` fields are not `None`, the contents of the `phone` / `email` fields
        are moved to the `profile` of the `user` object (this was only a concern during the
        PCA refresh transition process, when we switched away from using these legacy fields).
      - If `head_registration` is `None`, it is set to a self-reference.
      - Any other registration whose `head_registration` equals `self.resubscribed_from`
        are updated to have `self` as their `head_registration`.
      - The `original_created_at` field is set to the `created_at` of the tail of the
        resubscribe chain.

    If `load_script` is set to False (indicating this registration is being actively
    created by a PCA user, rather than being loaded in from an external data source),
    and the registration's semester is the current semester, and the registration
    has just been created or deactivated, then the `PcaDemandDistributionEstimate`
    model and `current_demand_distribution_estimate` cache are asynchronously updated
    (via a celery task) to reflect the resulting section demand change.

    :param load_script: See above; extra positional and keyword arguments are
        forwarded to the parent class's `save` on the first save only.
    """
    from alert.tasks import section_demand_change
    from courses.util import get_set_id, is_fk_set
    # ^ imported here to avoid circular imports

    with transaction.atomic():
        self.validate_phone()
        if self.user is not None:
            # Move legacy contact fields onto the user's profile, then clear them here.
            if self.email is not None:
                user_data, _ = UserProfile.objects.get_or_create(
                    user=self.user)
                user_data.email = self.email
                user_data.save()
                self.user.profile = user_data
                self.user.save()
                self.email = None
            if self.phone is not None:
                user_data, _ = UserProfile.objects.get_or_create(
                    user=self.user)
                user_data.phone = self.phone
                user_data.save()
                self.user.profile = user_data
                self.user.save()
                self.phone = None
        # Find old registration
        # (the stored row is read before saving, to detect a change in activity)
        old_registration = Registration.objects.get(
            id=self.id) if self.id else None
        was_active = bool(old_registration and old_registration.is_active)
        # Set head_registration to self if not set
        if not is_fk_set(self, "head_registration"):
            self.head_registration_id = self.id or get_set_id(self)
        super().save(*args, **kwargs)
        if self.resubscribed_from_id:
            # Everything that pointed at the registration we resubscribed from
            # now points at this registration as the head of the chain.
            Registration.objects.filter(
                head_registration_id=self.resubscribed_from_id).update(
                    head_registration=self)
        if self.original_created_at is None:
            self.original_created_at = self.get_original_registration(
            ).created_at
            # Second save, to persist original_created_at.
            super().save()
        if (not load_script
                and self.section.semester == get_current_semester()
                and was_active != self.is_active):
            section = self.section
            # +1 if the registration became active, -1 if it became inactive.
            volume_change = int(self.is_active) - int(was_active)
            # Skip a decrement when the stored volume is already below 1.
            if volume_change > 0 or section.registration_volume >= 1:
                section.registration_volume += volume_change
                section.save()
            section_demand_change.delay(section.id, self.updated_at)
def section_demand_change(section_id, updated_at):
    """
    This function should be called when a section's demand changes (i.e. the
    number of active registrations changes, or the section's status is updated).
    It updates the `PcaDemandDistributionEstimate` model and
    `current_demand_distribution_estimate` cache to reflect the demand change.

    Nothing happens when the section is not in the current semester, or when
    no section matches `extra_metrics_section_filters` for the semester.

    :param: section_id: the id of the section involved in the demand change
    :param: updated_at: the datetime at which the demand change occurred
    """
    section = Section.objects.get(id=section_id)
    semester = section.semester
    if semester != get_current_semester():
        return

    with transaction.atomic():
        create_new_distribution_estimate = False
        # A unique sentinel distinguishes "nothing cached" from any cached value.
        sentinel = object()
        current_demand_distribution_estimate = cache.get(
            "current_demand_distribution_estimate", sentinel
        )
        if (
            current_demand_distribution_estimate is sentinel
            or current_demand_distribution_estimate.semester != semester
        ):
            create_new_distribution_estimate = True

        # Raw demand is registration volume divided by capacity, or NULL when
        # the capacity is not positive.
        sections_qs = (
            Section.objects.filter(extra_metrics_section_filters, course__semester=semester)
            .select_for_update()
            .annotate(
                raw_demand=Case(
                    When(
                        Q(capacity__gt=0),
                        then=(
                            Cast(
                                "registration_volume",
                                models.FloatField(),
                            )
                            / Cast("capacity", models.FloatField())
                        ),
                    ),
                    default=None,
                    output_field=models.FloatField(),
                ),
            )
            .order_by("raw_demand")
        )

        try:
            lowest_demand_section = sections_qs[:1].get()
            # Querysets do not support negative slicing, so the last row of the
            # ascending order is fetched as the first row of the reversed order.
            highest_demand_section = sections_qs.reverse()[:1].get()
        except Section.DoesNotExist:
            return  # Don't add a PcaDemandDistributionEstimate -- there are no valid sections yet

        if (
            create_new_distribution_estimate
            or highest_demand_section.raw_demand
            > current_demand_distribution_estimate.highest_raw_demand
            or lowest_demand_section.raw_demand
            < current_demand_distribution_estimate.lowest_raw_demand
        ):
            closed_sections_demand_values = np.asarray(
                list(sections_qs.filter(status="C").values_list("raw_demand", flat=True))
            )
            # The term "closed sections raw demand values" is abbreviated as
            # "csrdv"; "closed sections positive raw demand values" is
            # abbreviated as "csprdv".
            csrdv_frac_zero, fit_shape, fit_loc, fit_scale = (None, None, None, None)
            if len(closed_sections_demand_values) > 0:
                closed_sections_positive_demand_values = closed_sections_demand_values[
                    np.where(closed_sections_demand_values > 0)
                ]
                # Fraction of closed sections whose raw demand is not positive.
                csrdv_frac_zero = 1 - len(closed_sections_positive_demand_values) / len(
                    closed_sections_demand_values
                )
                if len(closed_sections_positive_demand_values) > 0:
                    fit_shape, fit_loc, fit_scale = stats.lognorm.fit(
                        closed_sections_positive_demand_values
                    )
            new_demand_distribution_estimate = PcaDemandDistributionEstimate(
                semester=semester,
                highest_demand_section=highest_demand_section,
                highest_demand_section_volume=highest_demand_section.registration_volume,
                lowest_demand_section=lowest_demand_section,
                lowest_demand_section_volume=lowest_demand_section.registration_volume,
                csrdv_frac_zero=csrdv_frac_zero,
                csprdv_lognorm_param_shape=fit_shape,
                csprdv_lognorm_param_loc=fit_loc,
                csprdv_lognorm_param_scale=fit_scale,
            )
            add_drop_period = get_or_create_add_drop_period(semester)
            new_demand_distribution_estimate.save(add_drop_period=add_drop_period)
            # Saved a second time with created_at set to the time of the demand change.
            # NOTE(review): presumably the first save assigns created_at
            # automatically, hence the override afterwards - confirm.
            new_demand_distribution_estimate.created_at = updated_at
            new_demand_distribution_estimate.save(add_drop_period=add_drop_period)
            cache.set(
                "current_demand_distribution_estimate",
                new_demand_distribution_estimate,
                timeout=(
                    add_drop_period.estimated_end - add_drop_period.estimated_start
                ).total_seconds()
                // ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES,
            )  # set timeout to roughly follow ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES
def recompute_percent_open(semesters=None, verbose=False, semesters_precomputed=False):
    """
    Recomputes the percent_open field for each section in the given semester(s).

    For a section with status updates strictly inside the add/drop period, percent_open
    is the fraction of time the section was open: measured up to the last status update
    for the current semester, and over the whole add/drop period for other semesters.
    For a section with no such updates, percent_open is 1.0 or 0.0 depending on whether
    its best-known status (latest update at or before the add/drop start, else the
    section's current status) is "O".

    :param semesters: The semesters argument should be a comma-separated list of string
        semesters corresponding to the semesters for which you want to recompute percent_open
        fields, i.e. "2019C,2020A,2020C" for fall 2019, spring 2020, and fall 2020. It defaults
        to None, in which case only the current semester is used. If you supply the string
        "all", it will recompute for all semesters found in Courses in the db.
        If semesters_precomputed is set to True (non-default), then this argument should
        instead be a list of single string semesters.
    :param semesters_precomputed: If False (default), the semesters argument will expect a raw
        comma-separated string input. If True, the semesters argument will expect a list of
        individual string semesters.
    :param verbose: Set to True if you want this script to print its status as it goes,
        or keep as False (default) if you want the script to work silently.
    """
    current_semester = get_current_semester()
    semesters = (
        semesters if semesters_precomputed else get_semesters(semesters=semesters, verbose=verbose)
    )

    if verbose:
        print(f"Recomputing open percentages for semesters {str(semesters)}...")

    for semester_num, semester in enumerate(semesters):
        with transaction.atomic():
            # We make this command an atomic transaction, so that the database will not
            # be modified unless the entire update for a semester succeeds.
            if verbose:
                print(f"\nProcessing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.")

            add_drop = get_or_create_add_drop_period(semester)
            add_drop_start = add_drop.estimated_start
            add_drop_end = add_drop.estimated_end

            # NOTE(review): this queryset is never evaluated, so no rows are actually
            # locked here. Left as-is to avoid changing locking behaviour; confirm
            # whether a real lock is wanted.
            StatusUpdate.objects.filter(section__course__semester=semester).select_for_update()

            sections = Section.objects.filter(course__semester=semester)
            num_erroneous_updates = 0
            num_total_updates = 0
            for section in sections:
                # Materialize once: the list is reused for counting, iterating and for
                # reading the first/last update, avoiding extra queries per section.
                status_updates = list(
                    StatusUpdate.objects.filter(
                        section=section,
                        created_at__gt=add_drop_start,
                        created_at__lt=add_drop_end,
                    ).order_by("created_at")
                )
                num_total_updates += len(status_updates)
                total_open_seconds = 0
                if not status_updates:
                    # No updates during add/drop: fall back to the latest status known
                    # at the start of add/drop, or the section's current status.
                    try:
                        guess_status = (
                            StatusUpdate.objects.filter(
                                section=section, created_at__lte=add_drop_start
                            )
                            .latest("created_at")
                            .new_status
                        )
                    except StatusUpdate.DoesNotExist:
                        guess_status = section.status
                    section.percent_open = float(guess_status == "O")
                else:
                    last_dt = add_drop_start
                    last_status = status_updates[0].old_status
                    for update in status_updates:
                        if last_status != update.old_status:
                            # The update's claimed previous status contradicts what we
                            # tracked; count it but keep going.
                            num_erroneous_updates += 1
                        if last_status == "O":
                            # The section was open for the whole interval that just ended,
                            # regardless of the status it moves to. (Only counting
                            # "O" -> non-"O" transitions would drop the interval before
                            # an "O" -> "O" update, since last_dt advances on every update.)
                            total_open_seconds += (update.created_at - last_dt).total_seconds()
                        last_dt = update.created_at
                        last_status = update.new_status
                    # created_at__gt=add_drop_start guarantees a positive denominator.
                    section.percent_open = float(total_open_seconds) / float(
                        (status_updates[-1].created_at - add_drop_start).total_seconds()
                    )
                    if section.semester != current_semester:
                        # For other semesters, also account for the tail between the
                        # last update and the end of add/drop.
                        section.percent_open = float(
                            total_open_seconds
                            + int(last_status == "O") * (add_drop_end - last_dt).total_seconds()
                        ) / float((add_drop_end - add_drop_start).total_seconds())
                section.save()

            if verbose:
                print(
                    f"Finished calculating percent_open for {len(sections)} sections from "
                    f"semester {semester}, encountered {num_erroneous_updates} erroneous "
                    f"Status Updates (out of {num_total_updates} total Status Updates)"
                )

    if verbose:
        print(f"Finished recomputing open percentages for semesters {str(semesters)}.")
def recompute_demand_distribution_estimates(
    semesters=None, semesters_precomputed=False, verbose=False
):
    """
    This script recomputes all PcaDemandDistributionEstimate objects for the given semester(s)
    based on saved Registration objects. In doing so, it also recomputes the registration_volume
    and percent_open fields for all sections in the given semester(s)
    (by calling recompute_registration_volumes and recompute_percent_open), after first calling
    recompute_precomputed_fields.

    For each semester, the existing estimates are deleted, then all registration volume changes
    and in-add/drop status updates are replayed in chronological order, creating a new estimate
    whenever the extreme-demand sections are affected or enough changes have accumulated.
    When the semester is the current semester, the `current_demand_distribution_estimate`
    cache entry is set to the last estimate created (or to None if none was created).

    :param semesters: The semesters argument should be a comma-separated list of string
        semesters corresponding to the semesters for which you want to recompute demand
        distribution estimate, i.e. "2019C,2020A,2020C" for fall 2019, spring 2020,
        and fall 2020. It defaults to None, in which case only the current semester is used.
        If you supply the string "all", it will recompute for all semesters found in
        Courses in the db.
        If semesters_precomputed is set to True (non-default), then this argument should
        instead be a list of single string semesters.
    :param semesters_precomputed: If False (default), the semesters argument will expect a raw
        comma-separated string input. If True, the semesters argument will expect a list of
        individual string semesters.
    :param verbose: Set to True if you want this script to print its status as it goes,
        or keep as False (default) if you want the script to work silently.
    """
    current_semester = get_current_semester()
    semesters = (
        semesters if semesters_precomputed else get_semesters(semesters=semesters, verbose=verbose)
    )

    # Refresh the fields this computation reads before replaying history
    # (percent_open is used below to guess an initial section status).
    recompute_precomputed_fields(verbose=verbose)
    recompute_registration_volumes(semesters=semesters, semesters_precomputed=True, verbose=verbose)
    recompute_percent_open(semesters=semesters, semesters_precomputed=True, verbose=verbose)

    if verbose:
        print(f"Recomputing demand distribution estimates for semesters {str(semesters)}...")
    for semester_num, semester in enumerate(semesters):
        try:
            validate_add_drop_semester(semester)
        except ValidationError:
            if verbose:
                print(f"Skipping semester {semester} (unsupported kind for stats).")
            continue
        add_drop_period = get_or_create_add_drop_period(semester)
        set_cache = semester == current_semester

        with transaction.atomic():
            # We make this command an atomic transaction, so that the database will not
            # be modified unless the entire update for a semester succeeds.
            # If set_cache is True, we will set the current_demand_distribution_estimate variable
            # in cache

            if verbose:
                print(f"Processing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.\n")
                print(
                    "Deleting existing PcaDemandDistributionEstimate objects for semester "
                    f"{semester} (so that we can recompute these objects)..."
                )
            PcaDemandDistributionEstimate.objects.filter(
                semester=semester
            ).select_for_update().delete()

            section_id_to_object = dict()  # maps section id to section object (for this semester)
            volume_changes_map = dict()  # maps section id to list of volume changes
            status_updates_map = dict()  # maps section id to list of status updates

            # Show a progress bar only in verbose mode; otherwise iterate unchanged.
            iterator_wrapper = tqdm if verbose else (lambda x: x)
            if verbose:
                print("Indexing relevant sections...")
            for section in iterator_wrapper(
                Section.objects.filter(extra_metrics_section_filters, course__semester=semester)
                .annotate(
                    efficient_semester=F("course__semester"),
                )
                .distinct()
            ):
                section_id_to_object[section.id] = section
                volume_changes_map[section.id] = []
                status_updates_map[section.id] = []

            if verbose:
                print("Computing registration volume changes over time for each section...")
            for registration in iterator_wrapper(
                Registration.objects.filter(section_id__in=section_id_to_object.keys())
                .annotate(section_capacity=F("section__capacity"))
                .select_for_update()
            ):
                section_id = registration.section_id
                # Each registration adds one to the volume when created...
                volume_changes_map[section_id].append(
                    {"date": registration.created_at, "volume_change": 1}
                )
                # ...and removes one when (if ever) it was deactivated.
                deactivated_at = registration.deactivated_at
                if deactivated_at is not None:
                    volume_changes_map[section_id].append(
                        {"date": deactivated_at, "volume_change": -1}
                    )

            if verbose:
                print("Collecting status updates over time for each section...")
            for status_update in iterator_wrapper(
                StatusUpdate.objects.filter(
                    section_id__in=section_id_to_object.keys(), in_add_drop_period=True
                ).select_for_update()
            ):
                section_id = status_update.section_id
                status_updates_map[section_id].append(
                    {
                        "date": status_update.created_at,
                        "old_status": status_update.old_status,
                        "new_status": status_update.new_status,
                    }
                )

            if verbose:
                print("Joining updates for each section and sorting...")
            # Merge both kinds of events into one chronologically sorted timeline.
            all_changes = sorted(
                [
                    {"type": "status_update", "section_id": section_id, **update}
                    for section_id, status_updates_list in status_updates_map.items()
                    for update in status_updates_list
                ]
                + [
                    {"type": "volume_change", "section_id": section_id, **change}
                    for section_id, changes_list in volume_changes_map.items()
                    for change in changes_list
                ],
                key=lambda x: (x["date"], int(x["type"] != "status_update")),
                # put status updates first on matching dates
            )

            # Initialize variables to be maintained in our main all_changes loop
            latest_popularity_dist_estimate = None
            registration_volumes = {section_id: 0 for section_id in section_id_to_object.keys()}
            demands = {section_id: 0 for section_id in section_id_to_object.keys()}

            # Initialize section statuses
            # (a section's starting status is the old_status of its earliest status update;
            # sections with no status updates stay None and are guessed in the main loop)
            section_status = {section_id: None for section_id in section_id_to_object.keys()}
            for change in all_changes:
                section_id = change["section_id"]
                if change["type"] == "status_update":
                    if section_status[section_id] is None:
                        section_status[section_id] = change["old_status"]

            # Past semesters are treated as fully elapsed (1); for the current semester
            # the value comes from the add/drop period as of now.
            percent_through = (
                add_drop_period.get_percent_through_add_drop(timezone.now())
                if semester == current_semester
                else 1
            )
            if percent_through == 0:
                if verbose:
                    print(
                        f"Skipping semester {semester} because the add/drop period "
                        f"hasn't started yet."
                    )
                # NOTE(review): the estimates deleted above stay deleted when we skip here,
                # and the cache is not touched -- confirm this is intended.
                continue
            # Maximum number of consecutive volume changes allowed without creating an
            # estimate: total volume changes divided by the number of estimates targeted
            # for the elapsed portion of add/drop.
            distribution_estimate_threshold = sum(
                len(changes_list) for changes_list in volume_changes_map.values()
            ) // (ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES * percent_through)
            num_changes_without_estimate = 0

            if verbose:
                print(f"Creating PcaDemandDistributionEstimate objects for semester {semester}...")
            for change in iterator_wrapper(all_changes):
                section_id = change["section_id"]

                if section_status[section_id] is None:
                    # No status update exists for this section: guess its status from
                    # the recomputed percent_open.
                    section_status[section_id] = (
                        "O" if section_id_to_object[section_id].percent_open > 0.5 else "C"
                    )
                if change["type"] == "status_update":
                    # Status updates only change the tracked status; they never
                    # trigger an estimate by themselves.
                    section_status[section_id] = change["new_status"]
                    continue

                date = change["date"]
                volume_change = change["volume_change"]
                registration_volumes[section_id] += volume_change
                # NOTE(review): assumes capacity is non-zero for every indexed section
                # (presumably guaranteed by extra_metrics_section_filters) -- confirm.
                demands[section_id] = (
                    registration_volumes[section_id] / section_id_to_object[section_id].capacity
                )
                max_id = max(demands.keys(), key=lambda x: demands[x])
                min_id = min(demands.keys(), key=lambda x: demands[x])
                # Create a new estimate when there is none yet, when the changed section is
                # (or the extremes no longer match) the recorded highest/lowest demand section,
                # or when too many changes have passed without an estimate.
                if (
                    latest_popularity_dist_estimate is None
                    or section_id == latest_popularity_dist_estimate.highest_demand_section_id
                    or section_id == latest_popularity_dist_estimate.lowest_demand_section_id
                    or latest_popularity_dist_estimate.highest_demand_section_id != max_id
                    or latest_popularity_dist_estimate.lowest_demand_section_id != min_id
                    or num_changes_without_estimate >= distribution_estimate_threshold
                ):
                    num_changes_without_estimate = 0
                    closed_sections_demand_values = np.asarray(
                        [val for sec_id, val in demands.items() if section_status[sec_id] == "C"]
                    )
                    # The fit parameters stay None when there are no closed sections
                    # (or no closed sections with positive demand).
                    csrdv_frac_zero, fit_shape, fit_loc, fit_scale = (None, None, None, None)
                    if len(closed_sections_demand_values) > 0:
                        closed_sections_positive_demand_values = closed_sections_demand_values[
                            np.where(closed_sections_demand_values > 0)
                        ]
                        # Fraction of closed sections whose demand is not positive.
                        csrdv_frac_zero = 1 - len(closed_sections_positive_demand_values) / len(
                            closed_sections_demand_values
                        )
                        if len(closed_sections_positive_demand_values) > 0:
                            fit_shape, fit_loc, fit_scale = stats.lognorm.fit(
                                closed_sections_positive_demand_values
                            )

                    latest_popularity_dist_estimate = PcaDemandDistributionEstimate(
                        created_at=date,
                        semester=semester,
                        highest_demand_section=section_id_to_object[max_id],
                        highest_demand_section_volume=registration_volumes[max_id],
                        lowest_demand_section=section_id_to_object[min_id],
                        lowest_demand_section_volume=registration_volumes[min_id],
                        csrdv_frac_zero=csrdv_frac_zero,
                        csprdv_lognorm_param_shape=fit_shape,
                        csprdv_lognorm_param_loc=fit_loc,
                        csprdv_lognorm_param_scale=fit_scale,
                    )
                    # Saved twice with created_at re-assigned in between.
                    # NOTE(review): presumably the first save overwrites created_at
                    # automatically -- confirm against the model.
                    latest_popularity_dist_estimate.save(add_drop_period=add_drop_period)
                    latest_popularity_dist_estimate.created_at = date
                    latest_popularity_dist_estimate.save(add_drop_period=add_drop_period)
                else:
                    num_changes_without_estimate += 1

            if set_cache:
                # For the current semester, publish the final estimate (or None) to the
                # cache with no expiry.
                if latest_popularity_dist_estimate is not None:
                    cache.set(
                        "current_demand_distribution_estimate",
                        latest_popularity_dist_estimate,
                        timeout=None,
                    )
                else:
                    cache.set("current_demand_distribution_estimate", None, timeout=None)

    if verbose:
        print(
            "Finished recomputing demand distribution estimate and section registration_volume "
            f"fields for semesters {str(semesters)}."
        )
def recommend_courses_view(request):
    """
    This route will optionally take in current and past courses. In order to
    make recommendations solely on the user's courses in past and current PCP schedules, simply
    omit both the curr_courses and past_courses fields in your request.
    Otherwise, in order to specify past and current courses, include a "curr_courses"
    and/or "past_courses" attribute in the request that should each contain an array of
    string course full codes of the form DEPT-XXX (e.g. CIS-120).
    If successful, this route will return a list of recommended courses, with the same schema
    as the List Courses route, starting with the most relevant course. The number of
    recommended courses returned can be specified using the n_recommendations attribute in the
    request body, but if this attribute is omitted, the default will be 5.
    If n_recommendations cannot be converted to an integer (including null or a
    non-scalar value), or is <=0, a 400 will be returned.
    If curr_courses contains repeated courses or invalid courses or non-current courses, a
    400 will be returned.
    If past_courses contains repeated courses or invalid courses, a 400 will be returned.
    If curr_courses and past_courses contain overlapping courses, a 400 will be returned.
    """
    user = request.user
    curr_courses = request.data.get("curr_courses", None)
    if curr_courses is None:
        curr_courses = []
    past_courses = request.data.get("past_courses", None)
    if past_courses is None:
        past_courses = []
    n_recommendations = request.data.get("n_recommendations", 5)

    # input validation
    try:
        n_recommendations = int(n_recommendations)
    except (TypeError, ValueError):
        # int() raises TypeError (not ValueError) for None, lists, dicts, etc.;
        # both mean the client sent something that is not an integer.
        return Response(
            f"n_recommendations: {n_recommendations} is not int",
            status=status.HTTP_400_BAD_REQUEST,
        )
    if n_recommendations <= 0:
        return Response(
            f"n_recommendations: {n_recommendations} <= 0",
            status=status.HTTP_400_BAD_REQUEST,
        )

    course_clusters = retrieve_course_clusters()
    (
        cluster_centroids,
        clusters,
        curr_course_vectors_dict,
        past_course_vectors_dict,
    ) = course_clusters

    if curr_courses or past_courses:
        # Explicit course lists were supplied: build the user vector from them.
        # Any validation problem surfaces as a ValueError and becomes a 400.
        try:
            user_vector, user_courses = vectorize_user_by_courses(
                clean_course_input(curr_courses),
                clean_course_input(past_courses),
                curr_course_vectors_dict,
                past_course_vectors_dict,
            )
        except ValueError as e:
            return Response(
                str(e),
                status=status.HTTP_400_BAD_REQUEST,
            )
    else:
        # No courses supplied: derive the vector from the requesting user.
        user_vector, user_courses = vectorize_user(
            user, curr_course_vectors_dict, past_course_vectors_dict
        )

    recommended_course_codes = recommend_courses(
        curr_course_vectors_dict,
        cluster_centroids,
        clusters,
        user_vector,
        user_courses,
        n_recommendations,
    )

    # Only return recommended courses offered in the current semester, with
    # their open/closed, credit-bearing sections prefetched for serialization.
    queryset = Course.with_reviews.filter(
        semester=get_current_semester(), full_code__in=recommended_course_codes
    )
    queryset = queryset.prefetch_related(
        Prefetch(
            "sections",
            Section.with_reviews.all()
            .filter(credits__isnull=False)
            .filter(Q(status="O") | Q(status="C"))
            .distinct()
            .prefetch_related("course", "meetings__room"),
        )
    )

    return Response(
        CourseListSerializer(
            queryset,
            many=True,
        ).data,
        status=status.HTTP_200_OK,
    )
def handle(self, *args, **kwargs):
    """
    Import review data from a SQL dump (a directory, a zip file, or a zip file in S3)
    for the requested semesters, replacing any reviews already imported for them.

    Reads these options from kwargs: "src", "semester", "import_all", "s3_bucket", "zip",
    "summary_file", "import_details", "import_descriptions", "show_progress_bar", "force".

    The database work runs inside a single atomic transaction. Unless "force" is set,
    the user is asked to confirm before existing reviews are deleted.

    :raises CommandError: if no source is given, or neither semesters nor import_all is given.
    :raises ValueError: if one of the requested semesters is the current semester.
    :return: 0 on completion, including when the user aborts at the confirmation prompt.
    """
    root_logger = logging.getLogger("")
    root_logger.setLevel(logging.DEBUG)

    src = kwargs["src"]
    semesters = kwargs["semester"]
    import_all = kwargs["import_all"]
    s3_bucket = kwargs["s3_bucket"]
    # Anything downloaded from S3 is treated as a zip file.
    is_zip_file = kwargs["zip"] or s3_bucket is not None
    summary_file = kwargs["summary_file"]
    import_details = kwargs["import_details"]
    import_descriptions = kwargs["import_descriptions"]
    show_progress_bar = kwargs["show_progress_bar"]
    force = kwargs["force"]

    if src is None:
        raise CommandError("source directory or zip must be defined.")
    if semesters is None and not import_all:
        raise CommandError(
            "Must define semester with (-s) or explicitly import all semesters with (-a)."
        )
    if semesters is not None:
        current_semester = get_current_semester()
        for semester in semesters:
            if semester == current_semester:
                raise ValueError(
                    f"You cannot import reviews for the current semester ({current_semester}). "
                    f"Did you forget to update the SEMESTER option in the Django admin console?"
                )

    if s3_bucket is not None:
        fp = "/tmp/pcrdump.zip"
        # Make sure AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
        # are loaded in as environment variables.
        print(f"downloading zip from s3 bucket: {src}")
        S3_client.download_file(s3_bucket, src, fp)
        src = fp

    print(
        "This script is an atomic transaction, meaning the database will only be "
        "modified if the whole script succeeds."
    )

    with transaction.atomic():  # Only commit changes if the whole script succeeds
        # TODO: When we import details and crosslistings, get their data here too.
        # Track where each optional table lands in the list of opened files.
        tables_to_get = [summary_file]
        idx = 1
        detail_idx = -1
        if import_details:
            tables_to_get.append(ISC_RATING_TABLE)
            detail_idx = idx
            idx += 1

        description_idx = -1
        if import_descriptions:
            tables_to_get.append(ISC_DESC_TABLE)
            description_idx = idx
            idx += 1

        files = self.get_files(src, is_zip_file, tables_to_get)
        try:
            summary_fo = files[0]
            print("Loading summary file...")
            summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False)
            gc.collect()
            print("SQL parsed and loaded!")

            if not import_all:
                full_len = len(summary_rows)
                summary_rows = [r for r in summary_rows if r["TERM"] in semesters]
                gc.collect()
                filtered_len = len(summary_rows)
                print(f"Filtered {full_len} rows down to {filtered_len} rows.")

            # From here on, work with the semesters actually present in the data.
            semesters = sorted({r["TERM"] for r in summary_rows})
            gc.collect()

            to_delete = Review.objects.filter(section__course__semester__in=semesters)
            delete_count = to_delete.count()

            if delete_count > 0:
                if not force:
                    prompt = input(
                        f"This import will overwrite {delete_count} rows that have already been "
                        "imported. Continue? (y/N) "
                    )
                    if prompt.strip().upper() != "Y":
                        print("Aborting...")
                        return 0

                print(
                    f"Deleting {delete_count} existing reviews for semesters from the database..."
                )
                to_delete.delete()

            print(f"Importing reviews for semester(s) {', '.join(semesters)}")
            stats = import_summary_rows(summary_rows, show_progress_bar)
            print(stats)

            gc.collect()

            if import_details:
                print("Loading details file...")
                stats = import_ratings_rows(
                    *load_sql_dump(files[detail_idx]), semesters, show_progress_bar
                )
                print(stats)

            gc.collect()

            if import_descriptions:
                print("Loading descriptions file...")
                stats = import_description_rows(
                    *load_sql_dump(files[description_idx]),
                    None if import_all else semesters,
                    show_progress_bar,
                )
                print(stats)
        finally:
            # Always release the opened dump files, including on abort or error.
            self.close_files(files)

        # invalidate cached views
        print("Invalidating cache...")
        del_count = clear_cache()
        print(f"{del_count if del_count >= 0 else 'all'} cache entries removed.")

        gc.collect()

        print(f"Recomputing stats for semester(s) {', '.join(semesters)}...")
        recompute_stats(
            semesters=semesters,
            semesters_precomputed=True,
            verbose=True,
        )

    return 0