def save(self, *args, **kwargs):
    """
    Save this object, then fill in its add/drop-period fields and notify dependents.

    An optional ``add_drop_period`` keyword argument may be supplied (useful in bulk
    operations, to avoid looking the period up for every object). It is removed from
    ``kwargs`` before the parent ``save`` is called. If it is absent or None, the period
    is obtained with ``get_or_create_add_drop_period`` for this object's section's semester.

    After the first parent save, if the section's semester does not pass
    ``validate_add_drop_semester``, the method returns immediately. Otherwise it sets
    ``in_add_drop_period`` and ``percent_through_add_drop_period`` from ``created_at``
    relative to the period's estimated start/end, saves again, marks the section as having
    status updates, saves the section, and queues ``section_demand_change``.
    """
    from alert.models import validate_add_drop_semester
    from alert.tasks import section_demand_change
    from courses.util import get_or_create_add_drop_period

    # ^ imported here to avoid circular imports

    # Take our custom kwarg out so that it is not forwarded to the parent save().
    add_drop_period = kwargs.pop("add_drop_period", None)

    super().save(*args, **kwargs)

    # Only semesters that pass add/drop validation get the percent-through fields.
    try:
        validate_add_drop_semester(self.section.semester)
    except ValidationError:
        return

    if add_drop_period is None:
        add_drop_period = get_or_create_add_drop_period(self.section.semester)

    created_at = self.created_at
    start = add_drop_period.estimated_start
    end = add_drop_period.estimated_end
    if start <= created_at <= end:
        in_period = True
        fraction = (created_at - start) / (end - start)
    elif created_at < start:
        # Created before the period began.
        in_period, fraction = False, 0
    else:
        # Created after the period ended.
        in_period, fraction = False, 1
    self.in_add_drop_period = in_period
    self.percent_through_add_drop_period = fraction
    super().save()

    self.section.has_status_updates = True
    self.section.save()
    section_demand_change.delay(self.section.id, self.created_at)
def fill_in_add_drop_periods(verbose=False):
    """
    Create an AddDropPeriod for each Course semester that lacks one and passes
    ``validate_add_drop_semester``.

    :param verbose: If True, print the set of semesters being filled in (only printed
        when that set is non-empty).
    :return: The set of semesters that already had an AddDropPeriod, unioned with the
        semesters for which one was created by this call.
    """
    course_semesters = set(Course.objects.values_list("semester", flat=True).distinct())
    adp_semesters = set(AddDropPeriod.objects.values_list("semester", flat=True).distinct())

    missing_semesters = set()
    for candidate in course_semesters - adp_semesters:
        try:
            validate_add_drop_semester(candidate)
        except ValidationError:
            # Semester failed add/drop validation; no period is created for it.
            continue
        missing_semesters.add(candidate)

    if verbose and missing_semesters:
        print(f"Filling in AddDropPeriod objects for semesters {missing_semesters}")
    for semester in missing_semesters:
        AddDropPeriod(semester=semester).save()

    return adp_semesters | missing_semesters
def record_update(section, semester, old_status, new_status, alerted, req, created_at=None):
    """
    Save a StatusUpdate for ``section``, validate the inputs, and update percent-open.

    Note the order: the StatusUpdate is saved *before* any validation runs, so when a
    ValidationError is raised below, the saved StatusUpdate is still in place and only the
    ``update_percent_open`` step is skipped.

    :param section: Section the update belongs to.
    :param semester: Semester string passed to ``validate_add_drop_semester``.
    :param old_status: Status before the change; must be a key of ``Section.STATUS_CHOICES``.
    :param new_status: Status after the change; must be a key of ``Section.STATUS_CHOICES``.
    :param alerted: Stored as the update's ``alert_sent``.
    :param req: Stored as the update's ``request_body``.
    :param created_at: If not None, assigned to the update's ``created_at`` before saving.
    :return: The saved StatusUpdate.
    :raises ValidationError: If either status is not a valid choice, or if
        ``validate_add_drop_semester(semester)`` rejects the semester.
    """
    from alert.models import validate_add_drop_semester  # avoid circular imports

    update = StatusUpdate(
        section=section,
        old_status=old_status,
        new_status=new_status,
        alert_sent=alerted,
        request_body=req,
    )
    if created_at is not None:
        update.created_at = created_at
    update.save()

    valid_status_choices = dict(Section.STATUS_CHOICES).keys()
    # Old status is checked first, so it is the one reported if both are invalid.
    for name, status in (("Old status", old_status), ("New status", new_status)):
        if status not in valid_status_choices:
            raise ValidationError(
                f"{name} is invalid; expected a value in {valid_status_choices}, but got {status}"
            )

    # Raises ValidationError if semester is not fall or spring (and correctly formatted)
    validate_add_drop_semester(semester)
    update_percent_open(section, update)

    return update
def load_add_drop_dates(verbose=False):
    """
    Load add/drop period dates for the current semester from the Penn Almanac calendar.

    The page at https://almanac.upenn.edu/penn-academic-calendar is fetched and its table
    rows are scanned:

    - Under the "start" semester heading, the "advance registration" row gives a date
      range; the start is estimated as 07:00 on the day after the range's last day. This
      estimate is only used if the AddDropPeriod has no explicit ``start``.
    - Under the "end" semester heading, the "course selection period ends" row gives the
      end date, taken at 11:59.

    For a fall ("C") semester the start heading is "<year> spring" and the end heading is
    "<year> fall"; for a spring ("A") semester they are "<year - 1> fall" and
    "<year> spring".

    The resulting values are assigned to ``estimated_start`` and ``end`` of the current
    semester's AddDropPeriod, which is saved inside one transaction.

    :param verbose: If True, print progress messages.
    :raises ValueError: If the fifth character of the current semester is neither "A"
        nor "C".
    """
    semester = get_current_semester()
    validate_add_drop_semester(semester)

    months = [
        "january",
        "february",
        "march",
        "april",
        "may",
        "june",
        "july",
        "august",
        "september",
        "october",
        "november",
        "december",
    ]

    def last_month_in(text):
        """Return the latest calendar month named in ``text`` (case-insensitive), or None."""
        lowered = text.lower()
        found = [month for month in months if month in lowered]
        return found[-1] if found else None

    def first_day_in(text):
        """Return the first all-digit whitespace-separated token of ``text`` as an int, or None."""
        return next((int(token) for token in text.split() if token.isdigit()), None)

    if verbose:
        print(f"Loading course selection period dates for semester {semester} from the Almanac")
    with transaction.atomic():
        adp = get_or_create_add_drop_period(semester)
        start_date = adp.start
        end_date = adp.end
        html = requests.get("https://almanac.upenn.edu/penn-academic-calendar").content
        soup = BeautifulSoup(html, "html.parser")
        if semester[4] == "C":
            start_sem = semester[:4] + " spring"
            end_sem = semester[:4] + " fall"
        elif semester[4] == "A":
            start_sem = str(int(semester[:4]) - 1) + " fall"
            end_sem = semester[:4] + " spring"
        else:
            raise ValueError(
                "This script currently only supports fall or spring semesters; "
                f"{semester} is invalid"
            )
        tz = gettz(TIME_ZONE)

        s_year = s_month = s_day = e_year = e_month = e_day = None
        start_mode = 0  # 0 if start semester hasn't been found, 1 if it has, 2 if finished sem
        end_mode = 0  # 0 if end semester hasn't been found, 1 if it has, 2 if finished sem
        header_rows = {el.parent for el in soup.find_all("th")}
        for tr_el in soup.find_all("tr"):
            if tr_el in header_rows:
                # A heading row: track whether we are inside the start / end semester.
                sem_name = tr_el.th.get_text().lower()
                if start_sem in sem_name:
                    start_mode = 1
                elif start_mode == 1:
                    start_mode = 2
                if end_sem in sem_name:
                    end_mode = 1
                elif end_mode == 1:
                    end_mode = 2
                continue

            cells = tr_el.find_all("td", recursive=False)
            if len(cells) < 2:
                # Not a "title | date" row; there is nothing to parse.
                continue
            title_text = cells[0].get_text().lower()
            date_string = cells[1].get_text()

            if start_mode == 1 and "advance registration" in title_text:
                # Treat en/em dashes like hyphens so that "Oct 28 – Nov 10" splits too.
                dates = date_string.replace("\u2013", "-").replace("\u2014", "-").split("-")
                begin_part = dates[0]
                # A single date (no dash) is both the beginning and the end of the range.
                end_part = dates[1] if len(dates) > 1 else dates[0]
                # The end month must be read from the END of the range; if that part
                # names no month (e.g. "November 1-10"), fall back to the begin month.
                ar_end_month = last_month_in(end_part) or last_month_in(begin_part)
                s_year = int(start_sem[:4])
                if ar_end_month is not None:
                    s_month = months.index(ar_end_month) + 1
                day = first_day_in(end_part)
                if day is not None:
                    s_day = day

            if end_mode == 1 and "course selection period ends" in title_text:
                course_sel_end_month = last_month_in(date_string)
                e_year = int(end_sem[:4])
                if course_sel_end_month is not None:
                    e_month = months.index(course_sel_end_month) + 1
                day = first_day_in(date_string)
                if day is not None:
                    e_day = day

        if None not in [s_year, s_month, s_day] and start_date is None:
            # Estimated start: 07:00 on the day after advance registration ends.
            start_date = make_aware(
                datetime(s_year, s_month, s_day, 7, 0) + timedelta(days=1),
                timezone=tz,
            )
            if verbose:
                print(
                    "NOTE: Add/drop date start was estimated as the end of the advanced "
                    "registration period. Replace this date with the actual start of the "
                    "add/drop period through the Django admin console when it is announced "
                    "to students each semester."
                )
        if None not in [e_year, e_month, e_day]:
            end_date = make_aware(
                datetime(e_year, e_month, e_day, 11, 59),
                timezone=tz,
            )
        adp.estimated_start, adp.end = start_date, end_date
        adp.save()
    if verbose:
        print("Done!")
def get_demand_data(semesters, section_query="", verbose=False):
    """
    Replay registrations and status updates to collect demand snapshots per semester.

    For each given semester that passes ``validate_add_drop_semester``, all Registration
    creations/deactivations (+1 / -1 volume changes) and in-add/drop-period StatusUpdates of
    the matching sections are merged into one date-ordered list and replayed. While
    replaying, each section's demand is tracked as registration volume divided by section
    capacity, and a snapshot is recorded whenever the highest/lowest demand section
    changes, the changed section is the current highest/lowest, or enough changes have
    passed without a snapshot.

    :param semesters: Iterable (with ``len``) of semester strings to process.
    :param section_query: Only sections whose ``full_code`` starts with this string are
        included (default "" matches every section).
    :param verbose: If True, print progress and wrap long loops in ``tqdm``.
    :return: Dict mapping each processed semester to a list of snapshot dicts with keys
        ``"percent_through"`` and ``"demands"`` (the demand values of all sections whose
        tracked status is "C" at that point). Semesters that fail validation are absent;
        a semester whose add/drop period has not started maps to an empty list.
    """
    current_semester = get_current_semester()
    output_dict = {}

    # Honor the caller's verbosity so that verbose=False really is silent.
    recompute_precomputed_fields(verbose=verbose)

    if verbose:
        print(f"Computing demand data for semesters {str(semesters)}...")

    for semester_num, semester in enumerate(semesters):
        try:
            validate_add_drop_semester(semester)
        except ValidationError:
            if verbose:
                print(f"Skipping semester {semester} (unsupported kind for stats).")
            continue
        add_drop_period = get_or_create_add_drop_period(semester)

        if verbose:
            print(f"Processing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.\n")

        output_dict[semester] = []  # list of demand data dicts
        section_id_to_object = {}  # maps section id to section object (for this semester)
        volume_changes_map = {}  # maps section id to list of volume changes
        status_updates_map = {}  # maps section id to list of status updates

        iterator_wrapper = tqdm if verbose else (lambda x: x)
        if verbose:
            print("Indexing relevant sections...")
        for section in iterator_wrapper(
            Section.objects.filter(
                extra_metrics_section_filters,
                full_code__startswith=section_query,
                course__semester=semester,
            )
            .annotate(
                efficient_semester=F("course__semester"),
            )
            .distinct()
        ):
            section_id_to_object[section.id] = section
            volume_changes_map[section.id] = []
            status_updates_map[section.id] = []

        if verbose:
            print("Computing registration volume changes over time for each section...")
        for registration in iterator_wrapper(
            Registration.objects.filter(section_id__in=section_id_to_object.keys()).annotate(
                section_capacity=F("section__capacity")
            )
        ):
            section_id = registration.section_id
            # A registration adds one to the volume when created...
            volume_changes_map[section_id].append(
                {"date": registration.created_at, "volume_change": 1}
            )
            # ...and removes it again if/when it was deactivated.
            deactivated_at = registration.deactivated_at
            if deactivated_at is not None:
                volume_changes_map[section_id].append(
                    {"date": deactivated_at, "volume_change": -1}
                )

        if verbose:
            print("Collecting status updates over time for each section...")
        for status_update in iterator_wrapper(
            StatusUpdate.objects.filter(
                section_id__in=section_id_to_object.keys(), in_add_drop_period=True
            )
        ):
            section_id = status_update.section_id
            status_updates_map[section_id].append(
                {
                    "date": status_update.created_at,
                    "old_status": status_update.old_status,
                    "new_status": status_update.new_status,
                }
            )

        if verbose:
            print("Joining updates for each section and sorting...")
        all_changes = sorted(
            [
                {"type": "status_update", "section_id": section_id, **update}
                for section_id, status_updates_list in status_updates_map.items()
                for update in status_updates_list
            ]
            + [
                {"type": "volume_change", "section_id": section_id, **change}
                for section_id, changes_list in volume_changes_map.items()
                for change in changes_list
            ],
            key=lambda x: (x["date"], int(x["type"] != "status_update")),
            # put status updates first on matching dates
        )

        # Initialize variables to be maintained in our main all_changes loop
        latest_popularity_dist_estimate = None
        registration_volumes = dict.fromkeys(section_id_to_object, 0)
        demands = dict.fromkeys(section_id_to_object, 0)

        # Initialize section statuses: a section starts with the old_status of its
        # earliest status update (None if it has no status updates at all).
        section_status = dict.fromkeys(section_id_to_object)
        for change in all_changes:
            section_id = change["section_id"]
            if change["type"] == "status_update" and section_status[section_id] is None:
                section_status[section_id] = change["old_status"]

        # Semesters other than the current one are treated as fully through add/drop.
        percent_through = (
            add_drop_period.get_percent_through_add_drop(timezone.now())
            if semester == current_semester
            else 1
        )
        if percent_through == 0:
            if verbose:
                print(
                    f"Skipping semester {semester} because the add/drop period "
                    f"hasn't started yet."
                )
            continue
        # Maximum number of volume changes allowed to pass without taking a snapshot.
        distribution_estimate_threshold = sum(
            len(changes_list) for changes_list in volume_changes_map.values()
        ) // (ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES * percent_through)
        num_changes_without_estimate = 0

        if verbose:
            print(f"Compiling demand data for semester {semester}...")
        for change in iterator_wrapper(all_changes):
            section_id = change["section_id"]

            if section_status[section_id] is None:
                # No status update seen for this section: derive a status from percent_open.
                section_status[section_id] = (
                    "O" if section_id_to_object[section_id].percent_open > 0.5 else "C"
                )
            if change["type"] == "status_update":
                section_status[section_id] = change["new_status"]
                continue

            date = change["date"]
            volume_change = change["volume_change"]
            registration_volumes[section_id] += volume_change
            demands[section_id] = (
                registration_volumes[section_id] / section_id_to_object[section_id].capacity
            )
            max_id = max(demands.keys(), key=lambda x: demands[x])
            min_id = min(demands.keys(), key=lambda x: demands[x])
            if (
                latest_popularity_dist_estimate is None
                or section_id == latest_popularity_dist_estimate["highest_demand_section"].id
                or section_id == latest_popularity_dist_estimate["lowest_demand_section"].id
                or latest_popularity_dist_estimate["highest_demand_section"].id != max_id
                or latest_popularity_dist_estimate["lowest_demand_section"].id != min_id
                or num_changes_without_estimate >= distribution_estimate_threshold
            ):
                num_changes_without_estimate = 0
                output_dict[semester].append(
                    {
                        "percent_through": percent_through,
                        "demands": [
                            val for sec_id, val in demands.items() if section_status[sec_id] == "C"
                        ],
                    }
                )

                latest_popularity_dist_estimate = {
                    "created_at": date,
                    "semester": semester,
                    "highest_demand_section": section_id_to_object[max_id],
                    "highest_demand_section_volume": registration_volumes[max_id],
                    "lowest_demand_section": section_id_to_object[min_id],
                    "lowest_demand_section_volume": registration_volumes[min_id],
                }
            else:
                num_changes_without_estimate += 1

    return output_dict
def recompute_demand_distribution_estimates(
    semesters=None, semesters_precomputed=False, verbose=False
):
    """
    Rebuild every PcaDemandDistributionEstimate for the given semester(s) from the saved
    Registration objects.

    Before rebuilding, the registration_volume and percent_open fields of the sections in
    the given semester(s) are recomputed as well (via recompute_registration_volumes and
    recompute_percent_open).

    :param semesters: A comma-separated string of semesters to recompute, e.g.
        "2019C,2020A,2020C" for fall 2019, spring 2020, and fall 2020. Defaults to None,
        meaning only the current semester. The string "all" means all semesters found in
        Courses in the db. When semesters_precomputed is True, this must instead be a
        list of individual semester strings.
    :param semesters_precomputed: False (default) if `semesters` is a raw comma-separated
        string; True if it is already a list of individual semester strings.
    :param verbose: True to print status while running; False (default) to run silently.
    """
    current_semester = get_current_semester()
    if not semesters_precomputed:
        semesters = get_semesters(semesters=semesters, verbose=verbose)

    recompute_precomputed_fields(verbose=verbose)
    recompute_registration_volumes(semesters=semesters, semesters_precomputed=True, verbose=verbose)
    recompute_percent_open(semesters=semesters, semesters_precomputed=True, verbose=verbose)

    if verbose:
        print(f"Recomputing demand distribution estimates for semesters {str(semesters)}...")

    for semester_num, semester in enumerate(semesters):
        try:
            validate_add_drop_semester(semester)
        except ValidationError:
            if verbose:
                print(f"Skipping semester {semester} (unsupported kind for stats).")
            continue
        add_drop_period = get_or_create_add_drop_period(semester)
        # Only the current semester's latest estimate is published to the cache.
        set_cache = semester == current_semester

        # Each semester is processed in its own atomic transaction, so the database is
        # left unmodified unless the whole update for that semester succeeds.
        with transaction.atomic():
            if verbose:
                print(f"Processing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.\n")
                print(
                    "Deleting existing PcaDemandDistributionEstimate objects for semester "
                    f"{semester} (so that we can recompute these objects)..."
                )
            stale_estimates = PcaDemandDistributionEstimate.objects.filter(semester=semester)
            stale_estimates.select_for_update().delete()

            section_id_to_object = {}  # section id -> section object (for this semester)
            volume_changes_map = {}  # section id -> list of volume changes
            status_updates_map = {}  # section id -> list of status updates

            iterator_wrapper = tqdm if verbose else (lambda x: x)
            if verbose:
                print("Indexing relevant sections...")
            sections = (
                Section.objects.filter(extra_metrics_section_filters, course__semester=semester)
                .annotate(
                    efficient_semester=F("course__semester"),
                )
                .distinct()
            )
            for section in iterator_wrapper(sections):
                section_id_to_object[section.id] = section
                volume_changes_map[section.id] = []
                status_updates_map[section.id] = []

            if verbose:
                print("Computing registration volume changes over time for each section...")
            registrations = (
                Registration.objects.filter(section_id__in=section_id_to_object.keys())
                .annotate(section_capacity=F("section__capacity"))
                .select_for_update()
            )
            for registration in iterator_wrapper(registrations):
                section_changes = volume_changes_map[registration.section_id]
                # +1 when the registration was created, -1 when (if) it was deactivated.
                section_changes.append({"date": registration.created_at, "volume_change": 1})
                deactivated_at = registration.deactivated_at
                if deactivated_at is not None:
                    section_changes.append({"date": deactivated_at, "volume_change": -1})

            if verbose:
                print("Collecting status updates over time for each section...")
            status_updates = StatusUpdate.objects.filter(
                section_id__in=section_id_to_object.keys(), in_add_drop_period=True
            ).select_for_update()
            for status_update in iterator_wrapper(status_updates):
                status_updates_map[status_update.section_id].append(
                    {
                        "date": status_update.created_at,
                        "old_status": status_update.old_status,
                        "new_status": status_update.new_status,
                    }
                )

            if verbose:
                print("Joining updates for each section and sorting...")
            status_update_events = [
                {"type": "status_update", "section_id": section_id, **update}
                for section_id, status_updates_list in status_updates_map.items()
                for update in status_updates_list
            ]
            volume_change_events = [
                {"type": "volume_change", "section_id": section_id, **change}
                for section_id, changes_list in volume_changes_map.items()
                for change in changes_list
            ]
            # Order by date; on matching dates, status updates come before volume changes.
            all_changes = sorted(
                status_update_events + volume_change_events,
                key=lambda x: (x["date"], int(x["type"] != "status_update")),
            )

            # State maintained while replaying all_changes
            latest_popularity_dist_estimate = None
            registration_volumes = dict.fromkeys(section_id_to_object, 0)
            demands = dict.fromkeys(section_id_to_object, 0)

            # A section's initial status is the old_status of its earliest status update
            # (it stays None if the section has no status updates).
            section_status = dict.fromkeys(section_id_to_object)
            for change in all_changes:
                section_id = change["section_id"]
                if change["type"] == "status_update" and section_status[section_id] is None:
                    section_status[section_id] = change["old_status"]

            if semester == current_semester:
                percent_through = add_drop_period.get_percent_through_add_drop(timezone.now())
            else:
                percent_through = 1
            if percent_through == 0:
                if verbose:
                    print(
                        f"Skipping semester {semester} because the add/drop period "
                        f"hasn't started yet."
                    )
                continue
            total_volume_changes = sum(
                len(changes_list) for changes_list in volume_changes_map.values()
            )
            # Maximum number of volume changes allowed to pass without a new estimate.
            distribution_estimate_threshold = total_volume_changes // (
                ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES * percent_through
            )
            num_changes_without_estimate = 0

            if verbose:
                print(f"Creating PcaDemandDistributionEstimate objects for semester {semester}...")
            for change in iterator_wrapper(all_changes):
                section_id = change["section_id"]

                if section_status[section_id] is None:
                    # No status update seen for this section: derive one from percent_open.
                    section_status[section_id] = (
                        "O" if section_id_to_object[section_id].percent_open > 0.5 else "C"
                    )
                if change["type"] == "status_update":
                    section_status[section_id] = change["new_status"]
                    continue

                date = change["date"]
                registration_volumes[section_id] += change["volume_change"]
                demands[section_id] = (
                    registration_volumes[section_id] / section_id_to_object[section_id].capacity
                )
                max_id = max(demands.keys(), key=lambda x: demands[x])
                min_id = min(demands.keys(), key=lambda x: demands[x])

                needs_new_estimate = (
                    latest_popularity_dist_estimate is None
                    or section_id == latest_popularity_dist_estimate.highest_demand_section_id
                    or section_id == latest_popularity_dist_estimate.lowest_demand_section_id
                    or latest_popularity_dist_estimate.highest_demand_section_id != max_id
                    or latest_popularity_dist_estimate.lowest_demand_section_id != min_id
                    or num_changes_without_estimate >= distribution_estimate_threshold
                )
                if not needs_new_estimate:
                    num_changes_without_estimate += 1
                    continue

                num_changes_without_estimate = 0
                closed_sections_demand_values = np.asarray(
                    [val for sec_id, val in demands.items() if section_status[sec_id] == "C"]
                )
                csrdv_frac_zero = fit_shape = fit_loc = fit_scale = None
                if len(closed_sections_demand_values) > 0:
                    closed_sections_positive_demand_values = closed_sections_demand_values[
                        closed_sections_demand_values > 0
                    ]
                    # Fraction of closed sections whose demand is not positive.
                    csrdv_frac_zero = 1 - len(closed_sections_positive_demand_values) / len(
                        closed_sections_demand_values
                    )
                    if len(closed_sections_positive_demand_values) > 0:
                        fit_shape, fit_loc, fit_scale = stats.lognorm.fit(
                            closed_sections_positive_demand_values
                        )

                latest_popularity_dist_estimate = PcaDemandDistributionEstimate(
                    created_at=date,
                    semester=semester,
                    highest_demand_section=section_id_to_object[max_id],
                    highest_demand_section_volume=registration_volumes[max_id],
                    lowest_demand_section=section_id_to_object[min_id],
                    lowest_demand_section_volume=registration_volumes[min_id],
                    csrdv_frac_zero=csrdv_frac_zero,
                    csprdv_lognorm_param_shape=fit_shape,
                    csprdv_lognorm_param_loc=fit_loc,
                    csprdv_lognorm_param_scale=fit_scale,
                )
                latest_popularity_dist_estimate.save(add_drop_period=add_drop_period)
                # NOTE(review): created_at is re-assigned and the object saved a second
                # time, presumably because the first save overwrites created_at - confirm.
                latest_popularity_dist_estimate.created_at = date
                latest_popularity_dist_estimate.save(add_drop_period=add_drop_period)

            if set_cache:
                # Publishes the last estimate created, or None if none was created.
                cache.set(
                    "current_demand_distribution_estimate",
                    latest_popularity_dist_estimate,
                    timeout=None,
                )

    if verbose:
        print(
            "Finished recomputing demand distribution estimate and section registration_volume "
            f"fields for semesters {str(semesters)}."
        )