def save(self, *args, **kwargs):
    """
    Persist this PcaDemandDistributionEstimate, then fill in its add/drop timing fields.

    The add/drop period for this object's semester comes from the optional
    `add_drop_period` kwarg when one is supplied (useful in bulk operations, since
    it avoids one lookup per object); otherwise it is obtained through
    get_or_create_add_drop_period. The kwarg is removed before the remaining
    arguments are forwarded to the parent save.

    After the first save, `in_add_drop_period` and `percent_through_add_drop_period`
    are derived from `created_at` relative to the period's estimated start/end, and
    the object is saved a second time.
    """
    try:
        period = kwargs.pop("add_drop_period")
    except KeyError:
        period = get_or_create_add_drop_period(self.semester)

    # Initial save; created_at is only read after this call.
    super().save(*args, **kwargs)

    timestamp = self.created_at
    period_start = period.estimated_start
    period_end = period.estimated_end

    is_before = timestamp < period_start
    is_after = (not is_before) and timestamp > period_end

    self.in_add_drop_period = not (is_before or is_after)
    if is_before:
        fraction = 0
    elif is_after:
        fraction = 1
    else:
        fraction = (timestamp - period_start) / (period_end - period_start)
    self.percent_through_add_drop_period = fraction

    # Second save stores the two derived fields.
    super().save()
def should_send_pca_alert(course_term, course_status):
    """
    Decide whether a PCA alert should be sent for a status change.

    :param course_term: the semester of the course whose status changed
    :param course_status: the new status code; only "O" and "C" can trigger alerts
    :return: True iff `course_term` is the current semester, the
        "SEND_FROM_WEBHOOK" option is enabled, the status is "O" or "C", and the
        semester's add/drop period has no end set or has not ended yet.
    """
    if get_current_semester() != course_term:
        return False
    add_drop_period = get_or_create_add_drop_period(course_term)
    return (
        get_bool("SEND_FROM_WEBHOOK", False)
        and (course_status == "O" or course_status == "C")
        and (
            add_drop_period.end is None
            # Take the current instant directly in TIME_ZONE. Re-labelling
            # datetime.utcnow() with a non-UTC tzinfo shifts "now" by the UTC offset.
            or datetime.now(tz=gettz(TIME_ZONE)) < add_drop_period.end
        )
    )
def pca_registration_open():
    """
    Returns True iff PCA should be accepting new registrations.

    Registration is open when the "REGISTRATION_OPEN" option is enabled (default
    True) and the current semester's add/drop period either has no end set or has
    not ended yet.
    """
    current_adp = get_or_create_add_drop_period(semester=get_current_semester())
    return get_bool("REGISTRATION_OPEN", True) and (
        current_adp.end is None
        # Take the current instant directly in TIME_ZONE. Re-labelling
        # datetime.utcnow() with a non-UTC tzinfo shifts "now" by the UTC offset.
        or datetime.now(tz=gettz(TIME_ZONE)) < current_adp.end
    )
def save(self, *args, **kwargs):
    """
    Save this StatusUpdate and then derive its add/drop timing fields.

    An `add_drop_period` kwarg may be passed to skip the per-object lookup (useful
    for bulk loads); it is removed from kwargs before the parent save is called.
    If the section's semester does not pass validate_add_drop_semester, the method
    returns right after the initial save. Otherwise it sets `in_add_drop_period`
    and `percent_through_add_drop_period` from `created_at`, saves again, marks the
    section as having status updates, and queues section_demand_change.
    """
    from alert.models import validate_add_drop_semester
    from alert.tasks import section_demand_change
    from courses.util import get_or_create_add_drop_period

    # ^ imported here to avoid circular imports

    period = kwargs.pop("add_drop_period", None)
    super().save(*args, **kwargs)

    # Only semesters that validate as add/drop semesters get the derived fields.
    try:
        validate_add_drop_semester(self.section.semester)
    except ValidationError:
        return

    if period is None:
        period = get_or_create_add_drop_period(self.section.semester)

    timestamp = self.created_at
    period_start = period.estimated_start
    period_end = period.estimated_end

    is_before = timestamp < period_start
    is_after = (not is_before) and timestamp > period_end

    self.in_add_drop_period = not (is_before or is_after)
    if is_before:
        fraction = 0
    elif is_after:
        fraction = 1
    else:
        fraction = (timestamp - period_start) / (period_end - period_start)
    self.percent_through_add_drop_period = fraction
    super().save()

    self.section.has_status_updates = True
    self.section.save()
    section_demand_change.delay(self.section.id, self.created_at)
def current_percent_open(self):
    """
    The percentage (expressed as a decimal number between 0 and 1) of the period
    between the beginning of its add/drop period and
    min[the current time, the end of its registration period] that this section
    was open. If this section's registration period hasn't started yet, this
    property is null (None in Python).

    For sections outside the current semester the stored `percent_open` is
    returned unchanged.
    """
    from courses.util import get_current_semester, get_or_create_add_drop_period

    # ^ imported here to avoid circular imports

    if self.semester != get_current_semester():
        return self.percent_open

    add_drop = get_or_create_add_drop_period(self.semester)
    add_drop_start = add_drop.estimated_start
    add_drop_end = add_drop.estimated_end
    current_time = timezone.now()
    if current_time <= add_drop_start:
        return None

    try:
        last_status_update = StatusUpdate.objects.filter(
            section=self, created_at__gt=add_drop_start, created_at__lt=add_drop_end
        ).latest("created_at")
    except StatusUpdate.DoesNotExist:
        last_status_update = None
    last_update_dt = (
        last_status_update.created_at if last_status_update is not None else add_drop_start
    )

    # The observation window stops at the end of add/drop. The same clamped end
    # must be used in the numerator below, otherwise an open section's value keeps
    # growing past 1 once add/drop is over.
    window_end = min(current_time, add_drop_end)
    period_seconds = float((window_end - add_drop_start).total_seconds())

    # Time since the last in-period status update counts as open iff the section
    # is open right now.
    percent_after_update = (
        float(self.is_open) * float((window_end - last_update_dt).total_seconds()) / period_seconds
    )
    if last_status_update is None:
        return percent_after_update

    # Time up to the last update is weighted by the stored percent_open.
    percent_before_update = (
        float(self.percent_open)
        * float((last_update_dt - add_drop_start).total_seconds())
        / period_seconds
    )
    return percent_before_update + percent_after_update
def load_add_drop_dates(verbose=False):
    """
    Scrape the Penn Almanac academic calendar and store add/drop dates for the
    current semester on its add/drop period object.

    The start is estimated as 07:00 on the day after the end of the advance
    registration period listed under the preceding semester (only when no explicit
    start is already set); the end is taken from the "course selection period ends"
    row of the current semester.

    :param verbose: print progress messages when True
    :raises ValueError: if the current semester is not a fall ("C") or spring ("A")
        semester
    """
    semester = get_current_semester()
    validate_add_drop_semester(semester)

    if verbose:
        print(f"Loading course selection period dates for semester {semester} from the Almanac")

    months = [
        "january",
        "february",
        "march",
        "april",
        "may",
        "june",
        "july",
        "august",
        "september",
        "october",
        "november",
        "december",
    ]

    def last_month_in(text):
        """Return the latest calendar month named in `text`, or None if there is none."""
        lowered = text.lower()
        found = None
        for month in months:
            if month in lowered:
                found = month
        return found

    with transaction.atomic():
        adp = get_or_create_add_drop_period(semester)
        start_date = adp.start
        end_date = adp.end
        html = requests.get("https://almanac.upenn.edu/penn-academic-calendar").content
        soup = BeautifulSoup(html, "html.parser")

        # Advance registration happens in the semester before this one, while the
        # course selection period ends during this semester.
        if semester[4] == "C":
            start_sem = semester[:4] + " spring"
            end_sem = semester[:4] + " fall"
        elif semester[4] == "A":
            start_sem = str(int(semester[:4]) - 1) + " fall"
            end_sem = semester[:4] + " spring"
        else:
            raise ValueError(
                "This script currently only supports fall or spring semesters; "
                f"{semester} is invalid"
            )

        tz = gettz(TIME_ZONE)
        s_year, s_month, s_day, e_year, e_month, e_day = (None,) * 6
        start_mode = 0  # 0 if start semester hasn't been found, 1 if it has, 2 if finished sem
        end_mode = 0  # 0 if end semester hasn't been found, 1 if it has, 2 if finished sem
        all_th_parents = {el.parent for el in soup.find_all("th")}

        for tr_el in soup.find_all("tr"):
            if tr_el in all_th_parents:
                # Header row: tells us which semester the following rows belong to.
                sem_name = tr_el.th.get_text().lower()
                if start_sem in sem_name:
                    start_mode = 1
                elif start_mode == 1:
                    start_mode = 2
                if end_sem in sem_name:
                    end_mode = 1
                elif end_mode == 1:
                    end_mode = 2
                continue

            children = list(tr_el.findChildren("td", recursive=False))
            if len(children) < 2:
                # Not an "event | date" row; nothing to parse.
                continue
            title_text = children[0].get_text().lower()
            date_string = children[1].get_text()

            if "advance registration" in title_text and start_mode == 1:
                dates = date_string.split("-")
                # The end of the range is the part after the hyphen; a single date
                # (no hyphen) is its own end.
                end_part = dates[1] if len(dates) > 1 else dates[0]
                ar_begin_month = last_month_in(dates[0])
                ar_end_month = last_month_in(end_part)
                if ar_end_month is None:
                    # Ranges like "March 25-31" name the month only once.
                    ar_end_month = ar_begin_month
                s_year = int(start_sem[:4])
                if ar_end_month is not None:
                    s_month = months.index(ar_end_month) + 1
                day_candidates = [int(s) for s in end_part.split() if s.isdigit()]
                if day_candidates:
                    s_day = day_candidates[0]

            if "course selection period ends" in title_text and end_mode == 1:
                course_sel_end_month = last_month_in(date_string)
                e_year = int(end_sem[:4])
                if course_sel_end_month is not None:
                    e_month = months.index(course_sel_end_month) + 1
                day_candidates = [int(s) for s in date_string.split() if s.isdigit()]
                if day_candidates:
                    e_day = day_candidates[0]

        if None not in [s_year, s_month, s_day] and start_date is None:
            start_date = make_aware(
                datetime.strptime(f"{s_year}-{s_month}-{s_day} 07:00", "%Y-%m-%d %H:%M")
                + timedelta(days=1),
                timezone=tz,
            )
            if verbose:
                print(
                    "NOTE: Add/drop date start was estimated as the end of the advanced "
                    "registration period. Replace this date with the actual start of the "
                    "add/drop period through the Django admin console when it is announced "
                    "to students each semester."
                )
        if None not in [e_year, e_month, e_day]:
            # NOTE(review): "%H" is a 24-hour field, so this is 11:59 in the morning;
            # confirm whether 23:59 was intended.
            end_date = make_aware(
                datetime.strptime(f"{e_year}-{e_month}-{e_day} 11:59", "%Y-%m-%d %H:%M"),
                timezone=tz,
            )
        adp.estimated_start, adp.end = start_date, end_date
        adp.save()
    if verbose:
        print("Done!")
def get_demand_data(semesters, section_query="", verbose=False):
    """
    Replay registrations and status updates for each given semester and collect
    snapshots of the raw demand values of closed sections.

    :param semesters: a sized iterable of semester strings to process; semesters
        that fail validate_add_drop_semester are skipped
    :param section_query: only sections whose full_code starts with this string
        are considered (default "": all sections)
    :param verbose: print progress (and show tqdm progress bars) when True
    :return: a dict mapping each processed semester to a list of dicts of the form
        {"percent_through": ..., "demands": [...]}, where "demands" holds the
        demand (registration volume / capacity) of every section whose replayed
        status is "C" at that point
    """
    current_semester = get_current_semester()
    output_dict = dict()

    # Respect the caller's verbosity instead of always printing.
    recompute_precomputed_fields(verbose=verbose)

    if verbose:
        print(f"Computing demand data for semesters {str(semesters)}...")
    for semester_num, semester in enumerate(semesters):
        try:
            validate_add_drop_semester(semester)
        except ValidationError:
            if verbose:
                print(f"Skipping semester {semester} (unsupported kind for stats).")
            continue
        add_drop_period = get_or_create_add_drop_period(semester)

        if verbose:
            print(f"Processing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.\n")

        output_dict[semester] = []  # list of demand data dicts
        section_id_to_object = dict()  # maps section id to section object (for this semester)
        volume_changes_map = dict()  # maps section id to list of volume changes
        status_updates_map = dict()  # maps section id to list of status updates

        iterator_wrapper = tqdm if verbose else (lambda x: x)
        if verbose:
            print("Indexing relevant sections...")
        for section in iterator_wrapper(
            Section.objects.filter(
                extra_metrics_section_filters,
                full_code__startswith=section_query,
                course__semester=semester,
            )
            .annotate(
                efficient_semester=F("course__semester"),
            )
            .distinct()
        ):
            section_id_to_object[section.id] = section
            volume_changes_map[section.id] = []
            status_updates_map[section.id] = []

        if verbose:
            print("Computing registration volume changes over time for each section...")
        for registration in iterator_wrapper(
            Registration.objects.filter(section_id__in=section_id_to_object.keys()).annotate(
                section_capacity=F("section__capacity")
            )
        ):
            section_id = registration.section_id
            # A registration adds one unit of volume when created and removes it
            # again when (if) it is deactivated.
            volume_changes_map[section_id].append(
                {"date": registration.created_at, "volume_change": 1}
            )
            deactivated_at = registration.deactivated_at
            if deactivated_at is not None:
                volume_changes_map[section_id].append(
                    {"date": deactivated_at, "volume_change": -1}
                )

        if verbose:
            print("Collecting status updates over time for each section...")
        for status_update in iterator_wrapper(
            StatusUpdate.objects.filter(
                section_id__in=section_id_to_object.keys(), in_add_drop_period=True
            )
        ):
            section_id = status_update.section_id
            status_updates_map[section_id].append(
                {
                    "date": status_update.created_at,
                    "old_status": status_update.old_status,
                    "new_status": status_update.new_status,
                }
            )

        if verbose:
            print("Joining updates for each section and sorting...")
        all_changes = sorted(
            [
                {"type": "status_update", "section_id": section_id, **update}
                for section_id, status_updates_list in status_updates_map.items()
                for update in status_updates_list
            ]
            + [
                {"type": "volume_change", "section_id": section_id, **change}
                for section_id, changes_list in volume_changes_map.items()
                for change in changes_list
            ],
            key=lambda x: (x["date"], int(x["type"] != "status_update")),
            # put status updates first on matching dates
        )

        # Initialize variables to be maintained in our main all_changes loop
        latest_popularity_dist_estimate = None
        registration_volumes = {section_id: 0 for section_id in section_id_to_object.keys()}
        demands = {section_id: 0 for section_id in section_id_to_object.keys()}

        # Initialize section statuses
        section_status = {section_id: None for section_id in section_id_to_object.keys()}
        for change in all_changes:
            section_id = change["section_id"]
            if change["type"] == "status_update":
                # The earliest status update tells us the section's status before it.
                if section_status[section_id] is None:
                    section_status[section_id] = change["old_status"]

        percent_through = (
            add_drop_period.get_percent_through_add_drop(timezone.now())
            if semester == current_semester
            else 1
        )
        if percent_through == 0:
            if verbose:
                print(
                    f"Skipping semester {semester} because the add/drop period "
                    f"hasn't started yet."
                )
            continue
        # Upper bound on consecutive volume changes allowed without a snapshot.
        distribution_estimate_threshold = sum(
            len(changes_list) for changes_list in volume_changes_map.values()
        ) // (ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES * percent_through)
        num_changes_without_estimate = 0

        if verbose:
            print(f"Compiling demand data for semester {semester}...")
        for change in iterator_wrapper(all_changes):
            section_id = change["section_id"]

            if section_status[section_id] is None:
                # No status update seen for this section: guess from percent_open.
                section_status[section_id] = (
                    "O" if section_id_to_object[section_id].percent_open > 0.5 else "C"
                )
            if change["type"] == "status_update":
                section_status[section_id] = change["new_status"]
                continue

            date = change["date"]
            volume_change = change["volume_change"]
            registration_volumes[section_id] += volume_change
            demands[section_id] = (
                registration_volumes[section_id] / section_id_to_object[section_id].capacity
            )
            max_id = max(demands.keys(), key=lambda x: demands[x])
            min_id = min(demands.keys(), key=lambda x: demands[x])
            # Snapshot when there is none yet, when this change touches or replaces
            # the current extreme sections, or when too many changes have gone by.
            if (
                latest_popularity_dist_estimate is None
                or section_id == latest_popularity_dist_estimate["highest_demand_section"].id
                or section_id == latest_popularity_dist_estimate["lowest_demand_section"].id
                or latest_popularity_dist_estimate["highest_demand_section"].id != max_id
                or latest_popularity_dist_estimate["lowest_demand_section"].id != min_id
                or num_changes_without_estimate >= distribution_estimate_threshold
            ):
                num_changes_without_estimate = 0
                output_dict[semester].append(
                    {
                        "percent_through": percent_through,
                        "demands": [
                            val for sec_id, val in demands.items() if section_status[sec_id] == "C"
                        ],
                    }
                )

                latest_popularity_dist_estimate = {
                    "created_at": date,
                    "semester": semester,
                    "highest_demand_section": section_id_to_object[max_id],
                    "highest_demand_section_volume": registration_volumes[max_id],
                    "lowest_demand_section": section_id_to_object[min_id],
                    "lowest_demand_section_volume": registration_volumes[min_id],
                }
            else:
                num_changes_without_estimate += 1

    return output_dict
def handle(self, *args, **kwargs):
    """
    Load status update history from the CSV file given by the `src` kwarg.

    Each row is read as (full_code, semester, created_at, old_status, new_status,
    alert_sent). All rows are saved as StatusUpdate objects inside a single
    transaction, after which PCA stats are recomputed for the semesters seen.

    Returns an error string (without touching the database) when the file does
    not exist or does not have a ".csv" extension.
    """
    src = os.path.abspath(kwargs["src"])
    file_extension = os.path.splitext(kwargs["src"])[1]
    if not os.path.exists(src):
        return "File does not exist."
    if file_extension != ".csv":
        return "File is not a csv."

    # First pass: count rows and collect the (full_code, semester) pairs we need.
    row_count = 0
    sections_to_fetch = set()
    with open(src) as history_file:
        for row in csv.reader(history_file):
            sections_to_fetch.add((row[0], row[1]))
            row_count += 1

    full_codes = list({pair[0] for pair in sections_to_fetch})
    semesters = list({pair[1] for pair in sections_to_fetch})
    section_obs = Section.objects.filter(
        full_code__in=full_codes, course__semester__in=semesters
    ).annotate(efficient_semester=F("course__semester"))
    # maps (full_code, semester) to section id
    sections_map = {(ob.full_code, ob.efficient_semester): ob.id for ob in section_obs}

    # maps semester to AddDropPeriod object
    add_drop_periods = {
        adp.semester: adp for adp in AddDropPeriod.objects.filter(semester__in=semesters)
    }

    print(
        "This script is atomic, meaning either all the status updates from the given "
        "CSV will be loaded into the database, or otherwise if an error is encountered, "
        "all changes will be rolled back and the database will remain as it was "
        "before the script was run."
    )

    known_statuses = ("O", "C", "X")
    # Second pass: create the objects.
    with transaction.atomic():
        with open(src) as history_file:
            print(f"Beginning to load status history from {src}")
            for row in tqdm(csv.reader(history_file), total=row_count):
                full_code, semester = row[0], row[1]
                naive_created_at = datetime.strptime(row[2], "%Y-%m-%d %H:%M:%S.%f %Z")
                created_at = make_aware(
                    naive_created_at, timezone=gettz(TIME_ZONE), is_dst=None
                )
                old_status, new_status, alert_sent = row[3], row[4], row[5]
                # Unrecognized status codes are stored as the empty string.
                if old_status not in known_statuses:
                    old_status = ""
                if new_status not in known_statuses:
                    new_status = ""

                key = (full_code, semester)
                if key not in sections_map:
                    raise ValueError(f"Section {full_code} {semester} not found in db.")

                status_update = StatusUpdate(
                    section_id=sections_map[key],
                    old_status=old_status,
                    new_status=new_status,
                    created_at=created_at,
                    alert_sent=alert_sent,
                )
                if semester not in add_drop_periods:
                    add_drop_periods[semester] = get_or_create_add_drop_period(semester)
                status_update.save(add_drop_period=add_drop_periods[semester])

    print(f"Finished loading status history from {src}... processed {row_count} rows. ")

    print(f"Recomputing PCA Stats for {len(semesters)} semesters...")
    recompute_stats(semesters=",".join(semesters), verbose=True)
def section_demand_change(section_id, updated_at):
    """
    This function should be called when a section's demand changes (i.e. the
    number of active registrations changes, or the section's status is updated).
    It updates the `PcaDemandDistributionEstimate` model and
    `current_demand_distribution_estimate` cache to reflect the demand change.

    Nothing happens when the section is not in the current semester or when no
    section matches the metrics filters for that semester.

    :param: section_id: the id of the section involved in the demand change
    :param: updated_at: the datetime at which the demand change occurred
    """
    section = Section.objects.get(id=section_id)
    semester = section.semester
    if semester != get_current_semester():
        return

    with transaction.atomic():
        # A missing cache entry and a cached None (stored when a recompute finds
        # no estimate) both mean "no usable estimate": start a fresh one.
        current_demand_distribution_estimate = cache.get("current_demand_distribution_estimate")
        create_new_distribution_estimate = (
            current_demand_distribution_estimate is None
            or current_demand_distribution_estimate.semester != semester
        )

        sections_qs = (
            Section.objects.filter(extra_metrics_section_filters, course__semester=semester)
            .select_for_update()
            .annotate(
                raw_demand=Case(
                    When(
                        Q(capacity__gt=0),
                        then=(
                            Cast(
                                "registration_volume",
                                models.FloatField(),
                            )
                            / Cast("capacity", models.FloatField())
                        ),
                    ),
                    default=None,
                    output_field=models.FloatField(),
                ),
            )
            .order_by("raw_demand")
        )

        try:
            lowest_demand_section = sections_qs[:1].get()
            # Django querysets do not support negative slicing, so take the first
            # row of the reversed ordering instead of `[-1:]`.
            highest_demand_section = sections_qs.reverse()[:1].get()
        except Section.DoesNotExist:
            return  # Don't add a PcaDemandDistributionEstimate -- there are no valid sections yet

        if (
            create_new_distribution_estimate
            or highest_demand_section.raw_demand
            > current_demand_distribution_estimate.highest_raw_demand
            or lowest_demand_section.raw_demand
            < current_demand_distribution_estimate.lowest_raw_demand
        ):
            closed_sections_demand_values = np.asarray(
                sections_qs.filter(status="C").values_list("raw_demand", flat=True)
            )
            # The term "closed sections positive raw demand values" is
            # sometimes abbreviated as "csprdv"
            csrdv_frac_zero, fit_shape, fit_loc, fit_scale = (None, None, None, None)
            if len(closed_sections_demand_values) > 0:
                closed_sections_positive_demand_values = closed_sections_demand_values[
                    np.where(closed_sections_demand_values > 0)
                ]
                # Fraction of closed sections whose raw demand is not positive.
                csrdv_frac_zero = 1 - len(closed_sections_positive_demand_values) / len(
                    closed_sections_demand_values
                )
                if len(closed_sections_positive_demand_values) > 0:
                    fit_shape, fit_loc, fit_scale = stats.lognorm.fit(
                        closed_sections_positive_demand_values
                    )
            new_demand_distribution_estimate = PcaDemandDistributionEstimate(
                semester=semester,
                highest_demand_section=highest_demand_section,
                highest_demand_section_volume=highest_demand_section.registration_volume,
                lowest_demand_section=lowest_demand_section,
                lowest_demand_section_volume=lowest_demand_section.registration_volume,
                csrdv_frac_zero=csrdv_frac_zero,
                csprdv_lognorm_param_shape=fit_shape,
                csprdv_lognorm_param_loc=fit_loc,
                csprdv_lognorm_param_scale=fit_scale,
            )
            add_drop_period = get_or_create_add_drop_period(semester)
            # Saved twice: created_at is overwritten with the time of the demand
            # change only after the first save, then stored by the second.
            new_demand_distribution_estimate.save(add_drop_period=add_drop_period)
            new_demand_distribution_estimate.created_at = updated_at
            new_demand_distribution_estimate.save(add_drop_period=add_drop_period)
            cache.set(
                "current_demand_distribution_estimate",
                new_demand_distribution_estimate,
                timeout=(
                    add_drop_period.estimated_end - add_drop_period.estimated_start
                ).total_seconds()
                // ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES,
            )  # set timeout to roughly follow ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES
def recompute_demand_distribution_estimates(
    semesters=None, semesters_precomputed=False, verbose=False
):
    """
    This script recomputes all PcaDemandDistributionEstimate objects for the given semester(s)
    based on saved Registration objects. In doing so, it also recomputes the registration_volume
    and percent_open fields for all sections in the given semester(s)
    (by calling recompute_registration_volumes and recompute_percent_open).

    For each semester, the existing estimates are deleted and the semester's registrations
    and in-add/drop status updates are replayed in chronological order; a new estimate is
    saved whenever the highest/lowest demand section is affected or too many volume changes
    have passed without one. For the current semester, the last estimate created (or None)
    is stored in the cache under "current_demand_distribution_estimate".

    :param semesters: The semesters argument should be a comma-separated list of string semesters
        corresponding to the semesters for which you want to recompute demand distribution
        estimate, i.e. "2019C,2020A,2020C" for fall 2019, spring 2020, and fall 2020. It defaults
        to None, in which case only the current semester is used. If you supply the string "all",
        it will recompute for all semesters found in Courses in the db.
        If semesters_precomputed is set to True (non-default), then this argument should
        instead be a list of single string semesters.
    :param semesters_precomputed: If False (default), the semesters argument will expect a raw
        comma-separated string input. If True, the semesters argument will expect a list of
        individual string semesters.
    :param verbose: Set to True if you want this script to print its status as it goes,
        or keep as False (default) if you want the script to work silently.
    """

    current_semester = get_current_semester()
    semesters = (
        semesters if semesters_precomputed else get_semesters(semesters=semesters, verbose=verbose)
    )

    # Refresh the inputs that the replay below depends on.
    recompute_precomputed_fields(verbose=verbose)
    recompute_registration_volumes(semesters=semesters, semesters_precomputed=True, verbose=verbose)
    recompute_percent_open(semesters=semesters, semesters_precomputed=True, verbose=verbose)

    if verbose:
        print(f"Recomputing demand distribution estimates for semesters {str(semesters)}...")
    for semester_num, semester in enumerate(semesters):
        try:
            validate_add_drop_semester(semester)
        except ValidationError:
            if verbose:
                print(f"Skipping semester {semester} (unsupported kind for stats).")
            continue
        add_drop_period = get_or_create_add_drop_period(semester)
        set_cache = semester == current_semester

        with transaction.atomic():
            # We make this command an atomic transaction, so that the database will not
            # be modified unless the entire update for a semester succeeds.
            # If set_cache is True, we will set the current_demand_distribution_estimate variable
            # in cache

            if verbose:
                print(f"Processing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.\n")
                print(
                    "Deleting existing PcaDemandDistributionEstimate objects for semester "
                    f"{semester} (so that we can recompute these objects)..."
                )
            PcaDemandDistributionEstimate.objects.filter(
                semester=semester
            ).select_for_update().delete()

            section_id_to_object = dict()  # maps section id to section object (for this semester)
            volume_changes_map = dict()  # maps section id to list of volume changes
            status_updates_map = dict()  # maps section id to list of status updates

            # Show progress bars only in verbose mode.
            iterator_wrapper = tqdm if verbose else (lambda x: x)
            if verbose:
                print("Indexing relevant sections...")
            for section in iterator_wrapper(
                Section.objects.filter(extra_metrics_section_filters, course__semester=semester)
                .annotate(
                    efficient_semester=F("course__semester"),
                )
                .distinct()
            ):
                section_id_to_object[section.id] = section
                volume_changes_map[section.id] = []
                status_updates_map[section.id] = []

            if verbose:
                print("Computing registration volume changes over time for each section...")
            for registration in iterator_wrapper(
                Registration.objects.filter(section_id__in=section_id_to_object.keys())
                .annotate(section_capacity=F("section__capacity"))
                .select_for_update()
            ):
                section_id = registration.section_id
                # A registration adds one unit of volume when created and removes it
                # again when (if) it is deactivated.
                volume_changes_map[section_id].append(
                    {"date": registration.created_at, "volume_change": 1}
                )
                deactivated_at = registration.deactivated_at
                if deactivated_at is not None:
                    volume_changes_map[section_id].append(
                        {"date": deactivated_at, "volume_change": -1}
                    )

            if verbose:
                print("Collecting status updates over time for each section...")
            for status_update in iterator_wrapper(
                StatusUpdate.objects.filter(
                    section_id__in=section_id_to_object.keys(), in_add_drop_period=True
                ).select_for_update()
            ):
                section_id = status_update.section_id
                status_updates_map[section_id].append(
                    {
                        "date": status_update.created_at,
                        "old_status": status_update.old_status,
                        "new_status": status_update.new_status,
                    }
                )

            if verbose:
                print("Joining updates for each section and sorting...")
            all_changes = sorted(
                [
                    {"type": "status_update", "section_id": section_id, **update}
                    for section_id, status_updates_list in status_updates_map.items()
                    for update in status_updates_list
                ]
                + [
                    {"type": "volume_change", "section_id": section_id, **change}
                    for section_id, changes_list in volume_changes_map.items()
                    for change in changes_list
                ],
                key=lambda x: (x["date"], int(x["type"] != "status_update")),
                # put status updates first on matching dates
            )

            # Initialize variables to be maintained in our main all_changes loop
            latest_popularity_dist_estimate = None
            registration_volumes = {section_id: 0 for section_id in section_id_to_object.keys()}
            demands = {section_id: 0 for section_id in section_id_to_object.keys()}

            # Initialize section statuses
            section_status = {section_id: None for section_id in section_id_to_object.keys()}
            for change in all_changes:
                section_id = change["section_id"]
                if change["type"] == "status_update":
                    # The earliest status update gives the section's status before it.
                    if section_status[section_id] is None:
                        section_status[section_id] = change["old_status"]

            percent_through = (
                add_drop_period.get_percent_through_add_drop(timezone.now())
                if semester == current_semester
                else 1
            )
            if percent_through == 0:
                if verbose:
                    print(
                        f"Skipping semester {semester} because the add/drop period "
                        f"hasn't started yet."
                    )
                # Note: the deletion above has already run for this semester.
                continue
            # Upper bound on consecutive volume changes allowed without a new estimate.
            distribution_estimate_threshold = sum(
                len(changes_list) for changes_list in volume_changes_map.values()
            ) // (ROUGH_MINIMUM_DEMAND_DISTRIBUTION_ESTIMATES * percent_through)
            num_changes_without_estimate = 0

            if verbose:
                print(f"Creating PcaDemandDistributionEstimate objects for semester {semester}...")
            for change in iterator_wrapper(all_changes):
                section_id = change["section_id"]

                if section_status[section_id] is None:
                    # No status update seen for this section: guess from percent_open.
                    section_status[section_id] = (
                        "O" if section_id_to_object[section_id].percent_open > 0.5 else "C"
                    )
                if change["type"] == "status_update":
                    section_status[section_id] = change["new_status"]
                    continue

                date = change["date"]
                volume_change = change["volume_change"]
                registration_volumes[section_id] += volume_change
                demands[section_id] = (
                    registration_volumes[section_id] / section_id_to_object[section_id].capacity
                )
                max_id = max(demands.keys(), key=lambda x: demands[x])
                min_id = min(demands.keys(), key=lambda x: demands[x])
                # Create a new estimate when there is none yet, when this change touches
                # or replaces the current extreme sections, or when the threshold of
                # changes without an estimate has been reached.
                if (
                    latest_popularity_dist_estimate is None
                    or section_id == latest_popularity_dist_estimate.highest_demand_section_id
                    or section_id == latest_popularity_dist_estimate.lowest_demand_section_id
                    or latest_popularity_dist_estimate.highest_demand_section_id != max_id
                    or latest_popularity_dist_estimate.lowest_demand_section_id != min_id
                    or num_changes_without_estimate >= distribution_estimate_threshold
                ):
                    num_changes_without_estimate = 0
                    closed_sections_demand_values = np.asarray(
                        [val for sec_id, val in demands.items() if section_status[sec_id] == "C"]
                    )
                    csrdv_frac_zero, fit_shape, fit_loc, fit_scale = (None, None, None, None)
                    if len(closed_sections_demand_values) > 0:
                        closed_sections_positive_demand_values = closed_sections_demand_values[
                            np.where(closed_sections_demand_values > 0)
                        ]
                        # Fraction of closed sections whose demand is not positive.
                        csrdv_frac_zero = 1 - len(closed_sections_positive_demand_values) / len(
                            closed_sections_demand_values
                        )
                        if len(closed_sections_positive_demand_values) > 0:
                            fit_shape, fit_loc, fit_scale = stats.lognorm.fit(
                                closed_sections_positive_demand_values
                            )

                    latest_popularity_dist_estimate = PcaDemandDistributionEstimate(
                        created_at=date,
                        semester=semester,
                        highest_demand_section=section_id_to_object[max_id],
                        highest_demand_section_volume=registration_volumes[max_id],
                        lowest_demand_section=section_id_to_object[min_id],
                        lowest_demand_section_volume=registration_volumes[min_id],
                        csrdv_frac_zero=csrdv_frac_zero,
                        csprdv_lognorm_param_shape=fit_shape,
                        csprdv_lognorm_param_loc=fit_loc,
                        csprdv_lognorm_param_scale=fit_scale,
                    )
                    # Saved twice, re-assigning created_at in between.
                    # NOTE(review): presumably the first save overwrites created_at
                    # (e.g. auto_now_add) -- confirm against the model definition.
                    latest_popularity_dist_estimate.save(add_drop_period=add_drop_period)
                    latest_popularity_dist_estimate.created_at = date
                    latest_popularity_dist_estimate.save(add_drop_period=add_drop_period)
                else:
                    num_changes_without_estimate += 1

            if set_cache:
                if latest_popularity_dist_estimate is not None:
                    cache.set(
                        "current_demand_distribution_estimate",
                        latest_popularity_dist_estimate,
                        timeout=None,
                    )
                else:
                    # No estimate was created: store None explicitly. Readers of this
                    # cache key must therefore be prepared to get None back.
                    cache.set("current_demand_distribution_estimate", None, timeout=None)

    if verbose:
        print(
            "Finished recomputing demand distribution estimate and section registration_volume "
            f"fields for semesters {str(semesters)}."
        )
def recompute_percent_open(semesters=None, verbose=False, semesters_precomputed=False):
    """
    Recomputes the percent_open field for each section in the given semester(s).

    For a section with status updates inside the add/drop period, percent_open is the
    fraction of time the section was open: measured up to the last status update for the
    current semester, and over the whole add/drop period for other semesters. For a
    section without such updates, percent_open is 1.0 or 0.0 depending on whether its most
    recent status before the period (or, failing that, its current status) is "O".

    :param semesters: The semesters argument should be a comma-separated list of string semesters
        corresponding to the semesters for which you want to recompute percent_open fields,
        i.e. "2019C,2020A,2020C" for fall 2019, spring 2020, and fall 2020. It defaults to None,
        in which case only the current semester is used. If you supply the string "all",
        it will recompute for all semesters found in Courses in the db.
        If semesters_precomputed is set to True (non-default), then this argument should
        instead be a list of single string semesters.
    :param semesters_precomputed: If False (default), the semesters argument will expect a raw
        comma-separated string input. If True, the semesters argument will expect a list of
        individual string semesters.
    :param verbose: Set to True if you want this script to print its status as it goes,
        or keep as False (default) if you want the script to work silently.
    """

    current_semester = get_current_semester()
    semesters = (
        semesters if semesters_precomputed else get_semesters(semesters=semesters, verbose=verbose)
    )

    if verbose:
        print(f"Recomputing open percentages for semesters {str(semesters)}...")

    for semester_num, semester in enumerate(semesters):
        with transaction.atomic():
            # We make this command an atomic transaction, so that the database will not
            # be modified unless the entire update for a semester succeeds.

            if verbose:
                print(f"\nProcessing semester {semester}, " f"{(semester_num+1)}/{len(semesters)}.")

            add_drop = get_or_create_add_drop_period(semester)
            add_drop_start = add_drop.estimated_start
            add_drop_end = add_drop.estimated_end

            # NOTE(review): querysets are lazy and this one is never evaluated, so no
            # rows are actually locked here. Left unchanged; confirm whether a lock is
            # required before forcing evaluation.
            StatusUpdate.objects.filter(section__course__semester=semester).select_for_update()

            sections = Section.objects.filter(course__semester=semester)
            num_erroneous_updates = 0
            num_total_updates = 0
            for section in sections:
                # Fetch the in-period updates once; the list is reused for the count,
                # the emptiness check and the first/last lookups below.
                status_updates = list(
                    StatusUpdate.objects.filter(
                        section=section, created_at__gt=add_drop_start, created_at__lt=add_drop_end
                    ).order_by("created_at")
                )
                num_total_updates += len(status_updates)
                total_open_seconds = 0
                if not status_updates:
                    try:
                        guess_status = (
                            StatusUpdate.objects.filter(
                                section=section, created_at__lte=add_drop_start
                            )
                            .latest("created_at")
                            .new_status
                        )
                    except StatusUpdate.DoesNotExist:
                        guess_status = section.status
                    section.percent_open = float(guess_status == "O")
                else:
                    last_dt = add_drop_start
                    last_status = status_updates[0].old_status
                    for update in status_updates:
                        # An update whose old_status disagrees with the previous
                        # update's new_status is counted as erroneous.
                        if last_status != update.old_status:
                            num_erroneous_updates += 1
                        if last_status == "O" and update.new_status != "O":
                            total_open_seconds += (update.created_at - last_dt).total_seconds()
                        last_dt = update.created_at
                        last_status = update.new_status
                    section.percent_open = float(total_open_seconds) / float(
                        (status_updates[-1].created_at - add_drop_start).total_seconds()
                    )
                    if section.semester != current_semester:
                        # Past semester: also count the stretch from the last update to
                        # the end of add/drop, over the whole period.
                        section.percent_open = float(
                            total_open_seconds
                            + int(last_status == "O") * (add_drop_end - last_dt).total_seconds()
                        ) / float((add_drop_end - add_drop_start).total_seconds())
                section.save()
            if verbose:
                print(
                    f"Finished calculating percent_open for {len(sections)} sections from "
                    f"semester {semester}, encountered {num_erroneous_updates} erroneous "
                    f"Status Updates (out of {num_total_updates} total Status Updates)"
                )
    if verbose:
        print(f"Finished recomputing open percentages for semesters {str(semesters)}.")