def new_log(engine, timestamp_utc=None): """ Logs successful update of edu_data Arguments: engine - an sql engine instance timestamp_utc - timestamp value Returns: success - flag if the action was succesful error - error message """ if timestamp_utc is None: timestamp_utc = datetime.now(timezone.utc) session = session_open(engine) new_log_entry = LogsClass( code=CONST_LOG_CODE_SUCCESS, timestamp_utc=timestamp_utc, ) session.add(new_log_entry) session.flush() # session.commit() session_close(session) return True, None
def _find_new_subs(engine, prev_timestamp_utc): """ Looks for new subscriptions to be included in the summary since the last summary. Arguments: engine - an sql engine instance prev_timestamp_utc - timestamp of the previous summary Returns: success - flag if the action was succesful error - error message new_subs - a lits of new subscriptions to be included in the summary """ if prev_timestamp_utc is not None: timestamp_wh = DetailsClass.timestamp_utc >= prev_timestamp_utc else: timestamp_wh = True session = session_open(engine) new_subs = (session.query(DetailsClass).filter( DetailsClass.new_flag, timestamp_wh, ).order_by( DetailsClass.subscription_name.asc(), DetailsClass.timestamp_utc.asc(), ).all()) if len(new_subs) > 0: session.expunge_all() session_close(session) return True, None, new_subs
def get_subs_dict(engine): """ Returns a dictionary containing all subscription ids and their internal id numbers. Arguments: engine - an sql engine instance Returns: success - flag if the action was succesful error - error message subs_dict - subscription id/internal id dictionary """ session = session_open(engine) subs_list = (session.query(SubscriptionClass).with_entities( SubscriptionClass.guid, SubscriptionClass.id).all()) session.expunge_all() session_close(session) subs_dict = {} for (sub_guid, sub_id) in subs_list: subs_dict.update({sub_guid: sub_id}) return True, None, subs_dict
def _update_labs(engine, eduhub_df, course_dict): """ Updates the database with the labs eduhub crawl data and returns a dictionary containing all labs and their internal id numbers. Arguments: engine - an sql engine instance eduhub_df - pandas dataframe with the eduhub crawl data course_dict - course name/internal id dictionary Returns: success - flag if the action was succesful error - error message lab_dict - lab name/internal id dictionary """ lab_dict = {} # unique course/lab combinations try: labs = eduhub_df[[CONST_PD_COL_COURSE_NAME, CONST_PD_COL_LAB_NAME]] unique_labs = labs.drop_duplicates() except KeyError as exception: return False, exception, lab_dict session = session_open(engine) # inserting new labs for _, row in unique_labs.iterrows(): course_name = row[CONST_PD_COL_COURSE_NAME] lab_name = row[CONST_PD_COL_LAB_NAME] course_id = course_dict[course_name] try: lab_id = (session.query(LabClass).filter( LabClass.course_id == course_id).filter( LabClass.name == lab_name).first().id) except Exception: lab_id = 0 # create new lab if lab_id == 0: # new lab -> insert to the database new_lab = LabClass( name=lab_name, course_id=course_id, ) session.add(new_lab) session.flush() # session.commit() lab_dict.update({(course_name, lab_name): new_lab.id}) else: lab_dict.update({(course_name, lab_name): lab_id}) session_close(session) return True, None, lab_dict
def _update_subscriptions(engine, eduhub_df): """ Updates the database with the subscriptions eduhub crawl data and returns a dictionary containing all subscription ids and their internal id numbers. Arguments: engine - an sql engine instance eduhub_df - pandas dataframe with the eduhub crawl data Returns: success - flag if the action was succesful error - error message sub_dict - subscription id/internal id dictionary """ sub_dict = {} try: unique_subscription_ids = eduhub_df[CONST_PD_COL_SUB_ID].unique() except KeyError as exception: return False, exception, sub_dict if len(unique_subscription_ids) == 0: return False, "dataframe does not contain course names", sub_dict session = session_open(engine) # get the internal ids of the subscriptions that are already in # the database query_result = (session.query(SubscriptionClass).filter( SubscriptionClass.guid.in_(unique_subscription_ids)).with_entities( SubscriptionClass.guid, SubscriptionClass.id).all()) for (sub_guid, sub_id) in query_result: sub_dict.update({sub_guid: sub_id}) # insert new subscriptions in to the database for sub_guid in unique_subscription_ids: if sub_guid not in sub_dict.keys(): # new course -> insert to the database new_subscription = SubscriptionClass(guid=sub_guid, ) session.add(new_subscription) session.flush() # session.commit() sub_dict.update({sub_guid: new_subscription.id}) session_close(session) return True, None, sub_dict
def _find_upd_subs(engine, prev_timestamp_utc): """ Looks for updated subscriptions to be included in the summary since the last summary. Arguments: engine - an sql engine instance prev_timestamp_utc - timestamp of the previous summary Returns: success - flag if the action was succesful error - error message update_list - a list of tuples (before, after) of subscription details """ update_list = [] if prev_timestamp_utc is not None: timestamp_wh = DetailsClass.timestamp_utc >= prev_timestamp_utc else: timestamp_wh = True session = session_open(engine) upd_subs = (session.query(DetailsClass).filter( DetailsClass.update_flag, timestamp_wh, ).order_by( DetailsClass.subscription_name.asc(), DetailsClass.timestamp_utc.asc(), ).all()) if len(upd_subs) > 0: session.expunge_all() # for every updated subscription find its details before the update for latest_details in upd_subs: # get the latest details before prev_details = (session.query(DetailsClass).filter( DetailsClass.sub_id == latest_details.sub_id, DetailsClass.timestamp_utc < latest_details.timestamp_utc, ).order_by(desc(DetailsClass.timestamp_utc)).first()) session.expunge(prev_details) update_list.append((prev_details, latest_details)) session_close(session) return True, None, update_list
def _update_courses(engine, eduhub_df): """ Updates the database with the courses eduhub crawl data and returns a dictionary containing all courses and their internal id numbers. Arguments: engine - an sql engine instance eduhub_df - pandas dataframe with the eduhub crawl data Returns: success - flag if the action was succesful error - error message course_dict - course name/internal id dictionary """ course_dict = {} try: unique_courses = eduhub_df[CONST_PD_COL_COURSE_NAME].unique() except KeyError as exception: return False, exception, course_dict if len(unique_courses) == 0: return False, "dataframe does not contain course names", course_dict session = session_open(engine) # get the ids of the courses that are already in the database query_result = (session.query(CourseClass).filter( CourseClass.name.in_(unique_courses)).with_entities( CourseClass.name, CourseClass.id).all()) for (course_name, course_id) in query_result: course_dict.update({course_name: course_id}) # insert new courses in to the database for course_name in unique_courses: if course_name not in course_dict.keys(): # new course -> insert to the database new_course = CourseClass(name=course_name, ) session.add(new_course) session.flush() # session.commit() course_dict.update({course_name: new_course.id}) session_close(session) return True, None, course_dict
def get_latest_log_timestamp(engine): """ Gets the latest timestamp value. Arguments: engine - an sql engine instance Returns: success - flag if the action was succesful error - error message timestamp - timestamp value """ session = session_open(engine) timestamp = session.query(func.max(LogsClass.timestamp_utc)).first()[0] session.expunge_all() session_close(session) return True, None, timestamp
def _find_sent_notifications(engine, prev_timestamp_utc): """ Looks for updated subscriptions to be included in the summary since the last summary. Arguments: engine - an sql engine instance prev_timestamp_utc - timestamp of the previous summary Returns: success - flag if the action was succesful error - error message noti_list - a list of details when notifications were sent """ if prev_timestamp_utc is not None: wh_clause = or_( DetailsClass.new_notice_sent >= prev_timestamp_utc, DetailsClass.update_notice_sent >= prev_timestamp_utc, DetailsClass.expiry_notice_sent >= prev_timestamp_utc, DetailsClass.usage_notice_sent >= prev_timestamp_utc, ) else: wh_clause = or_( DetailsClass.new_notice_sent.isnot(None), DetailsClass.update_notice_sent.isnot(None), DetailsClass.expiry_notice_sent.isnot(None), DetailsClass.usage_notice_sent.isnot(None), ) session = session_open(engine) noti_list = (session.query(DetailsClass).filter(wh_clause).order_by( DetailsClass.subscription_name.asc(), DetailsClass.timestamp_utc.asc(), ).all()) if len(noti_list) > 0: session.expunge_all() return True, None, noti_list
def get_labs_dict(engine): """ Returns a dictionary containing all labs and their internal id numbers. Arguments: engine - an sql engine instance Returns: success - flag if the action was succesful error - error message labs_dict - lab name/internal id dictionary """ session = session_open(engine) labs_list = (session.query(LabClass).with_entities(LabClass.id, LabClass.course_id, LabClass.name).all()) courses_list = (session.query(CourseClass).with_entities( CourseClass.id, CourseClass.name).all()) session.expunge_all() session_close(session) courses_dict = {} for (course_id, course_name) in courses_list: courses_dict.update({course_id: course_name}) labs_dict = {} for (lab_id, course_id, lab_name) in labs_list: course_name = courses_dict[course_id] labs_dict.update({(course_name, lab_name): lab_id}) return True, None, labs_dict
def _indv_emails(engine, lab_dict, sub_dict, new_sub_list, upd_sub_list, timestamp_utc=None): """ Prepares and sends out individual emails for new and updated subscriptions. Arguments: engine - an sql engine instance lab_dict - lab name /internal id dictionary sub_dict - subscription id /internal id dictionary new_sub_list - a list of details of new subscriptions upd_sub_list - a list of tuple (before, after) of subscription details timestamp_utc - timestamp when emails were sent (for testing purposes) Returns: success - flag if the action was succesful error - error message new_count - the number of new subscription nutifications sent upd_count - the number of subscription update nutifications sent """ new_count = 0 upd_count = 0 session = session_open(engine) # Notifying about new subscriptions for new_sub in new_sub_list: # generating html content success, _, html_content = indiv_email_new(lab_dict, sub_dict, new_sub) if success: # sending email log( "Sending new subscription email to: %s " % (new_sub.subscription_users), level=1, ) success, _ = send_email( new_sub.subscription_users, CONST_EMAIL_SUBJECT_NEW, html_content, ) # let's note that the email was sent successfully if success: if timestamp_utc is not None: notice_sent_timestamp = timestamp_utc else: notice_sent_timestamp = datetime.now(timezone.utc) session.query(DetailsClass).filter( DetailsClass.id == new_sub.id).update( {"new_notice_sent": notice_sent_timestamp}) session.commit() new_count += 1 # Notifying about updates for _, sub_update in enumerate(upd_sub_list): prev_details = sub_update[0] new_details = sub_update[1] send_upd_email = False # check which subsciptions have chaged details if details_changed(prev_details, new_details): send_upd_email = True # # checks if the unchanged cancelled subscription's consumption # # has been updated, if so, sends a update notifcation email. # switching this functionality off as it takes few days (about 2) # for usage date to arrive and the service tends # to send too many emails. # if ( # ( # new_details.subscription_status.lower() # == CONST_SUB_CANCELLED.lower() # ) # and not details_changed(prev_details, new_details) # and ( # prev_details.handout_consumed != new_details.handout_consumed # ) # ): # send_upd_email = True if send_upd_email: success, _, html_content = indiv_email_upd(lab_dict, sub_dict, sub_update) if success: log( "Sending subscription update email to: %s " % (new_details.subscription_users), level=1, ) success, _ = send_email( new_details.subscription_users, CONST_EMAIL_SUBJECT_UPD, html_content, ) # let's note that the email was sent successfully if success: if timestamp_utc is not None: notice_sent_timestamp = timestamp_utc else: notice_sent_timestamp = datetime.now(timezone.utc) session.query(DetailsClass).filter( DetailsClass.id == new_details.id).update( {"update_notice_sent": notice_sent_timestamp}) session_close(session) upd_count += 1 session_close(session) return True, None, new_count, upd_count
def _update_details(engine, eduhub_df, lab_dict, sub_dict): """ Updates the database with the subscription/handouts details eduhub crawl data. Arguments: engine - an sql engine instance eduhub_df - pandas dataframe with the eduhub crawl data lab_dict - lab name /internal id dictionary sub_dict - subscription id /internal id dictionary Returns: success - flag if the action was succesful error - error message new_list - a list of details of new subscriptions update_list - a list of tuples (before, after) of subscription details """ new_list = [] update_list = [] session = session_open(engine) for sub_guid in sub_dict.keys(): # selecting all the entries for a particular subscription and # sorting them by the crawl time in ascending order sub_eduhub_df = eduhub_df[eduhub_df[CONST_PD_COL_SUB_ID] == sub_guid].sort_values( [CONST_PD_COL_CRAWL_TIME_UTC]) sub_id = sub_dict[sub_guid] # get the latest details before prev_details = (session.query(DetailsClass).filter( DetailsClass.sub_id == sub_id).order_by( desc(DetailsClass.timestamp_utc)).first()) if prev_details is not None: session.expunge(prev_details) # appending details for _, row in sub_eduhub_df.iterrows(): crawl_time = row[CONST_PD_COL_CRAWL_TIME_UTC] if (session.query(DetailsClass).filter( DetailsClass.sub_id == sub_id, DetailsClass.timestamp_utc == crawl_time, ).scalar() is None): if type(row[CONST_PD_COL_SUB_USERS]) is str: sub_users = ", ".join(eval(row[CONST_PD_COL_SUB_USERS])) elif type(row[CONST_PD_COL_SUB_USERS]) is list: sub_users = ", ".join(row[CONST_PD_COL_SUB_USERS]) else: sub_users = "" # While subscription is "Pending acceptance", # it doesn't show its expiry date if len(row[CONST_PD_COL_SUB_EXPIRY_DATE]) == 0: expiry_date = None else: expiry_date = datetime.strptime( row[CONST_PD_COL_SUB_EXPIRY_DATE], "%Y-%m-%d") # adds new record to the database new_sub_detail = DetailsClass( sub_id=sub_id, lab_id=lab_dict[( row[CONST_PD_COL_COURSE_NAME], row[CONST_PD_COL_LAB_NAME], )], handout_name=row[CONST_PD_COL_HANDOUT_NAME], handout_status=row[CONST_PD_COL_HANDOUT_STATUS], handout_budget=CONVERT_LAMBDA( row[CONST_PD_COL_HANDOUT_BUDGET]), handout_consumed=CONVERT_LAMBDA( row[CONST_PD_COL_HANDOUT_CONSUMED]), subscription_name=row[CONST_PD_COL_SUB_NAME], subscription_status=row[CONST_PD_COL_SUB_STATUS], subscription_expiry_date=expiry_date, subscription_users=sub_users, timestamp_utc=crawl_time, new_flag=(prev_details is None), ) if (prev_details is not None) and details_changed( prev_details, new_sub_detail): new_sub_detail.update_flag = True session.add(new_sub_detail) session.flush() # session.commit() # get the latest details after latest_details = (session.query(DetailsClass).filter( DetailsClass.sub_id == sub_id).order_by( desc(DetailsClass.timestamp_utc)).first()) session.expunge(latest_details) if prev_details is None: new_list.append(latest_details) else: update_list.append((prev_details, latest_details)) # if handout_budget has changed, # nullify usage_code and usage_notice_sent if prev_details.handout_budget != latest_details.handout_budget: session.query(SubscriptionClass).filter( SubscriptionClass.id == latest_details.sub_id).update({ "usage_code": None, "usage_notice_sent": None, }) session.commit() # if subscription_expiry_date has changed, # nullify expiry_code and expiry_notice_sent if (prev_details.subscription_expiry_date != latest_details.subscription_expiry_date): session.query(SubscriptionClass).filter( SubscriptionClass.id == latest_details.sub_id).update({ "expiry_code": None, "expiry_notice_sent": None, }) session.commit() session_close(session) return True, None, new_list, update_list
def notify_expire(engine, lab_dict, sub_dict, upd_sub_list, timestamp_utc=None): """ Checks remaining time for updated subscriptions and sends out time-based notifications. Notification 1: 1 day before end Notification 2: 7 days before end Notification 3: 30 days before end Arguments: engine - an sql engine instance lab_dict - lab name /internal id dictionary sub_dict - subscription id /internal id dictionary upd_sub_list - a list of tuple (before, after) of subscription details timestamp_utc - timestamp when emails were sent (for testing purposes) Returns: success - flag if the action was succesful error - error message count - the number of nutifications sent """ success = True error = None count = 0 if timestamp_utc is None: current_date = datetime.utcnow().date() else: current_date = timestamp_utc.date() session = session_open(engine) # Notifying updated subscriptions about expiry for _, sub_update in enumerate(upd_sub_list): expires = False expiry_code = None send_notification = False # prev_details = sub_update[0] new_details = sub_update[1] # only for active subscriptions. If subscription is cancelled, # it should have been notified if ( new_details.subscription_status.lower() == CONST_SUB_CANCELLED.lower() ): continue # subscription_expiry_date can be null if handout status is pending # acceptance if new_details.subscription_expiry_date is not None: # check the expiration notification code expires, expiry_code, remain_days = _check_remaining_time( new_details.subscription_expiry_date, current_date=current_date ) if not expires: continue else: continue # check the latest notification code sub_latest_noti_code = ( session.query(SubscriptionClass) .filter(SubscriptionClass.id == new_details.sub_id) .first() .expiry_code ) if sub_latest_noti_code is None: send_notification = True elif ( sub_latest_noti_code > expiry_code and expiry_code != CONST_EXPR_CODE_0 ): send_notification = True if send_notification: send_success, _ = _notify_expiring_sub( session, lab_dict, sub_dict, new_details, expiry_code, remain_days, timestamp_utc=timestamp_utc, ) if send_success: count += 1 session_close(session) return success, error, count
def notify_usage(engine, lab_dict, sub_dict, upd_sub_list, timestamp_utc=None): """ Checks remaining budgets for new and updated subscriptions and sends out usage-based notifications. Notification 1: 50% of monetary credit has been used Notification 2: 75% of monetary credit has been used Notification 3: 90% of monetary credit has been used Notification 4: 95% of monetary credit has been used Arguments: engine - an sql engine instance lab_dict - lab name /internal id dictionary sub_dict - subscription id /internal id dictionary upd_sub_list - a list of tuple (before, after) of subscription details timestamp_utc - timestamp when emails were sent (for testing purposes) Returns: success - flag if the action was succesful error - error message count - the number of nutifications sent """ success = True error = None count = 0 session = session_open(engine) # Notifying updated subscriptions for _, sub_update in enumerate(upd_sub_list): send_notification = False # prev_details = sub_update[0] new_details = sub_update[1] # only for active subscriptions. # If subscription is cancelled, it should have been alerady notified if (new_details.subscription_status.lower() == CONST_SUB_CANCELLED.lower()): continue # check the budget notification code notify, usage_code = _usage_notification(new_details.handout_budget, new_details.handout_consumed) if not notify: continue # check the latest notification code sub_latest_noti_code = (session.query(SubscriptionClass).filter( SubscriptionClass.id == new_details.sub_id).first().usage_code) if sub_latest_noti_code is None: send_notification = True elif usage_code > sub_latest_noti_code: send_notification = True if send_notification: send_success, _ = _notify_usage_sub( session, lab_dict, sub_dict, new_details, usage_code, timestamp_utc=timestamp_utc, ) if send_success: count += 1 session_close(session) return success, error, count