def test_auto_terms(self, app, current_term_index):
    all_term_ids = set(berkeley.reverse_term_ids(include_future_terms=True, include_legacy_terms=True))
    canvas_integrated_term_ids = set(berkeley.reverse_term_ids())
    assert canvas_integrated_term_ids < all_term_ids
    assert berkeley.current_term_id() == '2182'
    assert berkeley.future_term_id() == '2188'
    assert berkeley.s3_canvas_data_path_current_term() == 'canvas-data/term/spring-2018'
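# The assertions above pair SIS term ids like '2182' with names like 'spring-2018'.
# Reading the id as <century digit><two-digit year><semester code> (2=spring,
# 5=summer, 8=fall) makes '2182' spring 2018 and '2188' fall 2018, which matches
# the expected S3 path. A standalone sketch of that decoding, inferred from these
# fixture values rather than taken from the berkeley module's implementation:
def decode_term_id(term_id):
    season = {'2': 'spring', '5': 'summer', '8': 'fall'}[term_id[3]]
    century = {'1': 1900, '2': 2000}[term_id[0]]
    return f'{season}-{century + int(term_id[1:3])}'

assert decode_term_id('2182') == 'spring-2018'
assert decode_term_id('2188') == 'fall-2018'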
def run(self):
    app.logger.info('Starting intermediate table generation job...')
    sis_source_schema = app.config['REDSHIFT_SCHEMA_EDL'] if app.config['FEATURE_FLAG_EDL_SIS_VIEWS'] else app.config['REDSHIFT_SCHEMA_SIS']
    resolved_ddl_redshift = resolve_sql_template(
        'create_intermediate_schema.template.sql',
        current_term_id=current_term_id(),
        redshift_schema_sis=sis_source_schema,
    )
    if redshift.execute_ddl_script(resolved_ddl_redshift):
        app.logger.info('Redshift tables generated.')
    else:
        raise BackgroundJobError('Intermediate table creation job failed.')
    resolved_ddl_rds = resolve_sql_template('update_rds_indexes_sis.template.sql')
    if rds.execute(resolved_ddl_rds):
        app.logger.info('RDS indexes updated.')
    else:
        raise BackgroundJobError('Failed to update RDS indexes for intermediate schema.')
    return 'Intermediate table generation job completed.'
def test_term_id_lists(self, app):
    all_term_ids = set(berkeley.reverse_term_ids(include_future_terms=True, include_legacy_terms=True))
    canvas_integrated_term_ids = set(berkeley.reverse_term_ids())
    future_term_ids = set(berkeley.future_term_ids())
    legacy_term_ids = set(berkeley.legacy_term_ids())
    assert canvas_integrated_term_ids < all_term_ids
    assert berkeley.sis_term_id_for_name(app.config['EARLIEST_LEGACY_TERM']) in all_term_ids
    assert berkeley.sis_term_id_for_name(app.config['EARLIEST_TERM']) in all_term_ids
    assert berkeley.sis_term_id_for_name(app.config['CURRENT_TERM']) in all_term_ids
    assert berkeley.sis_term_id_for_name(app.config['FUTURE_TERM']) in all_term_ids
    assert berkeley.current_term_id() in canvas_integrated_term_ids
    assert berkeley.earliest_term_id() in canvas_integrated_term_ids
    assert future_term_ids.isdisjoint(canvas_integrated_term_ids)
    assert future_term_ids < all_term_ids
    assert berkeley.future_term_id() in future_term_ids
    assert legacy_term_ids.isdisjoint(canvas_integrated_term_ids)
    assert legacy_term_ids < all_term_ids
    assert berkeley.earliest_legacy_term_id() in legacy_term_ids
def async_get_feeds(app_obj, up_to_100_sids, as_of):
    with app_obj.app_context():
        feeds = get_v2_by_sids_list(up_to_100_sids, term_id=current_term_id(), with_registration=True, as_of=as_of)
        result = {
            'sids': up_to_100_sids,
            'feeds': feeds,
        }
    return result
def run(self, term_id=None):
    job_id = self.generate_job_id()
    if not term_id:
        term_id = current_term_id()
    if app.config['TEST_CANVAS_COURSE_IDS']:
        canvas_course_ids = app.config['TEST_CANVAS_COURSE_IDS']
    else:
        canvas_course_ids = [row['canvas_course_id'] for row in get_enrolled_canvas_sites_for_term(term_id)]
    app.logger.info(f'Starting Canvas grade change log import job {job_id} for term {term_id}, {len(canvas_course_ids)} course sites...')
    success_count = 0
    failure_count = 0
    index = 1
    for course_id in canvas_course_ids:
        path = f'/api/v1/audit/grade_change/courses/{course_id}'
        s3_key = f'{get_s3_canvas_api_path()}/grade_change_log/grade_change_log_{course_id}'
        create_canvas_api_import_status(
            job_id=job_id,
            term_id=term_id,
            course_id=course_id,
            table_name='grade_change_log',
        )
        app.logger.info(
            f'Fetching Canvas grade change log for course id {course_id}, term {term_id} ({index} of {len(canvas_course_ids)})',
        )
        response = dispatch(
            'import_canvas_api_data',
            data={
                'course_id': course_id,
                'path': path,
                's3_key': s3_key,
                'job_id': job_id,
            },
        )
        if not response:
            app.logger.error(f'Canvas grade change log import failed for course id {course_id}.')
            update_canvas_api_import_status(
                job_id=job_id,
                course_id=course_id,
                status='error',
            )
            failure_count += 1
        else:
            success_count += 1
        index += 1
    return (
        f'Canvas grade change log import completed for term {term_id}: {success_count} succeeded, '
        f'{failure_count} failed.'
    )
def test_inner_get_students(self, app):
    """Returns fixture data."""
    oski_response = student_api._get_v2_by_sids_list(
        TEST_SID_LIST,
        term_id=current_term_id(),
        as_of=None,
        with_registration=True,
        with_contacts=True,
    )
    assert oski_response
    assert oski_response.status_code == 200
    students = oski_response.json()['apiResponse']['response']['students']
    assert len(students) == 3
def app_config():
    def _to_api_key(key):
        chunks = key.split('_')
        return f"{chunks[0].lower()}{''.join(chunk.title() for chunk in chunks[1:])}"
    return tolerant_jsonify(
        {
            **dict((_to_api_key(key), app.config[key] if key in app.config else None) for key in PUBLIC_CONFIGS),
            **{
                'currentEnrollmentTerm': current_term_name(),
                'currentEnrollmentTermId': int(current_term_id()),
                'futureTermId': int(future_term_id()),
            },
        },
    )
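# A minimal standalone sketch of the _to_api_key transform above: it converts
# SNAKE_CASE config keys to camelCase API keys by lowercasing the first chunk
# and title-casing the rest. The sample keys are illustrative, not necessarily
# members of PUBLIC_CONFIGS.
def _to_api_key(key):
    chunks = key.split('_')
    return f"{chunks[0].lower()}{''.join(chunk.title() for chunk in chunks[1:])}"

assert _to_api_key('CURRENT_TERM') == 'currentTerm'
assert _to_api_key('EB_ENVIRONMENT') == 'ebEnvironment'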
def app_config():
    current_term_name = berkeley.current_term_name()
    current_term_id = berkeley.current_term_id()
    future_term_id = berkeley.future_term_id()
    return tolerant_jsonify({
        'currentEnrollmentTerm': current_term_name,
        'currentEnrollmentTermId': int(current_term_id),
        'futureTermId': int(future_term_id),
        'ebEnvironment': app.config.get('EB_ENVIRONMENT'),
        'nessieEnv': app.config['NESSIE_ENV'],
    })
def run(self, term_id=None):
    if not term_id:
        term_id = current_term_id()
    if term_id == 'all':
        app.logger.info('Starting enrollments index job for all terms...')
    else:
        app.logger.info(f'Starting enrollments index job for term {term_id}...')
    with rds.transaction() as transaction:
        if self.refresh_enrollments_index(term_id, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS indexes.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Failed to refresh RDS indexes.')
    return f'Enrollments index job completed for term {term_id}.'
def run(self):
    app.logger.info('Starting intermediate table generation job...')
    if app.config['FEATURE_FLAG_EDL_SIS_VIEWS']:
        sis_source_schema = app.config['REDSHIFT_SCHEMA_EDL']
        where_clause_exclude_withdrawn = "AND en.enrollment_status_reason <> 'WDRW'"
    else:
        sis_source_schema = app.config['REDSHIFT_SCHEMA_SIS']
        where_clause_exclude_withdrawn = f"""/* Enrollment with no primary section is likely a withdrawal. */
            AND EXISTS (
                SELECT en0.term_id, en0.section_id, en0.ldap_uid
                FROM {app.config['REDSHIFT_SCHEMA_SIS']}.enrollments en0
                JOIN {app.config['REDSHIFT_SCHEMA_INTERMEDIATE']}.course_sections crs0
                    ON crs0.sis_section_id = en0.section_id
                    AND crs0.sis_term_id = en0.term_id
                WHERE en0.term_id = en.term_id
                    AND en0.ldap_uid = en.ldap_uid
                    AND crs0.sis_course_name = crs.sis_course_name
                    AND crs0.sis_primary = TRUE
                    AND en0.enrollment_status != 'D'
                    AND en0.grade != 'W'
            )"""
    resolved_ddl_redshift = resolve_sql_template(
        'create_intermediate_schema.template.sql',
        current_term_id=current_term_id(),
        redshift_schema_sis=sis_source_schema,
        where_clause_exclude_withdrawn=where_clause_exclude_withdrawn,
    )
    if redshift.execute_ddl_script(resolved_ddl_redshift):
        app.logger.info('Redshift tables generated.')
    else:
        raise BackgroundJobError('Intermediate table creation job failed.')
    resolved_ddl_rds = resolve_sql_template('update_rds_indexes_sis.template.sql')
    if rds.execute(resolved_ddl_rds):
        app.logger.info('RDS indexes updated.')
    else:
        raise BackgroundJobError('Failed to update RDS indexes for intermediate schema.')
    return 'Intermediate table generation job completed.'
def _find_last_registration(rows):
    last_registration = None
    for row in rows:
        # We prefer registration data from: 1) the current term; 2) failing that, the nearest past term;
        # 3) failing that, the nearest future term. Which is to say, skip future terms unless that's all we have.
        if (row['term_id'] > current_term_id()) and last_registration:
            continue
        # At present, terms spent as an Extension student are not included in Term GPAs (but see BOAC-2266).
        # However, if there are no other types of registration, the Extension term is used for academicCareer.
        if row['academic_career_cd'] == 'UCBX':
            if last_registration and last_registration['academic_career_cd'] != 'UCBX':
                continue
        last_registration = row
    return last_registration
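# A self-contained sketch of the selection order above, assuming rows arrive
# sorted by ascending term_id and stubbing current_term_id() as '2182'. The
# sample rows are illustrative, not real registration data.
def current_term_id():
    return '2182'

rows = [
    {'term_id': '2178', 'academic_career_cd': 'UGRD'},  # nearest past term
    {'term_id': '2182', 'academic_career_cd': 'UCBX'},  # current term, but Extension
    {'term_id': '2188', 'academic_career_cd': 'UGRD'},  # future term
]
# The past UGRD term wins: the current-term UCBX row is skipped because a
# non-Extension registration was already found, and the future term is skipped
# because any registration was already found.
assert _find_last_registration(rows)['term_id'] == '2178'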
def generate_or_fetch_merged_profile(self, term_id, sid, calnet_profile):
    merged_profile = None
    if term_id is None or term_id == berkeley.current_term_id():
        merged_profile = self.generate_merged_profile(sid, calnet_profile)
    else:
        profile_result = redshift.fetch(
            'SELECT profile FROM {schema}.student_profiles WHERE sid = %s',
            params=(sid,),
            schema=self.destination_schema_identifier,
        )
        merged_profile = profile_result and profile_result[0] and json.loads(profile_result[0].get('profile', '{}'))
        if not merged_profile:
            merged_profile = self.generate_merged_profile(sid, calnet_profile)
    if not merged_profile:
        app.logger.error(f'Failed to generate merged profile for sid {sid}.')
    return merged_profile
def run(self):
    app.logger.info('Starting intermediate table generation job...')
    resolved_ddl_redshift = resolve_sql_template(
        'create_intermediate_schema.template.sql',
        current_term_id=current_term_id(),
    )
    if redshift.execute_ddl_script(resolved_ddl_redshift):
        app.logger.info('Redshift tables generated.')
    else:
        raise BackgroundJobError('Intermediate table creation job failed.')
    resolved_ddl_rds = resolve_sql_template('update_rds_indexes_sis.template.sql')
    if rds.execute(resolved_ddl_rds):
        app.logger.info('RDS indexes updated.')
    else:
        raise BackgroundJobError('Failed to update RDS indexes for intermediate schema.')
    return 'Intermediate table generation job completed.'
def generate_feeds(self):
    # Translation between canvas_user_id and UID/SID is needed to merge Canvas analytics data
    # and SIS enrollment-based data.
    advisees_by_canvas_id = {}
    advisees_by_sid = {}
    self.successes = []
    self.failures = []
    profile_tables = self.generate_student_profile_tables(advisees_by_canvas_id, advisees_by_sid)
    if not profile_tables:
        raise BackgroundJobError('Failed to generate student profile tables.')
    feed_path = app.config['LOCH_S3_BOAC_ANALYTICS_DATA_PATH'] + '/feeds/'
    s3.upload_json(advisees_by_canvas_id, feed_path + 'advisees_by_canvas_id.json')
    upload_student_term_maps(advisees_by_sid)
    # Avoid processing Canvas analytics data for future terms and pre-CS terms.
    for term_id in (future_term_ids() + legacy_term_ids()):
        enrollment_term_map = s3.get_object_json(feed_path + f'enrollment_term_map_{term_id}.json')
        if enrollment_term_map:
            GenerateMergedEnrollmentTerm().refresh_student_enrollment_term(term_id, enrollment_term_map)
    canvas_integrated_term_ids = reverse_term_ids()
    app.logger.info(f'Will queue analytics generation for {len(canvas_integrated_term_ids)} terms on worker nodes.')
    result = queue_merged_enrollment_term_jobs(self.job_id, canvas_integrated_term_ids)
    if not result:
        raise BackgroundJobError('Failed to queue enrollment term jobs.')
    refresh_all_from_staging(profile_tables)
    self.update_redshift_academic_standing()
    self.update_rds_profile_indexes()
    app.logger.info('Profile generation complete; waiting for enrollment term generation to finish.')
    while True:
        sleep(1)
        enrollment_results = get_merged_enrollment_term_job_status(self.job_id)
        if not enrollment_results:
            raise BackgroundJobError('Failed to refresh RDS indexes.')
        any_pending_job = next((row for row in enrollment_results if row['status'] == 'created' or row['status'] == 'started'), None)
        if not any_pending_job:
            break
    app.logger.info('Exporting analytics data for archival purposes.')
    unload_enrollment_terms([current_term_id(), future_term_id()])
    app.logger.info('Refreshing enrollment terms in RDS.')
    with rds.transaction() as transaction:
        if self.refresh_rds_enrollment_terms(None, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS enrollment terms.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Failed to refresh RDS enrollment terms.')
    status_string = f'Generated merged profiles ({len(self.successes)} successes, {len(self.failures)} failures).'
    errored = False
    for row in enrollment_results:
        status_string += f" {row['details']}"
        if row['status'] == 'error':
            errored = True
    truncate_staging_table('student_enrollment_terms')
    if errored:
        raise BackgroundJobError(status_string)
    else:
        return status_string
def run(self, term_id=None):
    if not term_id:
        term_id = current_term_id()
    canvas_course_ids = [row['canvas_course_id'] for row in get_enrolled_canvas_sites_for_term(term_id)]
    app.logger.info(f'Starting Canvas enrollments API import job for term {term_id}, {len(canvas_course_ids)} course sites...')
    rows = []
    success_count = 0
    failure_count = 0
    index = 1
    for course_id in canvas_course_ids:
        app.logger.info(f'Fetching Canvas enrollments API for course id {course_id}, term {term_id} ({index} of {len(canvas_course_ids)})')
        feed = canvas_api.get_course_enrollments(course_id)
        if feed:
            success_count += 1
            for enrollment in feed:
                user_id = str(enrollment.get('user_id'))
                last_activity_at = str(enrollment.get('last_activity_at') or '')
                rows.append('\t'.join([str(course_id), user_id, str(term_id), last_activity_at, json.dumps(enrollment)]))
        else:
            failure_count += 1
            app.logger.error(f'Canvas enrollments API import failed for course id {course_id}.')
        index += 1
    s3_key = f'{get_s3_sis_api_daily_path()}/canvas_api_enrollments_{term_id}.tsv'
    app.logger.info(f'Will stash {success_count} feeds in S3: {s3_key}')
    if not s3.upload_data('\n'.join(rows), s3_key):
        app.logger.error('Error on S3 upload: aborting job.')
        return False
    app.logger.info('Will copy S3 feeds into Redshift...')
    query = resolve_sql_template_string(
        """
        DELETE FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}';
        COPY {redshift_schema_student}_staging.canvas_api_enrollments
            FROM '{loch_s3_sis_api_data_path}/canvas_api_enrollments_{term_id}.tsv'
            IAM_ROLE '{redshift_iam_role}'
            DELIMITER '\\t'
            TIMEFORMAT 'YYYY-MM-DDTHH:MI:SSZ';
        DELETE FROM {redshift_schema_student}.canvas_api_enrollments
            WHERE term_id = '{term_id}'
            AND course_id IN
            (SELECT course_id FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}');
        INSERT INTO {redshift_schema_student}.canvas_api_enrollments
            (SELECT * FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}');
        DELETE FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}';
        """,
        term_id=term_id,
    )
    if not redshift.execute(query):
        app.logger.error('Error on Redshift copy: aborting job.')
        return False
    return (
        f'Canvas enrollments API import completed for term {term_id}: {success_count} succeeded, '
        f'{failure_count} failed.'
    )
def run(self, term_id=None):
    if not term_id:
        term_id = current_term_id()
    canvas_course_ids = [row['canvas_course_id'] for row in get_enrolled_canvas_sites_for_term(term_id)]
    app.logger.info(f'Starting Canvas enrollments API import job for term {term_id}, {len(canvas_course_ids)} course sites...')
    rows = []
    success_count = 0
    failure_count = 0
    index = 1
    for course_id in canvas_course_ids:
        app.logger.info(f'Fetching Canvas enrollments API for course id {course_id}, term {term_id} ({index} of {len(canvas_course_ids)})')
        feed = canvas_api.get_course_enrollments(course_id)
        if feed:
            success_count += 1
            for enrollment in feed:
                user_id = enrollment.get('user_id')
                last_activity_at = enrollment.get('last_activity_at') or ''
                rows.append(encoded_tsv_row([course_id, user_id, term_id, last_activity_at, json.dumps(enrollment)]))
        else:
            failure_count += 1
            app.logger.error(f'Canvas enrollments API import failed for course id {course_id}.')
        index += 1
    s3_key = f'{get_s3_sis_api_daily_path()}/canvas_api_enrollments/canvas_api_enrollments_{term_id}.tsv'
    app.logger.info(f'Will stash {success_count} feeds in S3: {s3_key}')
    if not s3.upload_tsv_rows(rows, s3_key):
        raise BackgroundJobError('Error on S3 upload: aborting job.')
    app.logger.info('Will copy S3 feeds into Redshift...')
    query = resolve_sql_template_string(
        """
        CREATE EXTERNAL SCHEMA {redshift_schema_student}_staging_ext_tmp FROM data catalog
            DATABASE '{redshift_schema_student}_staging_ext_tmp'
            IAM_ROLE '{redshift_iam_role}'
            CREATE EXTERNAL DATABASE IF NOT EXISTS;
        CREATE EXTERNAL TABLE {redshift_schema_student}_staging_ext_tmp.canvas_api_enrollments (
            course_id VARCHAR,
            user_id VARCHAR,
            term_id VARCHAR,
            last_activity_at TIMESTAMP,
            feed VARCHAR
        )
        ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '\\t'
        STORED AS TEXTFILE
        LOCATION '{loch_s3_sis_api_data_path}/canvas_api_enrollments';
        DELETE FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}';
        INSERT INTO {redshift_schema_student}_staging.canvas_api_enrollments
            (SELECT * FROM {redshift_schema_student}_staging_ext_tmp.canvas_api_enrollments);
        DROP TABLE {redshift_schema_student}_staging_ext_tmp.canvas_api_enrollments;
        DROP SCHEMA {redshift_schema_student}_staging_ext_tmp;
        DELETE FROM {redshift_schema_student}.canvas_api_enrollments
            WHERE term_id = '{term_id}'
            AND course_id IN
            (SELECT course_id FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}');
        INSERT INTO {redshift_schema_student}.canvas_api_enrollments
            (SELECT * FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}');
        DELETE FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}';
        """,
        term_id=term_id,
    )
    if not redshift.execute_ddl_script(query):
        raise BackgroundJobError('Error on Redshift copy: aborting job.')
    return (
        f'Canvas enrollments API import completed for term {term_id}: {success_count} succeeded, '
        f'{failure_count} failed.'
    )
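# encoded_tsv_row above is a Nessie helper whose implementation isn't shown in
# this section. A minimal sketch of what such a helper plausibly does, assuming
# it must stringify mixed-type values, map None to an empty column, and
# neutralize embedded tabs or newlines that would break the TSV layout:
def encoded_tsv_row(values):
    def encode(value):
        if value is None:
            return ''
        return str(value).replace('\t', ' ').replace('\n', ' ')
    return '\t'.join(encode(value) for value in values)

# e.g. encoded_tsv_row([98765, None, '2182']) == '98765\t\t2182'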
def generate_feeds(self, term_id=None, sids=None):
    """Loop through all records stored in the Calnet external schema and write merged student data to the internal student schema."""
    calnet_profiles = self.fetch_calnet_profiles(sids)
    # Jobs targeted toward a specific sid set (such as backfills) may return no CalNet profiles. Warn, don't error.
    if not calnet_profiles:
        app.logger.warn(f'No CalNet profiles returned, aborting job. (sids={sids})')
        return False
    # Jobs for non-current terms generate enrollment feeds only.
    if term_id and term_id != berkeley.current_term_id():
        tables = ['student_enrollment_terms']
    else:
        tables = ['student_profiles', 'student_academic_status', 'student_majors', 'student_enrollment_terms', 'student_holds']
    # In-memory storage for generated feeds prior to TSV output.
    self.rows = {
        'student_profiles': [],
        'student_academic_status': [],
        'student_majors': [],
        'student_enrollment_terms': [],
        'student_holds': [],
    }
    # Track the results of course-level queries to avoid requerying.
    self.canvas_site_map = {}
    # Remove any old data from staging tables.
    for table in tables:
        redshift.execute(
            'TRUNCATE {schema}.{table}',
            schema=self.staging_schema_identifier,
            table=psycopg2.sql.Identifier(table),
        )
    app.logger.info(f'Will generate feeds for {len(calnet_profiles)} students (term_id={term_id}).')
    successes = []
    failures = []
    index = 1
    for sid, profile_group in groupby(calnet_profiles, operator.itemgetter('sid')):
        app.logger.info(f'Generating feeds for sid {sid} ({index} of {len(calnet_profiles)})')
        index += 1
        merged_profile = self.generate_or_fetch_merged_profile(term_id, sid, list(profile_group)[0])
        if merged_profile:
            self.generate_merged_enrollment_terms(merged_profile, term_id)
            self.parse_holds(sid)
            successes.append(sid)
        else:
            failures.append(sid)
    for table in tables:
        if not self.rows[table]:
            continue
        self.upload_to_staging(table)
        if not self.verify_table(table):
            return False
    with redshift.transaction() as transaction:
        for table in tables:
            if not self.refresh_from_staging(table, term_id, sids, transaction):
                app.logger.error(f'Failed to refresh {self.destination_schema}.{table} from staging.')
                return False
        if not transaction.commit():
            app.logger.error(f'Final transaction commit failed for {self.destination_schema}.')
            return False
    with rds.transaction() as transaction:
        if self.refresh_rds_indexes(sids, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS indexes.')
        else:
            transaction.rollback()
            app.logger.error('Failed to refresh RDS indexes.')
            return False
    update_merged_feed_status(term_id, successes, failures)
    app.logger.info('Updated merged feed status.')
    return f'Merged profile generation complete: {len(successes)} successes, {len(failures)} failures.'
def schedule_all_jobs(force=False):
    from nessie.jobs.create_calnet_schema import CreateCalNetSchema
    from nessie.jobs.create_canvas_schema import CreateCanvasSchema
    from nessie.jobs.create_coe_schema import CreateCoeSchema
    from nessie.jobs.create_sis_schema import CreateSisSchema
    from nessie.jobs.generate_asc_profiles import GenerateAscProfiles
    from nessie.jobs.generate_boac_analytics import GenerateBoacAnalytics
    from nessie.jobs.generate_intermediate_tables import GenerateIntermediateTables
    from nessie.jobs.generate_merged_student_feeds import GenerateMergedStudentFeeds
    from nessie.jobs.import_asc_athletes import ImportAscAthletes
    from nessie.jobs.import_calnet_data import ImportCalNetData
    from nessie.jobs.import_canvas_enrollments_api import ImportCanvasEnrollmentsApi
    from nessie.jobs.import_degree_progress import ImportDegreeProgress
    from nessie.jobs.import_lrs_incrementals import ImportLrsIncrementals
    from nessie.jobs.import_sis_enrollments_api import ImportSisEnrollmentsApi
    from nessie.jobs.import_sis_student_api import ImportSisStudentApi
    from nessie.jobs.refresh_boac_cache import RefreshBoacCache
    from nessie.jobs.resync_canvas_snapshots import ResyncCanvasSnapshots
    from nessie.jobs.sync_canvas_snapshots import SyncCanvasSnapshots
    schedule_job(sched, 'JOB_SYNC_CANVAS_SNAPSHOTS', SyncCanvasSnapshots, force)
    schedule_job(sched, 'JOB_RESYNC_CANVAS_SNAPSHOTS', ResyncCanvasSnapshots, force)
    schedule_chained_job(
        sched,
        'JOB_IMPORT_STUDENT_POPULATION',
        [
            CreateCoeSchema,
            ImportAscAthletes,
            GenerateAscProfiles,
            ImportCalNetData,
            CreateCalNetSchema,
        ],
        force,
    )
    schedule_job(sched, 'JOB_IMPORT_DEGREE_PROGRESS', ImportDegreeProgress, force)
    schedule_job(sched, 'JOB_IMPORT_LRS_INCREMENTALS', ImportLrsIncrementals, force, truncate_lrs=True)
    schedule_job(sched, 'JOB_IMPORT_SIS_ENROLLMENTS', ImportSisEnrollmentsApi, force)
    schedule_job(sched, 'JOB_IMPORT_SIS_STUDENTS', ImportSisStudentApi, force)
    schedule_job(sched, 'JOB_IMPORT_CANVAS_ENROLLMENTS', ImportCanvasEnrollmentsApi, force)
    schedule_chained_job(
        sched,
        'JOB_GENERATE_ALL_TABLES',
        [
            CreateCanvasSchema,
            CreateSisSchema,
            GenerateIntermediateTables,
            GenerateBoacAnalytics,
        ],
        force,
    )
    schedule_job(
        sched,
        'JOB_GENERATE_CURRENT_TERM_FEEDS',
        GenerateMergedStudentFeeds,
        force,
        term_id=current_term_id(),
        backfill_new_students=True,
    )
    schedule_job(sched, 'JOB_REFRESH_BOAC_CACHE', RefreshBoacCache, force)
def run(self, csids=None, term_id=None):
    if not csids:
        csids = [row['sid'] for row in get_all_student_ids()]
    if not term_id:
        term_id = current_term_id()
    app.logger.info(f'Starting SIS enrollments API import job for term {term_id}, {len(csids)} students...')
    rows = []
    success_count = 0
    no_enrollments_count = 0
    failure_count = 0
    index = 1
    for csid in csids:
        app.logger.info(f'Fetching SIS enrollments API for SID {csid}, term {term_id} ({index} of {len(csids)})')
        feed = sis_enrollments_api.get_drops_and_midterms(csid, term_id)
        if feed:
            success_count += 1
            rows.append('\t'.join([str(csid), str(term_id), json.dumps(feed)]))
        elif feed is False:
            app.logger.info(f'SID {csid} returned no enrollments for term {term_id}.')
            no_enrollments_count += 1
        else:
            failure_count += 1
            app.logger.error(f'SIS enrollments API import failed for CSID {csid}.')
        index += 1
    s3_key = f'{get_s3_sis_api_daily_path()}/drops_and_midterms_{term_id}.tsv'
    app.logger.info(f'Will stash {success_count} feeds in S3: {s3_key}')
    if not s3.upload_data('\n'.join(rows), s3_key):
        app.logger.error('Error on S3 upload: aborting job.')
        return False
    app.logger.info('Will copy S3 feeds into Redshift...')
    if not redshift.execute(f"DELETE FROM {self.destination_schema}_staging.sis_api_drops_and_midterms WHERE term_id = '{term_id}'"):
        app.logger.error('Error truncating old staging rows: aborting job.')
        return False
    if not redshift.copy_tsv_from_s3(f'{self.destination_schema}_staging.sis_api_drops_and_midterms', s3_key):
        app.logger.error('Error on Redshift copy: aborting job.')
        return False
    staging_to_destination_query = resolve_sql_template_string(
        """
        DELETE FROM {redshift_schema_student}.sis_api_drops_and_midterms
            WHERE term_id = '{term_id}'
            AND sid IN
            (SELECT sid FROM {redshift_schema_student}_staging.sis_api_drops_and_midterms WHERE term_id = '{term_id}');
        INSERT INTO {redshift_schema_student}.sis_api_drops_and_midterms
            (SELECT * FROM {redshift_schema_student}_staging.sis_api_drops_and_midterms WHERE term_id = '{term_id}');
        DELETE FROM {redshift_schema_student}_staging.sis_api_drops_and_midterms WHERE term_id = '{term_id}';
        """,
        term_id=term_id,
    )
    if not redshift.execute(staging_to_destination_query):
        app.logger.error('Error on Redshift copy: aborting job.')
        return False
    return (
        f'SIS enrollments API import completed for term {term_id}: {success_count} succeeded, '
        f'{no_enrollments_count} returned no enrollments, {failure_count} failed.'
    )