def run(self):
    """Generate intermediate Redshift tables, then refresh the RDS indexes.

    Returns a completion message on success; raises BackgroundJobError if
    either the Redshift DDL or the RDS index update fails.
    """
    app.logger.info('Starting intermediate table generation job...')
    # The SIS source schema depends on whether the EDL feature flag is on.
    if feature_flag_edl():
        sis_source_schema = app.config['REDSHIFT_SCHEMA_EDL']
    else:
        sis_source_schema = app.config['REDSHIFT_SCHEMA_SIS']
    resolved_ddl_redshift = resolve_sql_template(
        'create_intermediate_schema.template.sql',
        current_term_id=current_term_id(),
        redshift_schema_sis=sis_source_schema,
    )
    if not redshift.execute_ddl_script(resolved_ddl_redshift):
        raise BackgroundJobError('Intermediate table creation job failed.')
    app.logger.info('Redshift tables generated.')
    resolved_ddl_rds = resolve_sql_template('update_rds_indexes_sis.template.sql')
    if not rds.execute(resolved_ddl_rds):
        raise BackgroundJobError('Failed to update RDS indexes for intermediate schema.')
    app.logger.info('RDS indexes updated.')
    return 'Intermediate table generation job completed.'
def run(self):
    """Create the EDL schema and generate feeds, gated on the EDL feature flag."""
    # Guard clause: the whole job is a no-op unless the flag is enabled.
    if not feature_flag_edl():
        return 'Skipped EDL schema creation job because feature-flag is false.'
    app.logger.info('Starting EDL schema creation job...')
    self.create_schema()
    self.generate_feeds()
    return 'EDL schema creation job completed.'
def student_schema_table(key):
    """Resolve a logical student-schema table name to its physical name.

    Tables whose names differ between the EDL and legacy sources are listed
    below as (EDL name, legacy name); any other key is returned unchanged.
    """
    aliases = {
        'degree_progress': ('student_degree_progress', 'sis_api_degree_progress'),
        'sis_profiles': ('sis_profiles', 'sis_api_profiles'),
        'sis_profiles_hist_enr': ('sis_profiles_hist_enr', 'sis_api_profiles_hist_enr'),
        'student_demographics': ('student_demographics', 'student_api_demographics'),
    }
    if key not in aliases:
        return key
    edl_name, legacy_name = aliases[key]
    return edl_name if feature_flag_edl() else legacy_name
def run(self):
    """Rebuild the external SIS schema in Redshift and verify it.

    Returns False if the manifests could not be updated; raises
    BackgroundJobError if the DDL script fails.
    """
    app.logger.info('Starting SIS schema creation job...')
    if not self.update_manifests():
        app.logger.info('Error updating manifests, will not execute schema creation SQL')
        return False
    app.logger.info('Executing SQL...')
    redshift.drop_external_schema(external_schema)
    resolved_ddl = resolve_sql_template('create_sis_schema.template.sql')
    if not redshift.execute_ddl_script(resolved_ddl):
        raise BackgroundJobError('SIS schema creation job failed.')
    # When reading from EDL, tables with zero rows are tolerated by verification.
    verify_external_schema(external_schema, resolved_ddl, is_zero_count_acceptable=feature_flag_edl())
    return 'SIS schema creation job completed.'
def analyze_edl_registration_data(sid):
    """Fetch one student's registration data from both EDL and SIS for comparison."""
    # TODO: All 'analyze_edl' API endpoints must start with safety_check().
    _safety_check()

    class MockRegistrationsJob(AbstractRegistrationsJob):
        # Concrete no-op subclass so the abstract base can be instantiated.
        def run(self, load_mode='new'):
            pass

    job = MockRegistrationsJob()
    # NOTE(review): computed once, before the per-source flag override below —
    # so both sources share one demographics key. Confirm that is intended.
    demographics_key = 'demographics' if feature_flag_edl() else 'api_demographics'
    result = {}
    for source in ('edl', 'sis'):
        with _override_edl_feature_flag(source == 'edl'):
            bucket = {
                'term_gpas': [],
                'last_registrations': [],
                demographics_key: [],
            }
            job.get_registration_data_per_sids(bucket, [sid])
            result[source] = bucket
    return tolerant_jsonify(result)
def get_registration_data_per_sids(self, rows, sids, include_demographics=True):
    """Populate *rows* in place for *sids* and return (successes, failures).

    The data source is EDL or the SIS student API, per the feature flag.
    """
    self.include_demographics = include_demographics
    if feature_flag_edl():
        return self._query_edl(rows, sids)
    return self._query_student_api(rows, sids)
class AbstractRegistrationsJob(BackgroundJob):
    """Base class for jobs that import per-student registration history.

    Subclasses implement run(); the shared logic here fetches last-registration
    feeds, per-term GPAs, and (optionally) demographics for a list of SIDs,
    appending encoded TSV rows into a caller-supplied dict of lists.
    """

    # NOTE(review): evaluated once at class-definition time, so a runtime change
    # to the EDL feature flag is not reflected here — confirm that is intended.
    demographics_key = 'demographics' if feature_flag_edl() else 'api_demographics'
    include_demographics = True

    @abstractmethod
    def run(self, load_mode='new'):
        pass

    def get_registration_data_per_sids(self, rows, sids, include_demographics=True):
        """Populate *rows* (dict of lists of encoded TSV rows) for *sids*.

        Returns (successes, failures) as lists of SIDs. The data source is EDL
        or the SIS student API, depending on the feature flag.
        """
        self.include_demographics = include_demographics
        return self._query_edl(rows, sids) if feature_flag_edl() else self._query_student_api(rows, sids)

    def _query_edl(self, rows, sids):
        """Query EDL for registrations; append TSV rows and return (successes, failures)."""
        successes = []
        for edl_row in get_edl_student_registrations(sids):
            sid = edl_row['student_id']
            if sid not in successes:
                # Based on the SQL order_by, the first result per SID will be 'last_registration'.
                successes.append(sid)
                rows['last_registrations'].append(
                    encoded_tsv_row([sid, json.dumps(edl_registration_to_json(edl_row))]),
                )
            rows['term_gpas'].append(
                encoded_tsv_row(
                    [
                        sid,
                        edl_row['term_id'],
                        edl_row['current_term_gpa'] or '0',
                        edl_row.get('unt_taken_gpa') or '0',  # TODO: Does EDL give us 'unitsTakenForGpa'?
                    ],
                ),
            )
            if self.include_demographics:
                rows[self.demographics_key].append(
                    encoded_tsv_row([sid, json.dumps(edl_demographics_to_json(edl_row))]),
                )
        # SIDs with no EDL rows at all are reported as failures.
        failures = list(np.setdiff1d(sids, successes))
        return successes, failures

    def _query_student_api(self, rows, sids):
        """Query the SIS student API concurrently; append TSV rows and return (successes, failures)."""
        successes = []
        failures = []
        # Hand the real app object to worker threads so each can push an app context.
        app_obj = app._get_current_object()
        start_loop = timer()
        with ThreadPoolExecutor(max_workers=app.config['STUDENT_API_MAX_THREADS']) as executor:
            for result in executor.map(self._async_get_feed, repeat(app_obj), sids):
                sid = result['sid']
                full_feed = result['feed']
                if full_feed:
                    successes.append(sid)
                    rows['last_registrations'].append(
                        encoded_tsv_row([sid, json.dumps(full_feed.get('last_registration', {}))]),
                    )
                    gpa_feed = full_feed.get('term_gpas', {})
                    if gpa_feed:
                        # One TSV row per term: sid, term, GPA, units taken for GPA.
                        for term_id, term_data in gpa_feed.items():
                            row = [
                                sid,
                                term_id,
                                (term_data.get('gpa') or '0'),
                                (term_data.get('unitsTakenForGpa') or '0'),
                            ]
                            rows['term_gpas'].append(encoded_tsv_row(row))
                    else:
                        app.logger.info(f'No past UGRD registrations found for SID {sid}.')
                    demographics = full_feed.get('demographics', {})
                    if demographics:
                        rows[self.demographics_key].append(
                            encoded_tsv_row([sid, json.dumps(demographics)]),
                        )
                else:
                    failures.append(sid)
                    app.logger.error(f'Registration history import failed for SID {sid}.')
        app.logger.info(f'Wanted {len(sids)} students; got {len(successes)} in {timer() - start_loop} secs')
        return successes, failures

    def _async_get_feed(self, app_obj, sid):
        """Worker-thread fetch of one SID's feed; runs inside a fresh app context."""
        with app_obj.app_context():
            app.logger.info(f'Fetching registration history for SID {sid}')
            feed = sis_student_api.get_term_gpas_registration_demog(sid, self.include_demographics)
            result = {
                'sid': sid,
                'feed': feed,
            }
        return result
def sis_schema_table(key):
    """Resolve a logical SIS-schema table name to its physical name.

    Only 'minors' differs between sources; other keys pass through unchanged.
    """
    if key == 'minors':
        return 'student_minors' if feature_flag_edl() else 'minors'
    return key
def student_schema():
    """Return the Redshift student-schema name for the active data source."""
    config_key = 'REDSHIFT_SCHEMA_EDL' if feature_flag_edl() else 'REDSHIFT_SCHEMA_STUDENT'
    return app.config[config_key]
def get_s3_sis_api_daily_path(cutoff=None, use_edl_if_feature_flag=False):
    """Return the dated S3 path for stashed SIS API data.

    This data doesn't need to be queried by Redshift Spectrum. The EDL path is
    used only when both the feature flag and *use_edl_if_feature_flag* are set.
    """
    if feature_flag_edl() and use_edl_if_feature_flag:
        base_path = app.config['LOCH_S3_EDL_DATA_PATH']
    else:
        base_path = app.config['LOCH_S3_SIS_API_DATA_PATH']
    return f'{base_path}/daily/{hashed_datestamp(cutoff)}'
def load(self, all_sids):
    """Load data for *all_sids* from EDL or the student API, per the feature flag."""
    if feature_flag_edl():
        return self._load_from_edl(all_sids)
    return self._load_from_student_api(all_sids)