Example #1
    def run(self):
        app.logger.info('Starting intermediate table generation job...')

        sis_source_schema = (
            app.config['REDSHIFT_SCHEMA_EDL'] if feature_flag_edl()
            else app.config['REDSHIFT_SCHEMA_SIS']
        )

        resolved_ddl_redshift = resolve_sql_template(
            'create_intermediate_schema.template.sql',
            current_term_id=current_term_id(),
            redshift_schema_sis=sis_source_schema,
        )
        if redshift.execute_ddl_script(resolved_ddl_redshift):
            app.logger.info('Redshift tables generated.')
        else:
            raise BackgroundJobError('Intermediate table creation job failed.')

        resolved_ddl_rds = resolve_sql_template(
            'update_rds_indexes_sis.template.sql')
        if rds.execute(resolved_ddl_rds):
            app.logger.info('RDS indexes updated.')
        else:
            raise BackgroundJobError(
                'Failed to update RDS indexes for intermediate schema.')

        return 'Intermediate table generation job completed.'
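Every example in this listing branches on feature_flag_edl(). The helper itself is not included here; a minimal sketch, assuming app is the Flask current_app proxy (as in the snippets) and that the flag is a boolean in app config under a placeholder FEATURE_FLAG_EDL key that is not confirmed by the source:

from flask import current_app as app


def feature_flag_edl():
    # Sketch only: read the EDL feature flag from Flask app config.
    # 'FEATURE_FLAG_EDL' is an assumed key name; the real key is not shown in this listing.
    return bool(app.config.get('FEATURE_FLAG_EDL', False))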
Example #2
def run(self):
    if feature_flag_edl():
        app.logger.info('Starting EDL schema creation job...')
        self.create_schema()
        self.generate_feeds()
        return 'EDL schema creation job completed.'
    else:
        return 'Skipped EDL schema creation job because feature-flag is false.'
Example #3
def student_schema_table(key):
    use_edl = feature_flag_edl()
    return {
        'degree_progress':
        'student_degree_progress' if use_edl else 'sis_api_degree_progress',
        'sis_profiles':
        'sis_profiles' if use_edl else 'sis_api_profiles',
        'sis_profiles_hist_enr':
        'sis_profiles_hist_enr' if use_edl else 'sis_api_profiles_hist_enr',
        'student_demographics':
        'student_demographics' if use_edl else 'student_api_demographics',
    }.get(key, key)
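Because of the .get(key, key) fallback, any key without an entry in the mapping passes through unchanged. Hypothetical calls, assuming the flag is off:

# With feature_flag_edl() returning False:
student_schema_table('sis_profiles')   # -> 'sis_api_profiles'
student_schema_table('term_gpa')       # unmapped key passes through -> 'term_gpa'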
Example #4
def run(self):
    app.logger.info('Starting SIS schema creation job...')
    if not self.update_manifests():
        app.logger.info('Error updating manifests, will not execute schema creation SQL')
        return False
    app.logger.info('Executing SQL...')
    redshift.drop_external_schema(external_schema)
    resolved_ddl = resolve_sql_template('create_sis_schema.template.sql')
    if redshift.execute_ddl_script(resolved_ddl):
        verify_external_schema(
            external_schema,
            resolved_ddl,
            is_zero_count_acceptable=feature_flag_edl(),
        )
    else:
        raise BackgroundJobError('SIS schema creation job failed.')
    return 'SIS schema creation job completed.'
Example #5
def analyze_edl_registration_data(sid):
    # TODO: All 'analyze_edl' API endpoints must start with safety_check().
    _safety_check()
    result = {}

    class MockRegistrationsJob(AbstractRegistrationsJob):
        def run(self, load_mode='new'):
            pass

    job = MockRegistrationsJob()
    demographics_key = 'demographics' if feature_flag_edl() else 'api_demographics'

    for key in ('edl', 'sis'):
        with _override_edl_feature_flag(key == 'edl'):
            result[key] = {
                'term_gpas': [],
                'last_registrations': [],
                demographics_key: [],
            }
            job.get_registration_data_per_sids(result[key], [sid])

    return tolerant_jsonify(result)
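Example #5 depends on an _override_edl_feature_flag context manager that is not part of this listing. A plausible sketch, assuming the flag lives in Flask app config under the same placeholder FEATURE_FLAG_EDL key used above:

from contextlib import contextmanager

from flask import current_app as app


@contextmanager
def _override_edl_feature_flag(value):
    # Temporarily force the EDL flag on or off, then restore the original value.
    original = app.config.get('FEATURE_FLAG_EDL')  # assumed config key
    app.config['FEATURE_FLAG_EDL'] = value
    try:
        yield
    finally:
        app.config['FEATURE_FLAG_EDL'] = original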
Example #6
def get_registration_data_per_sids(self, rows, sids, include_demographics=True):
    self.include_demographics = include_demographics
    return self._query_edl(rows, sids) if feature_flag_edl() else self._query_student_api(rows, sids)
Example #7
class AbstractRegistrationsJob(BackgroundJob):

    demographics_key = 'demographics' if feature_flag_edl() else 'api_demographics'
    include_demographics = True

    @abstractmethod
    def run(self, load_mode='new'):
        pass

    def get_registration_data_per_sids(self, rows, sids, include_demographics=True):
        self.include_demographics = include_demographics
        return self._query_edl(rows, sids) if feature_flag_edl() else self._query_student_api(rows, sids)

    def _query_edl(self, rows, sids):
        successes = []
        for edl_row in get_edl_student_registrations(sids):
            sid = edl_row['student_id']
            if sid not in successes:
                # Based on the SQL order_by, the first result per SID will be 'last_registration'.
                successes.append(sid)
                rows['last_registrations'].append(
                    encoded_tsv_row([sid, json.dumps(edl_registration_to_json(edl_row))]),
                )
            rows['term_gpas'].append(
                encoded_tsv_row(
                    [
                        sid,
                        edl_row['term_id'],
                        edl_row['current_term_gpa'] or '0',
                        edl_row.get('unt_taken_gpa') or '0',  # TODO: Does EDL give us 'unitsTakenForGpa'?
                    ],
                ),
            )
            if self.include_demographics:
                rows[self.demographics_key].append(
                    encoded_tsv_row([sid, json.dumps(edl_demographics_to_json(edl_row))]),
                )
        failures = list(np.setdiff1d(sids, successes))
        return successes, failures

    def _query_student_api(self, rows, sids):
        successes = []
        failures = []
        app_obj = app._get_current_object()
        start_loop = timer()

        with ThreadPoolExecutor(max_workers=app.config['STUDENT_API_MAX_THREADS']) as executor:
            for result in executor.map(self._async_get_feed, repeat(app_obj), sids):
                sid = result['sid']
                full_feed = result['feed']
                if full_feed:
                    successes.append(sid)
                    rows['last_registrations'].append(
                        encoded_tsv_row([sid, json.dumps(full_feed.get('last_registration', {}))]),
                    )
                    gpa_feed = full_feed.get('term_gpas', {})
                    if gpa_feed:
                        for term_id, term_data in gpa_feed.items():
                            row = [
                                sid,
                                term_id,
                                (term_data.get('gpa') or '0'),
                                (term_data.get('unitsTakenForGpa') or '0'),
                            ]
                            rows['term_gpas'].append(encoded_tsv_row(row))
                    else:
                        app.logger.info(f'No past UGRD registrations found for SID {sid}.')
                    demographics = full_feed.get('demographics', {})
                    if demographics:
                        rows[self.demographics_key].append(
                            encoded_tsv_row([sid, json.dumps(demographics)]),
                        )
                else:
                    failures.append(sid)
                    app.logger.error(f'Registration history import failed for SID {sid}.')
        app.logger.info(f'Wanted {len(sids)} students; got {len(successes)} in {timer() - start_loop} secs')
        return successes, failures

    def _async_get_feed(self, app_obj, sid):
        with app_obj.app_context():
            app.logger.info(f'Fetching registration history for SID {sid}')
            feed = sis_student_api.get_term_gpas_registration_demog(sid, self.include_demographics)
            result = {
                'sid': sid,
                'feed': feed,
            }
        return result
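The row writers in Examples #5-#7 append the output of encoded_tsv_row, which is not included in this listing. A minimal sketch of what such a helper could look like (an assumption, not the project's actual implementation):

def encoded_tsv_row(elements):
    # Join the column values with tabs and encode to bytes for a staged TSV upload.
    return '\t'.join(str(e) for e in elements).encode()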
Example #8
def sis_schema_table(key):
    use_edl = feature_flag_edl()
    return {
        'minors': 'student_minors' if use_edl else 'minors',
    }.get(key, key)
Example #9
def student_schema():
    return (
        app.config['REDSHIFT_SCHEMA_EDL'] if feature_flag_edl()
        else app.config['REDSHIFT_SCHEMA_STUDENT']
    )
Example #10
def get_s3_sis_api_daily_path(cutoff=None, use_edl_if_feature_flag=False):
    # Path for stashed SIS API data that doesn't need to be queried by Redshift Spectrum.
    use_edl = feature_flag_edl() and use_edl_if_feature_flag
    key = 'LOCH_S3_EDL_DATA_PATH' if use_edl else 'LOCH_S3_SIS_API_DATA_PATH'
    return f'{app.config[key]}/daily/{hashed_datestamp(cutoff)}'
def load(self, all_sids):
    return self._load_from_edl(all_sids) if feature_flag_edl() else self._load_from_student_api(all_sids)
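For reference, a hypothetical call to the Example #10 path helper; the EDL data path is used only when both the feature flag and the caller's opt-in are true, and the trailing folder name comes from hashed_datestamp, which is not shown in this listing:

# Falls back to LOCH_S3_SIS_API_DATA_PATH unless feature_flag_edl() is also true.
daily_path = get_s3_sis_api_daily_path(use_edl_if_feature_flag=True)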