Example #1
    def import_note_authors(self):
        notes_schema = app.config['RDS_SCHEMA_ADVISING_NOTES']

        advisor_attributes = (
            self._advisor_attributes_by_sid()
            + self._advisor_attributes_by_uid()
            + self._advisor_attributes_by_email()
        )
        if not advisor_attributes:
            raise BackgroundJobError('Failed to fetch note author attributes.')

        # Collapse duplicates, keeping a single attribute record per advisor UID.
        unique_advisor_attributes = list({adv['uid']: adv for adv in advisor_attributes}.values())

        with rds.transaction() as transaction:
            insertable_rows = []
            for entry in unique_advisor_attributes:
                first_name, last_name = calnet.split_sortable_name(entry)
                insertable_rows.append(
                    tuple((entry.get('uid'), entry.get('csid'), first_name,
                           last_name, entry.get('campus_email'))))

            result = transaction.insert_bulk(
                f'INSERT INTO {notes_schema}.advising_note_authors (uid, sid, first_name, last_name, campus_email) VALUES %s',
                insertable_rows,
            )
            if result:
                transaction.commit()
                app.logger.info('Imported advising note author attributes.')
            else:
                transaction.rollback()
                raise BackgroundJobError(
                    'Failed to import advising note author attributes.')
Example #2
    def refresh_sis_term_definitions(self):
        if self.feature_flag_edl:
            rows = redshift.fetch(f"""
                SELECT
                  semester_year_term_cd AS term_id,
                  semester_year_name_concat_2 AS term_name,
                  TO_CHAR(semester_first_day_of_insr_dt, 'YYYY-MM-DD') AS term_begins,
                  TO_CHAR(term_end_dt, 'YYYY-MM-DD') AS term_ends
                FROM {edl_external_schema()}.student_academic_terms_data
                WHERE
                  semester_year_term_cd >= {app.config['EARLIEST_ACADEMIC_HISTORY_TERM_ID']}
                  AND academic_career_cd = 'UGRD'
                ORDER BY semester_year_term_cd
            """)
        else:
            rows = redshift.fetch(
                f'SELECT * FROM {self.redshift_schema}.term_definitions')

        if len(rows):
            with rds.transaction() as transaction:
                if self.refresh_rds(rows, transaction):
                    transaction.commit()
                    app.logger.info('Refreshed RDS indexes.')
                else:
                    transaction.rollback()
                    raise BackgroundJobError(
                        'Error refreshing RDS term definitions.')
Example #3
def queue_merged_enrollment_term_jobs(master_job_id, term_ids):
    now = datetime.now().replace(microsecond=0).isoformat()

    def insertable_tuple(term_id):
        return tuple([
            master_job_id,
            term_id,
            'created',
            None,  # instance_id is inserted as null
            now,
            now,
        ])

    with rds.transaction() as transaction:
        insert_result = transaction.insert_bulk(
            f"""INSERT INTO {_rds_schema()}.merged_enrollment_term_job_queue
               (master_job_id, term_id, status, instance_id, created_at, updated_at)
                VALUES %s""",
            [insertable_tuple(term_id) for term_id in term_ids],
        )
        if insert_result:
            transaction.commit()
            return True
        else:
            transaction.rollback()
            return False
Example #4
    def import_appointment_advisors(self):
        sis_notes_schema = app.config['RDS_SCHEMA_SIS_ADVISING_NOTES']
        advisor_schema_redshift = app.config['REDSHIFT_SCHEMA_ADVISOR_INTERNAL']

        advisor_sids_from_sis_appointments = set(
            [r['advisor_sid'] for r in rds.fetch(f'SELECT DISTINCT advisor_sid FROM {sis_notes_schema}.advising_appointments')],
        )
        advisor_sids_from_advisors = set(
            [r['sid'] for r in redshift.fetch(f'SELECT DISTINCT sid FROM {advisor_schema_redshift}.advisor_departments')],
        )
        advisor_sids = list(advisor_sids_from_sis_appointments | advisor_sids_from_advisors)

        advisor_attributes = calnet.client(app).search_csids(advisor_sids)
        if not advisor_attributes:
            raise BackgroundJobError('Failed to fetch note author attributes.')

        unique_advisor_attributes = list({adv['uid']: adv for adv in advisor_attributes}.values())

        with rds.transaction() as transaction:
            insertable_rows = []
            for entry in unique_advisor_attributes:
                first_name, last_name = calnet.split_sortable_name(entry)
                insertable_rows.append(tuple((entry.get('uid'), entry.get('csid'), first_name, last_name)))

            result = transaction.insert_bulk(
                f'INSERT INTO {sis_notes_schema}.advising_appointment_advisors (uid, sid, first_name, last_name) VALUES %s',
                insertable_rows,
            )
            if result:
                transaction.commit()
                app.logger.info('Imported appointment advisor attributes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Failed to import appointment advisor attributes.')
Example #5
def update_photo_import_status(successes, failures, photo_not_found):
    rds.execute(
        f'DELETE FROM {_rds_schema()}.photo_import_status WHERE sid = ANY(%s)',
        params=(successes + failures + photo_not_found, ),
    )
    now = datetime.utcnow().isoformat()
    success_records = [tuple([sid, 'success', now]) for sid in successes]
    failure_records = [tuple([sid, 'failure', now]) for sid in failures]
    photo_not_found_records = [
        tuple([sid, 'photo_not_found', now]) for sid in photo_not_found
    ]
    rows = success_records + failure_records + photo_not_found_records
    with rds.transaction() as transaction:
        result = transaction.insert_bulk(
            f"""INSERT INTO {_rds_schema()}.photo_import_status
                (sid, status, updated_at)
                VALUES %s
            """,
            rows,
        )
        if result:
            transaction.commit()
        else:
            transaction.rollback()
            app.logger.error(
                'Error saving photo import status updates to RDS.')
Example #6
    def refresh_current_term_index(self):
        today = datetime.now(pytz.utc).astimezone(
            pytz.timezone(app.config['TIMEZONE'])).date()
        current_term = self.get_sis_current_term(today)

        if current_term:
            current_term_id = current_term['term_id']

            # If today is one month or less before the end of the current term, or if the current term is summer,
            # include the next term.
            if current_term_id[3] == '5' or (current_term['term_ends'] -
                                             timedelta(weeks=4)) < today:
                future_term_id = next_term_id(current_term['term_id'])
                # ... and if the upcoming term is Summer, include the next Fall term as well.
                if future_term_id[3] == '5':
                    future_term_id = next_term_id(future_term_id)
            else:
                future_term_id = current_term_id

            with rds.transaction() as transaction:
                transaction.execute(
                    f'TRUNCATE {rds_schema}.current_term_index')
                columns = ['current_term_name', 'future_term_name']
                values = tuple([
                    current_term['term_name'],
                    term_name_for_sis_id(future_term_id)
                ])
                if transaction.execute(
                        f'INSERT INTO {rds_schema}.current_term_index ({", ".join(columns)}) VALUES {values} '
                ):
                    transaction.commit()
                else:
                    transaction.rollback()
                    raise BackgroundJobError(
                        'Error refreshing RDS current term index.')
Example #7
    def refresh_sis_term_definitions(self):
        rows = redshift.fetch(
            f'SELECT * FROM {external_schema}.term_definitions')
        if len(rows):
            with rds.transaction() as transaction:
                if self.refresh_rds(rows, transaction):
                    transaction.commit()
                    app.logger.info('Refreshed RDS indexes.')
                else:
                    transaction.rollback()
                    raise BackgroundJobError(
                        'Error refreshing RDS term definitions.')
Example #8
    def update_rds_profile_indexes(self):
        with rds.transaction() as transaction:
            if self.refresh_rds_indexes(None, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Failed to refresh RDS indexes.')

        resolved_ddl_rds = resolve_sql_template(
            'update_rds_indexes_student_profiles.template.sql')
        if rds.execute(resolved_ddl_rds):
            app.logger.info('RDS student profile indexes updated.')
        else:
            raise BackgroundJobError(
                'Failed to update RDS student profile indexes.')
Example #9
    def run(self, term_id=None):
        if not term_id:
            term_id = current_term_id()
        if term_id == 'all':
            app.logger.info('Starting enrollments index job for all terms...')
        else:
            app.logger.info(
                f'Starting enrollments index job for term {term_id}...')

        with rds.transaction() as transaction:
            if self.refresh_enrollments_index(term_id, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Failed to refresh RDS indexes.')

        return f'Enrollments index job completed for term {term_id}.'
Example #10
    def run(self):
        app.logger.info('Starting Undergrads schema creation job...')
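        # Recreate the external schema from the resolved DDL and verify it before building the internal data.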
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_undergrads_schema.template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            app.logger.info('Undergrads external schema created.')
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError('Undergrads external schema creation failed.')
        undergrads_rows = redshift.fetch(f'SELECT * FROM {external_schema}.students ORDER by sid')

        with rds.transaction() as transaction:
            if self.refresh_rds_indexes(undergrads_rows, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Error refreshing RDS indexes.')

        return 'Undergrads internal schema created.'
Example #11
    def refresh_current_term_index(self):
        today = datetime.now(pytz.utc).astimezone(
            pytz.timezone(app.config['TIMEZONE'])).date()
        current_term = self.get_sis_current_term(today)

        if current_term:
            term_id = current_term['term_id']

            # Check if the advance enrollment period has started for the next two upcoming terms.
            future_term_id = term_id
            for _ in range(2):
                term_id = next_term_id(term_id)
                term = self.get_sis_term_for_id(term_id)
                advance_enrollment_period = 0
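                # The advance enrollment lead time (in days) depends on the semester, which is encoded in the final digit of the term ID.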
                if term_id[3] == '2':
                    advance_enrollment_period = 95
                elif term_id[3] == '5':
                    advance_enrollment_period = 124
                elif term_id[3] == '8':
                    advance_enrollment_period = 140
                if term['term_begins'] - timedelta(
                        days=advance_enrollment_period) < today:
                    future_term_id = term_id

            with rds.transaction() as transaction:
                transaction.execute(
                    f'TRUNCATE {rds_schema}.current_term_index')
                columns = ['current_term_name', 'future_term_name']
                values = tuple([
                    current_term['term_name'],
                    term_name_for_sis_id(future_term_id)
                ])
                if transaction.execute(
                        f'INSERT INTO {rds_schema}.current_term_index ({", ".join(columns)}) VALUES {values} '
                ):
                    transaction.commit()
                else:
                    transaction.rollback()
                    raise BackgroundJobError(
                        'Error refreshing RDS current term index.')
Example #12
    def run(self, load_mode='new'):
        all_sids = [row['sid'] for row in get_all_student_ids()]
        previous_backfills = {row['sid'] for row in get_sids_with_registration_imports()}

        if load_mode == 'new':
            sids = list(set(all_sids).difference(previous_backfills))
        elif load_mode == 'batch':
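            # Batch mode tops up the never-imported SIDs with the students whose registration imports are oldest, capped at the configured batch size.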
            new_sids = list(set(all_sids).difference(previous_backfills))
            limit = app.config['CYCLICAL_API_IMPORT_BATCH_SIZE'] - len(new_sids)
            if limit > 0:
                oldest_backfills = [row['sid'] for row in get_active_sids_with_oldest_registration_imports(limit=limit)]
                sids = new_sids + oldest_backfills
            else:
                sids = new_sids
        elif load_mode == 'all':
            sids = all_sids

        app.logger.info(f'Starting registrations/demographics import job for {len(sids)} students...')

        rows = {
            'term_gpas': [],
            'last_registrations': [],
            'api_demographics': [],
        }
        successes, failures = self.get_registration_data_per_sids(rows, sids)
        if load_mode != 'new' and (len(successes) == 0) and (len(failures) > 0):
            raise BackgroundJobError('Failed to import registration histories: aborting job.')

        for key in rows.keys():
            s3_key = f'{get_s3_sis_api_daily_path(use_edl_if_feature_flag=True)}/{key}.tsv'
            app.logger.info(f'Will stash {len(successes)} feeds in S3: {s3_key}')
            if not s3.upload_tsv_rows(rows[key], s3_key):
                raise BackgroundJobError('Error on S3 upload: aborting job.')
            app.logger.info('Will copy S3 feeds into Redshift...')
            if not redshift.execute(f'TRUNCATE {student_schema()}_staging.student_{key}'):
                raise BackgroundJobError('Error truncating old staging rows: aborting job.')
            if not redshift.copy_tsv_from_s3(f'{student_schema()}_staging.student_{key}', s3_key):
                raise BackgroundJobError('Error on Redshift copy: aborting job.')
            staging_to_destination_query = resolve_sql_template_string(
                """
                DELETE FROM {student_schema}.student_{table_key}
                    WHERE sid IN
                    (SELECT sid FROM {student_schema}_staging.student_{table_key});
                INSERT INTO {student_schema}.student_{table_key}
                    (SELECT * FROM {student_schema}_staging.student_{table_key});
                TRUNCATE TABLE {student_schema}_staging.student_{table_key};
                """,
                table_key=key,
                student_schema=student_schema(),
            )
            if not redshift.execute(staging_to_destination_query):
                raise BackgroundJobError('Error inserting staging entries into destination: aborting job.')

        with rds.transaction() as transaction:
            if self.refresh_rds_indexes(sids, rows['term_gpas'], transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Failed to refresh RDS indexes.')

        update_registration_import_status(successes, failures)

        return (
            f'Registrations import completed: {len(successes)} succeeded, {len(failures)} failed.'
        )
Example #13
    def generate_feeds(self):
        # Translation between canvas_user_id and UID/SID is needed to merge Canvas analytics data and SIS enrollment-based data.
        advisees_by_canvas_id = {}
        advisees_by_sid = {}
        self.successes = []
        self.failures = []
        profile_tables = self.generate_student_profile_tables(
            advisees_by_canvas_id, advisees_by_sid)
        if not profile_tables:
            raise BackgroundJobError(
                'Failed to generate student profile tables.')

        feed_path = app.config['LOCH_S3_BOAC_ANALYTICS_DATA_PATH'] + '/feeds/'
        s3.upload_json(advisees_by_canvas_id,
                       feed_path + 'advisees_by_canvas_id.json')

        upload_student_term_maps(advisees_by_sid)

        # Avoid processing Canvas analytics data for future terms and pre-CS terms.
        for term_id in (future_term_ids() + legacy_term_ids()):
            enrollment_term_map = s3.get_object_json(
                feed_path + f'enrollment_term_map_{term_id}.json')
            if enrollment_term_map:
                GenerateMergedEnrollmentTerm().refresh_student_enrollment_term(
                    term_id, enrollment_term_map)

        canvas_integrated_term_ids = reverse_term_ids()
        app.logger.info(
            f'Will queue analytics generation for {len(canvas_integrated_term_ids)} terms on worker nodes.'
        )
        result = queue_merged_enrollment_term_jobs(self.job_id,
                                                   canvas_integrated_term_ids)
        if not result:
            raise BackgroundJobError('Failed to queue enrollment term jobs.')

        refresh_all_from_staging(profile_tables)
        self.update_redshift_academic_standing()
        self.update_rds_profile_indexes()

        app.logger.info(
            'Profile generation complete; waiting for enrollment term generation to finish.'
        )

        while True:
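            # Poll until every queued enrollment term job has moved past the 'created' and 'started' states.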
            sleep(1)
            enrollment_results = get_merged_enrollment_term_job_status(
                self.job_id)
            if not enrollment_results:
                raise BackgroundJobError('Failed to refresh RDS indexes.')
            any_pending_job = next(
                (row for row in enrollment_results
                 if row['status'] == 'created' or row['status'] == 'started'),
                None)
            if not any_pending_job:
                break

        app.logger.info('Exporting analytics data for archival purposes.')
        unload_enrollment_terms([current_term_id(), future_term_id()])

        app.logger.info('Refreshing enrollment terms in RDS.')
        with rds.transaction() as transaction:
            if self.refresh_rds_enrollment_terms(None, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS enrollment terms.')
            else:
                transaction.rollback()
                raise BackgroundJobError(
                    'Failed to refresh RDS enrollment terms.')

        status_string = f'Generated merged profiles ({len(self.successes)} successes, {len(self.failures)} failures).'
        errored = False
        for row in enrollment_results:
            status_string += f" {row['details']}"
            if row['status'] == 'error':
                errored = True

        truncate_staging_table('student_enrollment_terms')
        if errored:
            raise BackgroundJobError(status_string)
        else:
            return status_string
Example #14
    def run(self):
        app.logger.info('Starting ASC profile generation job...')
        asc_rows = redshift.fetch(
            'SELECT * FROM {schema}.students ORDER by sid, UPPER(team_name)',
            schema=asc_schema_identifier,
        )

        profile_rows = []
        sids_for_inactive_deletion = []

        for sid, rows_for_student in groupby(asc_rows,
                                             operator.itemgetter('sid')):
            rows_for_student = list(rows_for_student)
            # Since BOAC believes (falsely) that isActiveAsc and statusAsc are attributes of a student, not
            # a team membership, a bit of brutal simplification is needed. Students who are active in at least
            # one sport have inactive team memberships dropped.
            any_active_athletics = reduce(
                operator.or_, [r['active'] for r in rows_for_student], False)
            if any_active_athletics:
                rows_for_student = [r for r in rows_for_student if r['active']]
                sids_for_inactive_deletion.append(sid)
            athletics_profile = {
                'athletics': [],
                'inIntensiveCohort': rows_for_student[0]['intensive'],
                'isActiveAsc': rows_for_student[0]['active'],
                'statusAsc': rows_for_student[0]['status_asc'],
            }
            for row in rows_for_student:
                athletics_profile['athletics'].append({
                    'groupCode': row['group_code'],
                    'groupName': row['group_name'],
                    'name': row['group_name'],
                    'teamCode': row['team_code'],
                    'teamName': row['team_name'],
                })

            profile_rows.append(
                encoded_tsv_row([sid, json.dumps(athletics_profile)]))

        s3_key = f'{get_s3_asc_daily_path()}/athletics_profiles.tsv'
        app.logger.info(
            f'Will stash {len(profile_rows)} feeds in S3: {s3_key}')
        if not s3.upload_tsv_rows(profile_rows, s3_key):
            raise BackgroundJobError('Error on S3 upload: aborting job.')

        app.logger.info('Will copy S3 feeds into Redshift...')
        query = resolve_sql_template_string(
            """
            TRUNCATE {redshift_schema_asc}.student_profiles;
            COPY {redshift_schema_asc}.student_profiles
                FROM '{loch_s3_asc_data_path}/athletics_profiles.tsv'
                IAM_ROLE '{redshift_iam_role}'
                DELIMITER '\\t';
            """, )
        if not redshift.execute(query):
            app.logger.error('Error on Redshift copy: aborting job.')
            return False

        with rds.transaction() as transaction:
            if self.refresh_rds_indexes(asc_rows, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Error refreshing RDS indexes.')

        if sids_for_inactive_deletion:
            redshift.execute(
                f'DELETE FROM {asc_schema}.students WHERE active IS false AND sid = ANY(%s)',
                params=(sids_for_inactive_deletion, ),
            )
            rds.execute(
                f'DELETE FROM {asc_schema}.students WHERE active IS false AND sid = ANY(%s)',
                params=(sids_for_inactive_deletion, ),
            )

        return 'ASC profile generation complete.'
Example #15
    def run(self):
        app.logger.info('Starting COE schema creation job...')
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_coe_schema.template.sql')
        # TODO This DDL drops and recreates the internal schema before the external schema is verified. We
        # ought to set up proper staging in conjunction with verification. It's also possible that a persistent
        # external schema isn't needed.
        if redshift.execute_ddl_script(resolved_ddl):
            app.logger.info('COE external schema created.')
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError('COE external schema creation failed.')
        coe_rows = redshift.fetch(
            'SELECT * FROM {schema}.students ORDER by sid',
            schema=internal_schema_identifier,
        )

        profile_rows = []
        index = 1
        for sid, rows_for_student in groupby(coe_rows,
                                             operator.itemgetter('sid')):
            app.logger.info(
                f'Generating COE profile for SID {sid} ({index} of {len(coe_rows)})'
            )
            index += 1
            row_for_student = list(rows_for_student)[0]
            coe_profile = {
                'advisorUid': row_for_student.get('advisor_ldap_uid'),
                'gender': row_for_student.get('gender'),
                'ethnicity': row_for_student.get('ethnicity'),
                'minority': row_for_student.get('minority'),
                'didPrep': row_for_student.get('did_prep'),
                'prepEligible': row_for_student.get('prep_eligible'),
                'didTprep': row_for_student.get('did_tprep'),
                'tprepEligible': row_for_student.get('tprep_eligible'),
                'sat1read': row_for_student.get('sat1read'),
                'sat1math': row_for_student.get('sat1math'),
                'sat2math': row_for_student.get('sat2math'),
                'inMet': row_for_student.get('in_met'),
                'gradTerm': row_for_student.get('grad_term'),
                'gradYear': row_for_student.get('grad_year'),
                'probation': row_for_student.get('probation'),
                'status': row_for_student.get('status'),
            }
            profile_rows.append(encoded_tsv_row([sid,
                                                 json.dumps(coe_profile)]))

        s3_key = f'{get_s3_coe_daily_path()}/coe_profiles.tsv'
        app.logger.info(
            f'Will stash {len(profile_rows)} feeds in S3: {s3_key}')
        if not s3.upload_tsv_rows(profile_rows, s3_key):
            raise BackgroundJobError('Error on S3 upload: aborting job.')

        app.logger.info('Will copy S3 feeds into Redshift...')
        query = resolve_sql_template_string(
            """
            COPY {redshift_schema_coe}.student_profiles
                FROM '{loch_s3_coe_data_path}/coe_profiles.tsv'
                IAM_ROLE '{redshift_iam_role}'
                DELIMITER '\\t';
            """, )
        if not redshift.execute(query):
            raise BackgroundJobError('Error on Redshift copy: aborting job.')

        with rds.transaction() as transaction:
            if self.refresh_rds_indexes(coe_rows, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Error refreshing RDS indexes.')

        return 'COE internal schema created.'
Example #16
    def generate_feeds(self, term_id=None, sids=None):
        """Loop through all records stored in the Calnet external schema and write merged student data to the internal student schema."""
        calnet_profiles = self.fetch_calnet_profiles(sids)

        # Jobs targeted toward a specific sid set (such as backfills) may return no CalNet profiles. Warn, don't error.
        if not calnet_profiles:
            app.logger.warn(
                f'No CalNet profiles returned, aborting job. (sids={sids})')
            return False

        # Jobs for non-current terms generate enrollment feeds only.
        if term_id and term_id != berkeley.current_term_id():
            tables = ['student_enrollment_terms']
        else:
            tables = [
                'student_profiles', 'student_academic_status',
                'student_majors', 'student_enrollment_terms', 'student_holds'
            ]

        # In-memory storage for generated feeds prior to TSV output.
        self.rows = {
            'student_profiles': [],
            'student_academic_status': [],
            'student_majors': [],
            'student_enrollment_terms': [],
            'student_holds': [],
        }

        # Track the results of course-level queries to avoid requerying.
        self.canvas_site_map = {}

        # Remove any old data from staging tables.
        for table in tables:
            redshift.execute(
                'TRUNCATE {schema}.{table}',
                schema=self.staging_schema_identifier,
                table=psycopg2.sql.Identifier(table),
            )

        app.logger.info(
            f'Will generate feeds for {len(calnet_profiles)} students (term_id={term_id}).'
        )
        successes = []
        failures = []
        index = 1
        for sid, profile_group in groupby(calnet_profiles,
                                          operator.itemgetter('sid')):
            app.logger.info(
                f'Generating feeds for sid {sid} ({index} of {len(calnet_profiles)})'
            )
            index += 1
            merged_profile = self.generate_or_fetch_merged_profile(
                term_id, sid,
                list(profile_group)[0])
            if merged_profile:
                self.generate_merged_enrollment_terms(merged_profile, term_id)
                self.parse_holds(sid)
                successes.append(sid)
            else:
                failures.append(sid)

        for table in tables:
            if not self.rows[table]:
                continue
            self.upload_to_staging(table)
            if not self.verify_table(table):
                return False

        with redshift.transaction() as transaction:
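            # Promote all staged tables to the destination schema within a single Redshift transaction; bail out if any table fails to refresh.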
            for table in tables:
                if not self.refresh_from_staging(table, term_id, sids,
                                                 transaction):
                    app.logger.error(
                        f'Failed to refresh {self.destination_schema}.{table} from staging.'
                    )
                    return False
            if not transaction.commit():
                app.logger.error(
                    f'Final transaction commit failed for {self.destination_schema}.'
                )
                return False

        with rds.transaction() as transaction:
            if self.refresh_rds_indexes(sids, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                app.logger.error('Failed to refresh RDS indexes.')
                return False

        update_merged_feed_status(term_id, successes, failures)
        app.logger.info('Updated merged feed status.')

        return f'Merged profile generation complete: {len(successes)} successes, {len(failures)} failures.'
Example #17
    def run(self, term_ids=None):
        if not term_ids:
            term_ids = reverse_term_ids()
        app.logger.info(
            f'Starting SIS terms API import job for {len(term_ids)} terms...')

        rows = []
        success_count = 0
        failure_count = 0
        index = 1
        for term_id in term_ids:
            app.logger.info(
                f'Fetching SIS terms API for term id {term_id} ({index} of {len(term_ids)})'
            )
            feed = sis_terms_api.get_term(term_id)
            if feed:
                success_count += 1
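                # Flatten the feed into one TSV row per session within each academic career term.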
                for academic_career_term in feed:
                    for session in academic_career_term.get('sessions', []):
                        rows.append(
                            '\t'.join([
                                academic_career_term.get('id', ''),
                                academic_career_term.get('name', ''),
                                academic_career_term.get('academicCareer',
                                                         {}).get('code', ''),
                                academic_career_term.get('beginDate', ''),
                                academic_career_term.get('endDate', ''),
                                session.get('id', ''),
                                session.get('name', ''),
                                session.get('beginDate', ''),
                                session.get('endDate', ''),
                            ]), )
            else:
                failure_count += 1
                app.logger.error(
                    f'SIS terms API import failed for term id {term_id}.')
            index += 1

        s3_key = f'{get_s3_sis_api_daily_path()}/terms.tsv'
        app.logger.info(
            f'Will stash {len(rows)} rows from {success_count} feeds in S3: {s3_key}'
        )
        if not s3.upload_data('\n'.join(rows), s3_key):
            app.logger.error('Error on S3 upload: aborting job.')
            return False

        app.logger.info('Will copy S3 feeds into Redshift...')
        with redshift.transaction() as transaction:
            if self.update_redshift(term_ids, transaction):
                transaction.commit()
                app.logger.info('Updated Redshift.')
            else:
                transaction.rollback()
                app.logger.error('Failed to update Redshift.')
                return False

        with rds.transaction() as transaction:
            if self.update_rds(rows, term_ids, transaction):
                transaction.commit()
                app.logger.info('Updated RDS.')
            else:
                transaction.rollback()
                app.logger.error('Failed to update RDS.')
                return False

        return f'SIS terms API import job completed: {success_count} succeeded, {failure_count} failed.'
Example #18
    def run(self, csids=None):
        if not csids:
            csids = [row['sid'] for row in get_all_student_ids()]

        app.logger.info(
            f'Starting term GPA import job for {len(csids)} students...')

        rows = []
        success_count = 0
        no_registrations_count = 0
        failure_count = 0
        index = 1
        for csid in csids:
            app.logger.info(
                f'Fetching term GPAs for SID {csid}, ({index} of {len(csids)})'
            )
            feed = sis_student_api.get_term_gpas(csid)
            if feed:
                success_count += 1
                for term_id, term_data in feed.items():
                    rows.append('\t'.join([
                        str(csid),
                        str(term_id),
                        str(term_data.get('gpa') or '0'),
                        str(term_data.get('unitsTakenForGpa') or '0')
                    ]))
            elif feed == {}:
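                # An empty dict means the API call succeeded but returned no registrations; any other falsy response is counted as a failure below.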
                app.logger.info(f'No registrations found for SID {csid}.')
                no_registrations_count += 1
            else:
                failure_count += 1
                app.logger.error(f'Term GPA import failed for SID {csid}.')
            index += 1

        if success_count == 0:
            app.logger.error('Failed to import term GPAs: aborting job.')
            return False

        s3_key = f'{get_s3_sis_api_daily_path()}/term_gpas.tsv'
        app.logger.info(f'Will stash {success_count} feeds in S3: {s3_key}')
        if not s3.upload_data('\n'.join(rows), s3_key):
            app.logger.error('Error on S3 upload: aborting job.')
            return False

        app.logger.info('Will copy S3 feeds into Redshift...')
        if not redshift.execute(
                f'TRUNCATE {self.destination_schema}_staging.student_term_gpas'
        ):
            app.logger.error(
                'Error truncating old staging rows: aborting job.')
            return False
        if not redshift.copy_tsv_from_s3(
                f'{self.destination_schema}_staging.student_term_gpas',
                s3_key):
            app.logger.error('Error on Redshift copy: aborting job.')
            return False
        staging_to_destination_query = resolve_sql_template_string("""
            DELETE FROM {redshift_schema_student}.student_term_gpas
                WHERE sid IN
                (SELECT sid FROM {redshift_schema_student}_staging.student_term_gpas);
            INSERT INTO {redshift_schema_student}.student_term_gpas
                (SELECT * FROM {redshift_schema_student}_staging.student_term_gpas);
            TRUNCATE TABLE {redshift_schema_student}_staging.student_term_gpas;
            """)
        if not redshift.execute(staging_to_destination_query):
            app.logger.error(
                'Error inserting staging entries into destination: aborting job.'
            )
            return False

        with rds.transaction() as transaction:
            if self.refresh_rds_indexes(csids, rows, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                app.logger.error('Failed to refresh RDS indexes.')
                return False

        return (
            f'Term GPA import completed: {success_count} succeeded, '
            f'{no_registrations_count} returned no registrations, {failure_count} failed.'
        )