Exemplo n.º 1
0
    def test_term_id_lists(self, app):
        """Verify relationships among the berkeley term-id list helpers."""
        every_term = set(
            berkeley.reverse_term_ids(include_future_terms=True,
                                      include_legacy_terms=True))
        sis_terms = set(berkeley.reverse_term_ids())
        future_terms = set(berkeley.future_term_ids())
        legacy_terms = set(berkeley.legacy_term_ids())

        # Canvas-integrated terms form a proper subset of the full list.
        assert sis_terms < every_term
        # Each configured boundary term must appear in the full list.
        for config_key in ('EARLIEST_LEGACY_TERM', 'EARLIEST_TERM',
                           'CURRENT_TERM', 'FUTURE_TERM'):
            assert berkeley.sis_term_id_for_name(
                app.config[config_key]) in every_term

        assert berkeley.current_term_id() in sis_terms
        assert berkeley.earliest_term_id() in sis_terms

        # Future terms sit outside the Canvas-integrated window.
        assert future_terms.isdisjoint(sis_terms)
        assert future_terms < every_term
        assert berkeley.future_term_id() in future_terms

        # Legacy terms likewise sit outside the Canvas-integrated window.
        assert legacy_terms.isdisjoint(sis_terms)
        assert legacy_terms < every_term
        assert berkeley.earliest_legacy_term_id() in berkeley.legacy_term_ids()
Exemplo n.º 2
0
 def test_auto_terms(self, app, current_term_index):
     """Spot-check derived term ids against a fixed current-term index."""
     complete_terms = set(
         berkeley.reverse_term_ids(include_future_terms=True, include_legacy_terms=True))
     integrated_terms = set(berkeley.reverse_term_ids())
     assert integrated_terms < complete_terms
     assert '2182' == berkeley.current_term_id()
     assert '2188' == berkeley.future_term_id()
     assert 'canvas-data/term/spring-2018' == berkeley.s3_canvas_data_path_current_term()
Exemplo n.º 3
0
    def generate_merged_enrollment_terms(self, merged_profile, term_id=None):
        """Merge SIS enrollments, Canvas analytics and term GPAs into per-term feeds.

        Appends one tab-separated row (sid, term_id, JSON feed) per non-empty
        term to ``self.rows['student_enrollment_terms']``.

        :param merged_profile: dict expected to carry 'uid', 'sid' and
            'canvasUserId' keys.
        :param term_id: optional single term to process; silently skipped when
            it is not a Canvas-integrated term. When omitted, every
            Canvas-integrated term is processed.
        """
        # Terms outside the Canvas-integrated window have nothing to merge.
        if term_id and term_id not in berkeley.reverse_term_ids():
            return
        elif term_id:
            term_ids = [term_id]
        else:
            term_ids = berkeley.reverse_term_ids()

        uid = merged_profile.get('uid')
        sid = merged_profile.get('sid')
        canvas_user_id = merged_profile.get('canvasUserId')

        canvas_courses_feed = get_canvas_courses_feed(uid)
        merge_canvas_site_map(self.canvas_site_map, canvas_courses_feed)
        terms_feed = get_merged_enrollment_terms(uid, sid, term_ids,
                                                 canvas_courses_feed,
                                                 self.canvas_site_map)
        term_gpas = self.fetch_term_gpas(sid)

        relative_submission_counts = get_relative_submission_counts(
            canvas_user_id)

        # NOTE: the loop variable shadows the term_id parameter from here on.
        for term_id in term_ids:
            app.logger.debug(
                f'Generating merged enrollment term (uid={uid}, sid={sid}, term_id={term_id})'
            )
            # Timestamp for the per-term elapsed-time log line below.
            ts = datetime.now().timestamp()
            term_feed = terms_feed.get(term_id)
            # Persist only terms with at least one enrollment or Canvas site.
            if term_feed and (len(term_feed['enrollments'])
                              or len(term_feed['unmatchedCanvasSites'])):
                term_gpa = next(
                    (t for t in term_gpas if t['term_id'] == term_id), None)
                if term_gpa:
                    term_feed['termGpa'] = {
                        'gpa': float(term_gpa['gpa']),
                        'unitsTakenForGpa':
                        float(term_gpa['units_taken_for_gpa']),
                    }
                # Rebuild our Canvas courses list to remove any courses that were screened out during association (for instance,
                # dropped or athletic enrollments).
                canvas_courses = []
                for enrollment in term_feed.get('enrollments', []):
                    canvas_courses += enrollment['canvasSites']
                canvas_courses += term_feed.get('unmatchedCanvasSites', [])
                # Decorate the Canvas courses list with per-course statistics and return summary statistics.
                app.logger.debug(
                    f'Generating enrollment term analytics (uid={uid}, sid={sid}, term_id={term_id})'
                )
                term_feed['analytics'] = mean_course_analytics_for_user(
                    canvas_courses,
                    canvas_user_id,
                    relative_submission_counts,
                    self.canvas_site_map,
                )
                self.rows['student_enrollment_terms'].append('\t'.join(
                    [str(sid), str(term_id),
                     json.dumps(term_feed)]))
            app.logger.debug(
                f'Enrollment term merge complete (uid={uid}, sid={sid}, term_id={term_id}, '
                f'{datetime.now().timestamp() - ts} seconds)')
Exemplo n.º 4
0
 def generate_student_enrollments_table(self, non_advisee_sids):
     """Regenerate the historical enrollment-terms table for non-advisees.

     :param non_advisee_sids: SIDs whose merged enrollments are collected.
     :return: total count of enrollment rows written across all terms.
     """
     # Split all S3/Redshift operations by term in hope of not overloading memory or other resources.
     # (Using finer-grained batches of SIDs would probably involve replacing the staging table by a Spectrum
     # external table.)
     total_count = 0
     table_name = 'student_enrollment_terms_hist_enr'
     student_schema.truncate_staging_table(table_name)
     for term_id in reverse_term_ids(include_future_terms=True,
                                     include_legacy_terms=True):
         with tempfile.TemporaryFile() as feed_file:
             term_count = self.collect_merged_enrollments(
                 non_advisee_sids, term_id, feed_file)
             if term_count:
                 student_schema.write_file_to_staging(
                     table_name,
                     feed_file,
                     term_count,
                     term_id,
                 )
         # Promote the staged rows once the temp file has been closed.
         if term_count:
             with redshift.transaction() as transaction:
                 student_schema.refresh_from_staging(
                     table_name,
                     term_id,
                     non_advisee_sids,
                     transaction,
                 )
             total_count += term_count
     app.logger.info('Non-advisee term enrollment generation complete.')
     return total_count
 def merge_canvas_analytics_for_term(self, term_id, feed_path):
     """Load the stored Canvas site map for one term from S3.

     Returns an empty dict for terms outside the Canvas-integrated set.
     Raises BackgroundJobError when the expected S3 object is missing.
     """
     if term_id not in reverse_term_ids():
         return {}
     site_map_key = feed_path + f'canvas_site_map_{term_id}.json'
     site_map = s3.get_object_json(site_map_key)
     if site_map:
         return site_map
     raise BackgroundJobError(
         f'Failed to retrieve Canvas site map at {site_map_key}, aborting'
     )
Exemplo n.º 6
0
 def deduplicate(prefix, s3list):
     """Deduplicate S3 objects by .gz basename and verify per-term completeness.

     :param prefix: filename prefix expected for each term's archive
         (archives are named ``{prefix}-{term_id}.gz``).
     :param s3list: list of S3 object dicts (list_objects style), each with
         a 'Key' entry; later entries for the same basename win.
     :return: list of the deduplicated S3 object dicts.
     :raises BackgroundJobError: if any Canvas-integrated or future term is
         missing its expected archive.
     """
     filename_map = {}
     for s3obj in s3list:
         m = re.match(r'.+/(.+\.gz)', s3obj['Key'])
         if m:
             filename_map[m[1]] = s3obj
     for term_id in reverse_term_ids(include_future_terms=True):
         filename = f'{prefix}-{term_id}.gz'
         if filename not in filename_map:
             # Bug fix: the message previously omitted the missing filename
             # (the f-string carried no placeholder), making failures
             # unactionable. Interpolate the computed filename.
             raise BackgroundJobError(f'Expected filename {filename} not found in S3, aborting')
     return list(filename_map.values())
Exemplo n.º 7
0
    def generate_feeds(self):
        """Orchestrate generation of merged student profile and enrollment-term feeds.

        Builds profile tables, uploads advisee lookup maps to S3, refreshes
        future and pre-CS (legacy) terms from stored maps, queues per-term
        analytics jobs on worker nodes, polls until they finish, then
        refreshes Redshift/RDS indexes.

        :return: human-readable status string summarizing successes/failures.
        :raises BackgroundJobError: on any unrecoverable step failure.
        """
        # Translation between canvas_user_id and UID/SID is needed to merge Canvas analytics data and SIS enrollment-based data.
        advisees_by_canvas_id = {}
        advisees_by_sid = {}
        self.successes = []
        self.failures = []
        # generate_student_profile_tables populates the two dicts in place.
        profile_tables = self.generate_student_profile_tables(
            advisees_by_canvas_id, advisees_by_sid)
        if not profile_tables:
            raise BackgroundJobError(
                'Failed to generate student profile tables.')

        feed_path = app.config['LOCH_S3_BOAC_ANALYTICS_DATA_PATH'] + '/feeds/'
        s3.upload_json(advisees_by_canvas_id,
                       feed_path + 'advisees_by_canvas_id.json')

        upload_student_term_maps(advisees_by_sid)

        # Avoid processing Canvas analytics data for future terms and pre-CS terms.
        for term_id in (future_term_ids() + legacy_term_ids()):
            enrollment_term_map = s3.get_object_json(
                feed_path + f'enrollment_term_map_{term_id}.json')
            if enrollment_term_map:
                GenerateMergedEnrollmentTerm().refresh_student_enrollment_term(
                    term_id, enrollment_term_map)

        canvas_integrated_term_ids = reverse_term_ids()
        app.logger.info(
            f'Will queue analytics generation for {len(canvas_integrated_term_ids)} terms on worker nodes.'
        )
        result = queue_merged_enrollment_term_jobs(self.job_id,
                                                   canvas_integrated_term_ids)
        if not result:
            raise BackgroundJobError('Failed to queue enrollment term jobs.')

        refresh_all_from_staging(profile_tables)
        self.update_redshift_academic_standing()
        self.update_rds_profile_indexes()

        app.logger.info(
            'Profile generation complete; waiting for enrollment term generation to finish.'
        )

        # Poll worker job status once per second until no job is still
        # 'created' or 'started'.
        while True:
            sleep(1)
            enrollment_results = get_merged_enrollment_term_job_status(
                self.job_id)
            if not enrollment_results:
                raise BackgroundJobError('Failed to refresh RDS indexes.')
            any_pending_job = next(
                (row for row in enrollment_results
                 if row['status'] == 'created' or row['status'] == 'started'),
                None)
            if not any_pending_job:
                break

        app.logger.info('Exporting analytics data for archival purposes.')
        unload_enrollment_terms([current_term_id(), future_term_id()])

        app.logger.info('Refreshing enrollment terms in RDS.')
        with rds.transaction() as transaction:
            if self.refresh_rds_enrollment_terms(None, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS enrollment terms.')
            else:
                transaction.rollback()
                raise BackgroundJobError(
                    'Failed to refresh RDS enrollment terms.')

        status_string = f'Generated merged profiles ({len(self.successes)} successes, {len(self.failures)} failures).'
        errored = False
        # Aggregate per-term worker details; any 'error' status fails the job.
        for row in enrollment_results:
            status_string += f" {row['details']}"
            if row['status'] == 'error':
                errored = True

        truncate_staging_table('student_enrollment_terms')
        if errored:
            raise BackgroundJobError(status_string)
        else:
            return status_string
Exemplo n.º 8
0
    def run(self, term_ids=None):
        """Import SIS terms API feeds and load them into Redshift and RDS.

        :param term_ids: terms to import; defaults to all Canvas-integrated
            terms.
        :return: status string on success; False on S3 upload or database
            failure.
        """
        if not term_ids:
            term_ids = reverse_term_ids()
        app.logger.info(
            f'Starting SIS terms API import job for {len(term_ids)} terms...')

        rows = []
        success_count = 0
        failure_count = 0
        index = 1
        for term_id in term_ids:
            app.logger.info(
                f'Fetching SIS terms API for term id {term_id} ({index} of {len(term_ids)})'
            )
            feed = sis_terms_api.get_term(term_id)
            if feed:
                success_count += 1
                # One TSV row per session within each academic-career term.
                for academic_career_term in feed:
                    for session in academic_career_term.get('sessions', []):
                        rows.append(
                            '\t'.join([
                                academic_career_term.get('id', ''),
                                academic_career_term.get('name', ''),
                                academic_career_term.get('academicCareer',
                                                         {}).get('code', ''),
                                academic_career_term.get('beginDate', ''),
                                academic_career_term.get('endDate', ''),
                                session.get('id', ''),
                                session.get('name', ''),
                                session.get('beginDate', ''),
                                session.get('endDate', ''),
                            ]), )
            else:
                # A failed term fetch is logged but does not abort the job.
                failure_count += 1
                app.logger.error(
                    f'SIS terms API import failed for term id {term_id}.')
            index += 1

        # Stash the combined TSV in S3, then copy it into the databases.
        s3_key = f'{get_s3_sis_api_daily_path()}/terms.tsv'
        app.logger.info(
            f'Will stash {len(rows)} rows from {success_count} feeds in S3: {s3_key}'
        )
        if not s3.upload_data('\n'.join(rows), s3_key):
            app.logger.error('Error on S3 upload: aborting job.')
            return False

        app.logger.info('Will copy S3 feeds into Redshift...')
        with redshift.transaction() as transaction:
            if self.update_redshift(term_ids, transaction):
                transaction.commit()
                app.logger.info('Updated Redshift.')
            else:
                transaction.rollback()
                app.logger.error('Failed to update Redshift.')
                return False

        with rds.transaction() as transaction:
            if self.update_rds(rows, term_ids, transaction):
                transaction.commit()
                app.logger.info('Updated RDS.')
            else:
                transaction.rollback()
                app.logger.error('Failed to update RDS.')
                return False

        return f'SIS terms API import job completed: {success_count} succeeded, {failure_count} failed.'