Example #1
def sis_api_profiles(app, student_tables):
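    """Runs the SIS student API import inside mock S3 and returns sid/feed rows from student_test.sis_api_profiles."""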
    from nessie.externals import redshift
    from nessie.jobs.import_sis_student_api import ImportSisStudentApi
    with mock_s3(app):
        ImportSisStudentApi().run_wrapped()
    sql = """SELECT sid, feed FROM student_test.sis_api_profiles"""
    return redshift.fetch(sql)
Example #2
def sis_api_last_registrations(app, metadata_db, student_tables):
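    """Runs the registrations import inside mock S3 and returns sid/feed rows from student_test.student_last_registrations."""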
    from nessie.externals import redshift
    from nessie.jobs.import_registrations import ImportRegistrations
    with mock_s3(app):
        ImportRegistrations().run_wrapped()
    sql = """SELECT sid, feed FROM student_test.student_last_registrations"""
    return redshift.fetch(sql)
Example #3
    def test_malformed_filenames(self, app, caplog):
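        """Copies attachments despite malformed source filenames, normalizing the destination object names."""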
        (bucket, source_prefix, dest_prefix) = get_s3_refs(app)
        datestamp = 'all'

        caplog.set_level(logging.INFO)
        with capture_app_logs(app):
            with mock_s3(app, bucket=bucket) as m3:
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/28/12345678_00012_1_May_7_2019_email.pdf'
                ).put(Body=b'extra chars in my name lol')
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/28/23456789_00052_1.png.png'
                ).put(Body=b'somehow i got a redundant .ext')
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/29/23456789_00053_1._DEGREE_COMPLETION_LETTER'
                ).put(Body=b'original file name mistaken for the .ext')
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/29/34567890_00014_2..7.19_(2)-edited_(1)-2_(1)_(1).xls'
                ).put(Body=b'is this a versioning scheme?')

                MigrateSisAdvisingNoteAttachments().run(datestamp=datestamp)

                assert 'Copied 4 attachments to the destination folder.' in caplog.text
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/12345678/12345678_00012_1.pdf')
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/23456789/23456789_00052_1.png')
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/23456789/23456789_00053_1')
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/34567890/34567890_00014_2.xls')
Example #4
    def test_canvas_sync_metadata(self, app, metadata_db):
        """When given a job id, updates metadata on file sync."""
        url = 'http://shakespeare.mit.edu/Poetry/sonnet.XLV.html'
        key = 'canvas/sonnet_submission_dim/sonnet-xlv.txt'

        with mock_s3(app):
            with open(_get_fixtures_path() + '/sonnet_xlv.html', 'r') as file:
                responses.add(responses.GET,
                              url,
                              body=file.read(),
                              headers={'Content-Length': '767'})

            # Run two successive sync jobs on the same file. The first succeeds, the second is skipped as
            # a duplicate.
            metadata.create_canvas_sync_status('job_1', 'sonnet-xlv.txt',
                                               'sonnet_submission_dim', url)
            result = SyncFileToS3().run(url=url,
                                        key=key,
                                        canvas_sync_job_id='job_1')
            assert result is True
            metadata.create_canvas_sync_status('job_2', 'sonnet-xlv.txt',
                                               'sonnet_submission_dim', url)
            result = SyncFileToS3().run(url=url,
                                        key=key,
                                        canvas_sync_job_id='job_2')
            assert result is False

            schema = app.config['REDSHIFT_SCHEMA_METADATA']
            sync_metadata = redshift.fetch(
                f'SELECT * FROM {schema}.canvas_sync_job_status')
            snapshot_metadata = redshift.fetch(
                f'SELECT * FROM {schema}.canvas_synced_snapshots')

            assert len(sync_metadata) == 2
            assert sync_metadata[0]['job_id'] == 'job_1'
            assert sync_metadata[0][
                'destination_url'] == 's3://mock-bucket/canvas/sonnet_submission_dim/sonnet-xlv.txt'
            assert sync_metadata[0]['status'] == 'complete'
            assert sync_metadata[0]['source_size'] == 767
            assert sync_metadata[0]['destination_size'] == 767
            assert sync_metadata[0]['updated_at'] > sync_metadata[0][
                'created_at']
            assert sync_metadata[1]['job_id'] == 'job_2'
            assert sync_metadata[1][
                'destination_url'] == 's3://mock-bucket/canvas/sonnet_submission_dim/sonnet-xlv.txt'
            assert sync_metadata[1]['status'] == 'duplicate'
            assert sync_metadata[1]['source_size'] is None
            assert sync_metadata[1]['destination_size'] is None
            assert sync_metadata[1]['updated_at'] > sync_metadata[1][
                'created_at']

            assert len(snapshot_metadata) == 1
            assert snapshot_metadata[0]['filename'] == 'sonnet-xlv.txt'
            assert snapshot_metadata[0][
                'canvas_table'] == 'sonnet_submission_dim'
            assert snapshot_metadata[0][
                'url'] == 's3://mock-bucket/canvas/sonnet_submission_dim/sonnet-xlv.txt'
            assert snapshot_metadata[0]['size'] == 767
            assert snapshot_metadata[0]['created_at']
            assert snapshot_metadata[0]['deleted_at'] is None
Example #5
def set_up_to_succeed(app, caplog):
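    """Seeds mock S3 with source attachments that all have matching destination copies, yields to the caller, then expects the 'no attachments missing' log message."""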
    (bucket, source_prefix, dest_prefix) = get_s3_refs(app)
    caplog.set_level(logging.INFO)
    with capture_app_logs(app):
        with mock_s3(app, bucket=bucket) as m3:
            m3.Object(bucket,
                      f'{source_prefix}/2017/01/18/12345678_00012_1.pdf').put(
                          Body=b'a note attachment')
            m3.Object(bucket,
                      f'{source_prefix}/2018/12/22/23456789_00003_1.png').put(
                          Body=b'another note attachment')
            m3.Object(bucket,
                      f'{source_prefix}/2019/08/29/34567890_00014_2.xls').put(
                          Body=b'yet another note attachment')
            m3.Object(bucket,
                      f'{dest_prefix}/12345678/12345678_00012_1.pdf').put(
                          Body=b'a note attachment')
            m3.Object(bucket,
                      f'{dest_prefix}/23456789/23456789_00003_1.png').put(
                          Body=b'another note attachment')
            m3.Object(bucket,
                      f'{dest_prefix}/34567890/34567890_00014_2.xls').put(
                          Body=b'yet another note attachment')
            yield
    assert 'No attachments missing on S3 when compared against the view.' in caplog.text
Example #6
    def test_run(self, app, metadata_db):
        """Uploads Canvas grade change logs to S3, then stores feeds in Redshift."""
        with mock_s3(app):
            with override_config(app, 'TEST_CANVAS_COURSE_IDS',
                                 [1492459, 1488704, 1491827]):
                result = ImportCanvasGradeChangeLog().run_wrapped()
                assert result
                assert 'Canvas grade change log import completed for term 2178: 3 succeeded, ' in result
                assert '0 failed.' in result

        assert_background_job_status('ImportCanvasGradeChangeLog')
        schema = app.config['RDS_SCHEMA_METADATA']
        count_results = rds.fetch(
            f'SELECT count(*) FROM {schema}.canvas_api_import_job_status')
        assert count_results[0]['count'] == 3

        canvas_status_results = rds.fetch(
            f'SELECT DISTINCT status FROM {schema}.canvas_api_import_job_status'
        )
        assert len(canvas_status_results) == 1
        assert canvas_status_results[0]['status'] == 'created'

        sync_results = rds.fetch(
            f'SELECT * FROM {schema}.canvas_api_import_job_status LIMIT 1')
        assert sync_results[0]['job_id'].startswith(
            'ImportCanvasGradeChangeLog_')
        assert sync_results[0]['course_id'] == '1492459'
        assert sync_results[0]['table_name'] == 'grade_change_log'
        assert sync_results[0]['details'] is None
        assert sync_results[0]['created_at']
        assert sync_results[0]['updated_at']
Example #7
def set_up_to_fail(app, caplog):
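    """Seeds mock S3 so that source and destination attachments are out of sync, yields inside a pytest.raises(BackgroundJobError) block, then checks the verification error details and log output."""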
    (bucket, source_prefix, dest_prefix) = get_s3_refs(app)
    caplog.set_level(logging.INFO)
    with capture_app_logs(app):
        with mock_s3(app, bucket=bucket) as m3:
            m3.Object(bucket,
                      f'{source_prefix}/2017/01/18/12345678_00012_1.pdf').put(
                          Body=b'a note attachment')
            m3.Object(bucket,
                      f'{source_prefix}/2018/12/22/23456789_00003_1.png').put(
                          Body=b'another note attachment')
            m3.Object(bucket,
                      f'{dest_prefix}/12345678/12345678_00012_1.pdf').put(
                          Body=b'a note attachment')
            m3.Object(bucket,
                      f'{dest_prefix}/34567890/34567890_00014_2.xls').put(
                          Body=b'yet another note attachment')
            m3.Object(
                bucket, f'{dest_prefix}/45678901/45678901_00192_4.xls'
            ).put(
                Body=b'bamboozled by a completely unexpected note attachment')
            with pytest.raises(BackgroundJobError) as e:
                yield
    assert 'Attachments verification found missing attachments or sync failures:' in str(
        e.value)
    assert '\'attachment_sync_failure_count\': 1' in str(e.value)
    assert '\'missing_s3_attachments_count\': 1' in str(e.value)
    assert '\'attachment_sync_failures\': [\'sis-data/sis-sftp/incremental/advising-notes/attachment-files/2018/12/22/23456789_00003_1.png\']' in str(
        e.value)
    assert '\'missing_s3_attachments\': [\'23456789_00003_1.png\']' in str(
        e.value)
    assert 'Attachments missing on S3 when compared against SIS notes views: 1' in caplog.text
Example #8
    def test_sync_canvas_snapshots(self, app, metadata_db, caplog):
        """Dispatches a complete sync job against fixtures."""
        caplog.set_level(logging.INFO)
        with capture_app_logs(app):
            with mock_s3(app):
                result = SyncCanvasSnapshots().run_wrapped()
            assert 'Canvas snapshot sync job dispatched to workers' in result
            assert_background_job_status('sync')
            assert 'Dispatched S3 sync of snapshot quiz_dim-00000-0ab80c7c.gz' in caplog.text
            assert 'Dispatched S3 sync of snapshot requests-00098-b14782f5.gz' in caplog.text
            assert '311 successful dispatches, 0 failures' in caplog.text

            schema = app.config['RDS_SCHEMA_METADATA']

            count_results = rds.fetch(f'SELECT count(*) FROM {schema}.canvas_sync_job_status')
            assert count_results[0]['count'] == 311

            canvas_status_results = rds.fetch(f'SELECT DISTINCT status FROM {schema}.canvas_sync_job_status')
            assert len(canvas_status_results) == 1
            assert canvas_status_results[0]['status'] == 'created'

            sync_results = rds.fetch(f'SELECT * FROM {schema}.canvas_sync_job_status LIMIT 1')
            assert sync_results[0]['job_id'].startswith('sync_')
            assert sync_results[0]['filename'] == 'account_dim-00000-5eb7ee9e.gz'
            assert sync_results[0]['canvas_table'] == 'account_dim'
            assert 'account_dim/part-00505-5c40f1f3-b611-4f64-a007-67b775e984fe.c000.txt.gz' in sync_results[0]['source_url']
            assert sync_results[0]['destination_url'] is None
            assert sync_results[0]['details'] is None
            assert sync_results[0]['created_at']
            assert sync_results[0]['updated_at']
Example #9
    def test_import_student_photos(self, app, metadata_db, student_tables,
                                   caplog):
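        """Runs the student photo import against mock S3 and verifies the uploaded photo key and the per-status rows in photo_import_status."""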
        from nessie.jobs.import_student_photos import ImportStudentPhotos
        caplog.set_level(logging.DEBUG)
        with capture_app_logs(app):
            with mock_s3(app):
                result = ImportStudentPhotos().run_wrapped()
                assert result == 'Student photo import completed: 1 succeeded, 9 had no photo available, 0 failed.'
                response = s3.get_keys_with_prefix('cal1card-data/photos')
                assert len(response) == 1
                assert response[0] == 'cal1card-data/photos/61889.jpg'

            success_rows = rds.fetch(
                f"SELECT * FROM {app.config['RDS_SCHEMA_METADATA']}.photo_import_status WHERE status = 'success'"
            )
            assert len(success_rows) == 1
            assert success_rows[0]['sid'] == '11667051'

            failure_rows = rds.fetch(
                f"SELECT * FROM {app.config['RDS_SCHEMA_METADATA']}.photo_import_status WHERE status = 'failure'"
            )
            assert len(failure_rows) == 0

            not_found_rows = rds.fetch(
                f"SELECT * FROM {app.config['RDS_SCHEMA_METADATA']}.photo_import_status WHERE status = 'photo_not_found'"
            )
            assert len(not_found_rows) == 9
Example #10
    def test_import_registrations(self, app, metadata_db, student_tables, caplog):
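        """Runs the registrations import and verifies the term GPA, last registration, and demographics rows in Redshift."""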
        from nessie.jobs.import_registrations import ImportRegistrations
        rows = redshift.fetch('SELECT * FROM student_test.student_term_gpas')
        assert len(rows) == 0
        rows = redshift.fetch('SELECT * FROM student_test.student_last_registrations')
        assert len(rows) == 0
        caplog.set_level(logging.DEBUG)
        with capture_app_logs(app):
            with mock_s3(app):
                result = ImportRegistrations().run_wrapped()
            assert result == 'Registrations import completed: 2 succeeded, 8 failed.'
            rows = redshift.fetch('SELECT * FROM student_test.student_term_gpas ORDER BY sid')
            assert len(rows) == 11
            for row in rows[0:6]:
                assert row['sid'] == '11667051'
            for row in rows[7:10]:
                assert row['sid'] == '1234567890'
            row_2168 = next(r for r in rows if r['term_id'] == '2168')
            assert row_2168['gpa'] == Decimal('3.000')
            assert row_2168['units_taken_for_gpa'] == Decimal('8.0')

            rows = redshift.fetch('SELECT * FROM student_test.student_last_registrations ORDER BY sid')
            assert len(rows) == 2
            assert rows[0]['sid'] == '11667051'
            assert rows[1]['sid'] == '1234567890'
            feed = json.loads(rows[1]['feed'], strict=False)
            assert feed['term']['id'] == '2172'
            assert feed['academicLevels'][0]['level']['description'] == 'Sophomore'

            rows = redshift.fetch('SELECT * FROM student_test.student_api_demographics ORDER BY sid')
            assert len(rows) == 2
            assert rows[0]['sid'] == '11667051'
            assert rows[1]['sid'] == '1234567890'
            feed = json.loads(rows[1]['feed'], strict=False)
            assert feed['gender']['genderOfRecord']['description'] == 'Female'
Example #11
    def test_run_with_all_param(self, app, caplog):
        """When 'all' is provided, copies all files."""
        (bucket, source_prefix, dest_prefix) = get_s3_refs(app)
        datestamp = 'all'

        caplog.set_level(logging.INFO)
        with capture_app_logs(app):
            with mock_s3(app, bucket=bucket) as m3:
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/28/12345678_00012_1.pdf').put(
                        Body=b'a note attachment')
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/28/23456789_00003_1.png').put(
                        Body=b'another note attachment')
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/29/34567890_00014_2.xls').put(
                        Body=b'ok to copy me')

                response = MigrateSisAdvisingNoteAttachments().run(
                    datestamp=datestamp)

                assert 'Will copy files from /sis-data/sis-sftp/incremental/advising-notes/attachment-files.' in caplog.text
                assert 'Copied 3 attachments to the destination folder.' in caplog.text
                assert response == (
                    'SIS advising note attachment migration complete for sis-data/sis-sftp/incremental/advising-notes/attachment-files.'
                )
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/12345678/12345678_00012_1.pdf')
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/23456789/23456789_00003_1.png')
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/34567890/34567890_00014_2.xls')
Example #12
 def test_import_degree_progress(self, app, metadata_db, student_tables, caplog):
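     """Runs the SIS degree progress API import and verifies the single resulting degree progress row."""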
     from nessie.jobs.import_degree_progress import ImportDegreeProgress
     with mock_s3(app):
         result = ImportDegreeProgress().run_wrapped()
     assert result == 'SIS degree progress API import job completed: 1 succeeded, 9 returned no information, 0 failed.'
     rows = redshift.fetch(f"SELECT * FROM {student_schema()}.{student_schema_table('degree_progress')}")
     assert len(rows) == 1
     assert rows[0]['sid'] == '11667051'
     feed = json.loads(rows[0]['feed'])
     assert feed['requirements']['entryLevelWriting']['status'] == 'Satisfied'
Example #13
 def test_update_manifests(self, app):
     """Updates manifests in S3."""
     from nessie.jobs.create_sis_schema import CreateSisSchema
     with mock_s3(app):
         daily_path = get_s3_sis_daily_path()
         historical_path = app.config[
             'LOCH_S3_SIS_DATA_PATH'] + '/historical'
         self._upload_data_to_s3(daily_path, historical_path)
         assert CreateSisSchema().update_manifests()
         self._assert_complete_manifest(app, daily_path, historical_path)
Example #14
 def test_fallback_update_manifests(self, app):
     """Uses yesterday's news if today's is unavailable."""
     from nessie.jobs.create_sis_schema import CreateSisSchema
     with mock_s3(app):
         yesterday = datetime.now() - timedelta(days=1)
         daily_path = get_s3_sis_daily_path(yesterday)
         historical_path = app.config[
             'LOCH_S3_SIS_DATA_PATH'] + '/historical'
         self._upload_data_to_s3(daily_path, historical_path)
         assert CreateSisSchema().update_manifests()
         self._assert_complete_manifest(app, daily_path, historical_path)
Example #15
    def test_run_with_no_param(self, mock_datetime, app, caplog, metadata_db,
                               prior_job_status):
        """When no parameter is provided, copies new files since the last succesful run."""
        (bucket, source_prefix, dest_prefix) = get_s3_refs(app)
        mock_datetime.utcnow.return_value = datetime(year=2019,
                                                     month=8,
                                                     day=29,
                                                     hour=5,
                                                     minute=21)

        caplog.set_level(logging.INFO)
        with capture_app_logs(app):
            with mock_s3(app, bucket=bucket) as m3:
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/25/45678912_00027_1.pdf').put(
                        Body=b'i\'ve already been copied')
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/26/12345678_00012_1.pdf').put(
                        Body=b'a note attachment')
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/28/23456789_00003_1.png').put(
                        Body=b'another note attachment')
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/29/34567890_00014_2.xls').put(
                        Body=b'don\'t copy me')

                response = MigrateSisAdvisingNoteAttachments().run()

                assert 'Will copy files from /sis-data/sis-sftp/incremental/advising-notes/attachment-files/2019/08/25.' not in caplog.text
                assert 'Will copy files from /sis-data/sis-sftp/incremental/advising-notes/attachment-files/2019/08/26.' in caplog.text
                assert 'Will copy files from /sis-data/sis-sftp/incremental/advising-notes/attachment-files/2019/08/27.' in caplog.text
                assert 'Will copy files from /sis-data/sis-sftp/incremental/advising-notes/attachment-files/2019/08/28.' in caplog.text
                assert 'Will copy files from /sis-data/sis-sftp/incremental/advising-notes/attachment-files/2019/08/29.' not in caplog.text
                assert 'Copied 1 attachments to the destination folder.' in caplog.text
                assert 'Copied 0 attachments to the destination folder.' in caplog.text
                assert response == (
                    'SIS advising note attachment migration complete for sis-data/sis-sftp/incremental/advising-notes/attachment-files/2019/08/26, \
sis-data/sis-sftp/incremental/advising-notes/attachment-files/2019/08/27, \
sis-data/sis-sftp/incremental/advising-notes/attachment-files/2019/08/28.')
                assert not object_exists(
                    m3, bucket, f'{dest_prefix}/45678912/45678912_00027_1.xls')
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/12345678/12345678_00012_1.pdf')
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/23456789/23456789_00003_1.png')
                assert not object_exists(
                    m3, bucket, f'{dest_prefix}/34567890/34567890_00014_2.xls')
Example #16
 def test_aborts_on_missing_term(self, app, caplog):
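     """Deletes an expected enrollments file from mock S3 and expects update_manifests to raise BackgroundJobError."""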
     from nessie.jobs.create_sis_schema import CreateSisSchema
     with mock_s3(app):
         daily_path = get_s3_sis_daily_path()
         historical_path = app.config[
             'LOCH_S3_SIS_DATA_PATH'] + '/historical'
         self._upload_data_to_s3(daily_path, historical_path)
         s3.delete_objects(
             [f'{daily_path}/enrollments/enrollments-2178.gz'])
         with capture_app_logs(app):
             with pytest.raises(BackgroundJobError) as e:
                 CreateSisSchema().update_manifests()
             assert 'Expected filename enrollments-2178.gz not found in S3, aborting' in str(
                 e.value)
Example #17
 def test_import_sis_student_api_v1(self, app, metadata_db, student_tables, caplog):
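     """Runs the SIS student API import with the V1 API preferred and verifies the rows in sis_api_profiles_v1."""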
     from nessie.jobs.import_sis_student_api import ImportSisStudentApi
     with override_config(app, 'STUDENT_V1_API_PREFERRED', True):
         with mock_s3(app):
             result = ImportSisStudentApi().run_wrapped()
         assert result == 'SIS student API V1 import job completed: 4 succeeded, 6 failed.'
         rows = redshift.fetch('SELECT * FROM student_test.sis_api_profiles_v1 ORDER BY sid')
         assert len(rows) == 4
         assert rows[0]['sid'] == '11667051'
         assert rows[1]['sid'] == '1234567890'
         assert rows[2]['sid'] == '2345678901'
         assert rows[3]['sid'] == '5000000000'
         feed = json.loads(rows[0]['feed'], strict=False)
         assert feed['names'][0]['familyName'] == 'Bear'
Example #18
 def test_canvas_sync_metadata(self, app, metadata_db):
     """Makes an API call and puts the result in S3."""
     with mock_s3(app):
         bucket = app.config['LOCH_S3_BUCKET']
         path = '/api/v1/audit/grade_change/courses/7654321'
         s3_key = f'{bucket}/grade_change_log/grade_change_log_7654321'
         result = ImportCanvasApiData().run_wrapped(
             course_id='7654321',
             path=path,
             s3_key=s3_key,
             job_id='ImportCanvasGradeChangeLog_123',
         )
         assert result is True
         assert s3.object_exists(f'{s3_key}_0.json') is True
Example #19
 def test_import_sis_student_api(self, app, metadata_db, student_tables,
                                 caplog):
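     """Runs the SIS student API import and verifies the two successfully imported profile rows."""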
     from nessie.jobs.import_sis_student_api import ImportSisStudentApi
     with mock_s3(app):
         result = ImportSisStudentApi().run_wrapped()
     assert result == 'SIS student API import job completed: 2 succeeded, 6 failed.'
     rows = redshift.fetch(
         'SELECT * FROM student_test.sis_api_profiles ORDER BY sid')
     print(rows)
     assert len(rows) == 2
     assert rows[0]['sid'] == '11667051'
     assert rows[1]['sid'] == '2345678901'
     feed = json.loads(rows[0]['feed'], strict=False)
     assert feed['names'][0]['familyName'] == 'Bear'
Example #20
 def test_import_term_gpas(self, app, metadata_db, student_tables, caplog):
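     """Runs the term GPA import and verifies the imported rows for the single successful SID."""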
     from nessie.jobs.import_term_gpas import ImportTermGpas
     caplog.set_level(logging.DEBUG)
     with capture_app_logs(app):
         with mock_s3(app):
             result = ImportTermGpas().run_wrapped()
         assert result == 'Term GPA import completed: 1 succeeded, 0 returned no registrations, 7 failed.'
         rows = redshift.fetch(
             'SELECT * FROM student_test.student_term_gpas')
         assert len(rows) == 7
         for row in rows:
             assert row['sid'] == '11667051'
         row_2178 = next(r for r in rows if r['term_id'] == '2178')
         assert row_2178['gpa'] == Decimal('3.000')
         assert row_2178['units_taken_for_gpa'] == Decimal('8.0')
Example #21
 def test_import_sis_enrollments_api(self, app, metadata_db, student_tables,
                                     caplog):
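     """Runs the SIS enrollments API import for term 2178 and verifies the drops-and-midterms feed for the one successful SID."""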
     from nessie.jobs.import_sis_enrollments_api import ImportSisEnrollmentsApi
     with mock_s3(app):
         result = ImportSisEnrollmentsApi().run_wrapped()
     assert result == 'SIS enrollments API import completed for term 2178: 1 succeeded, 7 returned no enrollments, 0 failed.'
     rows = redshift.fetch(
         'SELECT * FROM student_test.sis_api_drops_and_midterms')
     assert len(rows) == 1
     assert rows[0]['sid'] == '11667051'
     feed = json.loads(rows[0]['feed'])
     assert feed['droppedPrimarySections'][0]['displayName'] == 'MUSIC 41C'
     assert feed['droppedPrimarySections'][0]['component'] == 'TUT'
     assert feed['droppedPrimarySections'][0]['sectionNumber'] == '002'
     assert feed['midtermGrades']['90100'] == 'D+'
Example #22
    def test_fallback_update_manifests(self, app):
        """Uses yesterday's news if today's is unavailable."""
        with mock_s3(app):
            yesterday = datetime.now() - timedelta(days=1)
            daily_path = get_s3_sis_daily_path(yesterday)
            historical_path = app.config[
                'LOCH_S3_SIS_DATA_PATH'] + '/historical'
            manifest_path = app.config['LOCH_S3_SIS_DATA_PATH'] + '/manifests'

            s3.upload_data('some new course data',
                           f'{daily_path}/courses/courses-aaa.gz')
            s3.upload_data('some more new course data',
                           f'{daily_path}/courses/courses-bbb.gz')
            s3.upload_data('some new enrollment data',
                           f'{daily_path}/enrollments/enrollments-ccc.gz')
            s3.upload_data('some old course data',
                           f'{historical_path}/courses/courses-ddd.gz')
            s3.upload_data(
                'some old enrollment data',
                f'{historical_path}/enrollments/enrollments-eee.gz')
            s3.upload_data(
                'some perfectly antique enrollment data',
                f'{historical_path}/enrollments/enrollments-fff.gz')

            assert CreateSisSchema().update_manifests()

            courses_manifest = json.loads(
                s3.get_object_text(manifest_path + '/courses.json'))
            assert len(courses_manifest['entries']) == 3
            assert courses_manifest['entries'][0][
                'url'] == f's3://{app.config["LOCH_S3_BUCKET"]}/{daily_path}/courses/courses-aaa.gz'
            assert courses_manifest['entries'][0]['meta'][
                'content_length'] == 20

            enrollments_manifest = json.loads(
                s3.get_object_text(manifest_path + '/enrollments.json'))
            assert len(enrollments_manifest['entries']) == 3
            assert (
                enrollments_manifest['entries'][2]['url'] ==
                f's3://{app.config["LOCH_S3_BUCKET"]}/{historical_path}/enrollments/enrollments-fff.gz'
            )
            assert enrollments_manifest['entries'][2]['meta'][
                'content_length'] == 38
Example #23
 def test_import_sis_student_api(self, app, metadata_db, student_tables, caplog):
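     """Verifies that the SIS student API import populates an initially empty sis_api_profiles table with three profiles."""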
     from nessie.jobs.import_sis_student_api import ImportSisStudentApi
     initial_rows = redshift.fetch('SELECT * FROM student_test.sis_api_profiles ORDER BY sid')
     assert len(initial_rows) == 0
     with mock_s3(app):
         result = ImportSisStudentApi().run_wrapped()
     assert result == 'SIS student API import job completed: 3 succeeded, 7 failed.'
     rows = redshift.fetch('SELECT * FROM student_test.sis_api_profiles ORDER BY sid')
     assert len(rows) == 3
     assert rows[0]['sid'] == '11667051'
     feed = json.loads(rows[0]['feed'], strict=False)
     assert feed['names'][0]['familyName'] == 'Bear'
     assert feed['registrations'][0]['term']['id'] == '2178'
     assert rows[1]['sid'] == '1234567890'
     feed = json.loads(rows[1]['feed'], strict=False)
     # Needed to test proper sis_profile merging of last_registrations table.
     assert not feed.get('registrations')
     assert rows[2]['sid'] == '2345678901'
     feed = json.loads(rows[2]['feed'], strict=False)
     assert feed['registrations'][0]['term']['id'] == '2178'
Example #24
    def test_import_registrations_batch_mode(self, app, metadata_db, student_tables, caplog):
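        """Runs the registrations import in batch mode and verifies that successive batch runs rotate through previously imported SIDs."""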
        from nessie.jobs.import_registrations import ImportRegistrations
        with mock_s3(app):
            ImportRegistrations().run_wrapped()
            rows = rds.fetch('SELECT * FROM nessie_metadata_test.registration_import_status')
            assert len(rows) == 10

            with override_config(app, 'CYCLICAL_API_IMPORT_BATCH_SIZE', 9):

                def _success_history_after_batch_import():
                    result = ImportRegistrations().run_wrapped(load_mode='batch')
                    assert result == 'Registrations import completed: 1 succeeded, 8 failed.'
                    rows = rds.fetch("SELECT * FROM nessie_metadata_test.registration_import_status WHERE status = 'success' ORDER BY updated_at")
                    assert len(rows) == 2
                    assert rows[0]['updated_at'] < rows[1]['updated_at']
                    return (rows[0]['sid'], rows[1]['sid'])

                sid_1, sid_2 = _success_history_after_batch_import()
                assert _success_history_after_batch_import() == (sid_2, sid_1)
                assert _success_history_after_batch_import() == (sid_1, sid_2)
Example #25
    def test_list_keys_matching_prefix(self, app):
        """Lists keys matching prefix."""
        bucket = app.config['LOCH_S3_BUCKET']
        prefix = app.config[
            'LOCH_S3_CANVAS_DATA_PATH_CURRENT_TERM'] + '/requests'

        with mock_s3(app) as m:
            m.Object(bucket,
                     f'{prefix}/requests-aaa.gz').put(Body=b'some data')
            m.Object(bucket,
                     f'{prefix}/requests-bbb.gz').put(Body=b'some more data')
            m.Object(bucket,
                     f'{prefix}/requests-ccc.gz').put(Body=b'yet more data')
            m.Object(bucket, 'another-prefix/requests-ddd.gz').put(
                Body=b'utterly unrelated data')

            response = s3.get_keys_with_prefix(prefix)
            assert len(response) == 3
            assert f'{prefix}/requests-aaa.gz' in response
            assert f'{prefix}/requests-bbb.gz' in response
            assert f'{prefix}/requests-ccc.gz' in response
Example #26
    def test_first_time_run_with_no_param(self, mock_datetime, app, caplog,
                                          metadata_db):
        """When no parameter is provided and there is no prior successful run, copies all files."""
        (bucket, source_prefix, dest_prefix) = get_s3_refs(app)
        mock_datetime.utcnow.return_value = datetime(year=2019,
                                                     month=8,
                                                     day=29,
                                                     hour=5,
                                                     minute=21)

        caplog.set_level(logging.INFO)
        with capture_app_logs(app):
            with mock_s3(app, bucket=bucket) as m3:
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/28/12345678_00012_1.pdf').put(
                        Body=b'a note attachment')
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/28/23456789_00003_1.png').put(
                        Body=b'another note attachment')
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/29/34567890_00014_2.xls').put(
                        Body=b'ok to copy me')

                response = MigrateSisAdvisingNoteAttachments().run()

                assert 'Will copy files from /sis-data/sis-sftp/incremental/advising-notes/attachment-files/.' in caplog.text
                assert 'Copied 3 attachments to the destination folder.' in caplog.text
                assert response == (
                    'SIS advising note attachment migration complete for sis-data/sis-sftp/incremental/advising-notes/attachment-files/.'
                )
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/12345678/12345678_00012_1.pdf')
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/23456789/23456789_00003_1.png')
                assert object_exists(
                    m3, bucket, f'{dest_prefix}/34567890/34567890_00014_2.xls')
Example #27
    def test_run_with_invalid_param(self, app, caplog):
        """When invalid value is provided, job completes but copies zero files."""
        (bucket, source_prefix, dest_prefix) = get_s3_refs(app)
        datestamp = 'wrong!#$&'

        caplog.set_level(logging.INFO)
        with capture_app_logs(app):
            with mock_s3(app, bucket=bucket) as m3:
                m3.Object(
                    bucket,
                    f'{source_prefix}/2019/08/28/12345678_00012_1.pdf').put(
                        Body=b'a note attachment')

                response = MigrateSisAdvisingNoteAttachments().run(
                    datestamp=datestamp)

                assert 'Will copy files from /sis-data/sis-sftp/incremental/advising-notes/attachment-files/wrong!#$&.' in caplog.text
                assert 'Copied 0 attachments to the destination folder.' in caplog.text
                assert response == (
                    'SIS advising note attachment migration complete for sis-data/sis-sftp/incremental/advising-notes/attachment-files/wrong!#$&.'
                )
                assert not object_exists(
                    m3, bucket, f'{dest_prefix}/12345678/12345678_00012_1.pdf')
Example #28
 def test_metadata_tracked(self, app, metadata_db, student_tables, caplog):
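     """Verifies that rows in registration_import_status control which SIDs are re-imported across load modes and after manual status changes."""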
     from nessie.jobs.import_registrations import ImportRegistrations
     rows = rds.fetch('SELECT * FROM nessie_metadata_test.registration_import_status')
     assert len(rows) == 0
     caplog.set_level(logging.DEBUG)
     with capture_app_logs(app):
         with mock_s3(app):
             ImportRegistrations().run_wrapped()
             rows = rds.fetch('SELECT * FROM nessie_metadata_test.registration_import_status')
             assert len(rows) == 10
             assert len([r for r in rows if r['status'] == 'failure']) == 8
             assert next(r['status'] for r in rows if r['sid'] == '11667051') == 'success'
             result = ImportRegistrations().run_wrapped()
             assert result == 'Registrations import completed: 0 succeeded, 8 failed.'
             result = ImportRegistrations().run_wrapped(load_mode='all')
             assert result == 'Registrations import completed: 2 succeeded, 8 failed.'
             rds.execute("DELETE FROM nessie_metadata_test.registration_import_status WHERE sid = '11667051'")
             result = ImportRegistrations().run_wrapped()
             assert result == 'Registrations import completed: 1 succeeded, 8 failed.'
             assert next(r['status'] for r in rows if r['sid'] == '11667051') == 'success'
             rds.execute("UPDATE nessie_metadata_test.registration_import_status SET status='failure' WHERE sid = '11667051'")
             result = ImportRegistrations().run_wrapped()
             assert result == 'Registrations import completed: 1 succeeded, 8 failed.'
             assert next(r['status'] for r in rows if r['sid'] == '11667051') == 'success'
Example #29
    def test_generate_demographics_feeds(self, app, student_tables):
        """Builds JSON feeds and uploads to S3."""
        from nessie.jobs.create_edl_schema import CreateEdlSchema
        with override_config(app, 'FEATURE_FLAG_ENTERPRISE_DATA_LAKE', True):
            with mock_s3(app):
                CreateEdlSchema().generate_demographics_feeds()

            rows = redshift.fetch(
                f'SELECT * FROM {student_schema()}.student_demographics')
            assert len(rows) == 11
            assert rows[0]['sid'] == '11667051'
            feed = json.loads(rows[0]['feed'])
            assert feed['gender'] == 'Female'
            assert feed['ethnicities'] == [
                'African-American / Black', 'Chinese / Chinese-American',
                'East Indian / Pakistani'
            ]
            assert feed['nationalities'] == ['Singapore']
            assert feed['underrepresented'] is True
            assert feed['visa']['visa_type'] == 'PR'
            assert feed['visa']['visa_status'] == 'A'

            assert rows[1]['sid'] == '1234567890'
            feed = json.loads(rows[1]['feed'])
            assert feed['gender'] == 'Male'
            assert feed['ethnicities'] == [
                'Mexican / Mexican-American / Chicano', 'White'
            ]
            assert feed['nationalities'] == ['Iran (Islamic Republic Of)']
            assert feed['underrepresented'] is True
            assert feed['visa']['visa_type'] == 'F1'
            assert feed['visa']['visa_status'] == 'A'

            assert rows[2]['sid'] == '2345678901'
            feed = json.loads(rows[2]['feed'])
            assert feed['gender'] == 'Female'
            assert feed['ethnicities'] == ['White']
            assert feed['nationalities'] == ['Taiwan']
            assert feed['underrepresented'] is False
            assert feed['visa']['visa_type'] is None
            assert feed['visa']['visa_status'] is None

            assert rows[3]['sid'] == '3456789012'
            feed = json.loads(rows[3]['feed'])
            assert feed['gender'] == 'Decline to State'
            assert feed['ethnicities'] == [
                'American Indian / Alaska Native',
                'Filipino / Filipino-American'
            ]
            assert feed['nationalities'] == ['Korea, Republic of']
            assert feed['underrepresented'] is True
            assert feed['visa']['visa_type'] == 'J1'
            assert feed['visa']['visa_status'] == 'G'

            assert rows[4]['sid'] == '5000000000'
            feed = json.loads(rows[4]['feed'])
            assert feed['gender'] == 'Female'
            assert feed['ethnicities'] == ['Not Specified']
            assert feed['nationalities'] == []
            assert feed['underrepresented'] is False
            assert feed['visa']['visa_type'] is None
            assert feed['visa']['visa_status'] is None

            assert rows[7]['sid'] == '8901234567'
            feed = json.loads(rows[7]['feed'])
            assert feed['gender'] == 'Decline to State'
            assert feed['ethnicities'] == ['Not Specified']
            assert feed['nationalities'] == []
            assert feed['underrepresented'] is False
            assert feed['visa']['visa_type'] is None
            assert feed['visa']['visa_status'] is None

            assert rows[9]['sid'] == '9000000000'
            feed = json.loads(rows[9]['feed'])
            assert feed['gender'] == 'Nonbinary'
            assert feed['ethnicities'] == [
                'African-American / Black', 'Other Asian', 'Pacific Islander'
            ]
            assert feed['nationalities'] == [
                "Lao People's Democratic Rep", 'Saint Kitts and Nevis'
            ]
            assert feed['underrepresented'] is True
            assert feed['visa']['visa_type'] is None
            assert feed['visa']['visa_status'] is None
Example #30
    def test_resync_canvas_snapshots(self, app, metadata_db, caplog):
        """Dispatches a complete resync job against fixtures."""
        caplog.set_level(logging.INFO)
        snapshots = canvas_data.get_snapshots()['files']

        def mock_metadata(job_id, snapshot, status, destination_size):
            metadata.create_canvas_sync_status(job_id, snapshot['filename'],
                                               snapshot['table'],
                                               snapshot['url'])
            key = '/'.join([
                get_s3_canvas_daily_path(), snapshot['table'],
                snapshot['filename']
            ])
            metadata.update_canvas_sync_status(
                job_id,
                key,
                status,
                source_size=1048576,
                destination_size=destination_size)

        old_sync_job = 'sync_152550000'
        latest_sync_job = 'sync_152560000'

        # The older job should be ignored by the resync.
        for snapshot in snapshots[0:5]:
            mock_metadata(old_sync_job, snapshot, 'complete', 1048576)
        for snapshot in snapshots[5:10]:
            mock_metadata(old_sync_job, snapshot, 'error', None)

        # The latest job synced five files successfully and ran into three problems.
        for snapshot in snapshots[10:15]:
            mock_metadata(latest_sync_job, snapshot, 'complete', 1048576)
        stalled = snapshots[15]
        errored = snapshots[16]
        size_discrepancy = snapshots[17]
        mock_metadata(latest_sync_job, stalled, 'streaming', None)
        mock_metadata(latest_sync_job, errored, 'error', None)
        mock_metadata(latest_sync_job, size_discrepancy, 'complete', 65536)

        schema = app.config['RDS_SCHEMA_METADATA']

        with capture_app_logs(app):
            assert rds.fetch(
                f'SELECT count(*) FROM {schema}.canvas_sync_job_status'
            )[0]['count'] == 18
            with mock_s3(app):
                result = ResyncCanvasSnapshots().run_wrapped()
            assert 'Canvas snapshot resync job dispatched to workers' in result
            assert_background_job_status('resync')
            assert f"Dispatched S3 resync of snapshot {stalled['filename']}" in caplog.text
            assert f"Dispatched S3 resync of snapshot {errored['filename']}" in caplog.text
            assert f"Dispatched S3 resync of snapshot {size_discrepancy['filename']}" in caplog.text
            assert '3 successful dispatches, 0 failures' in caplog.text

        assert rds.fetch(
            f'SELECT count(*) FROM {schema}.canvas_sync_job_status'
        )[0]['count'] == 21
        resync_results = rds.fetch(
            f"SELECT * FROM {schema}.canvas_sync_job_status WHERE job_id LIKE 'resync%'"
        )
        assert len(resync_results) == 3

        urls = []
        for r in resync_results:
            assert r['job_id'].startswith('resync_')
            assert r['filename']
            assert r['canvas_table']
            assert r['created_at']
            assert r['updated_at']
            urls.append(r['source_url'])
        assert stalled['url'] in urls
        assert errored['url'] in urls
        assert size_discrepancy['url'] in urls