Example No. 1
def run(self):
    app.logger.info('Starting student schema creation job...')
    app.logger.info('Executing SQL...')
    resolved_ddl = resolve_sql_template('create_student_schema.template.sql')
    if redshift.execute_ddl_script(resolved_ddl):
        app.logger.info(f"Schema '{app.config['REDSHIFT_SCHEMA_STUDENT']}' found or created.")
    else:
        app.logger.error('Student schema creation failed.')
        return False
    resolved_ddl_staging = resolve_sql_template(
        'create_student_schema.template.sql',
        redshift_schema_student=app.config['REDSHIFT_SCHEMA_STUDENT'] + '_staging',
    )
    if redshift.execute_ddl_script(resolved_ddl_staging):
        app.logger.info(f"Schema '{app.config['REDSHIFT_SCHEMA_STUDENT']}_staging' found or created.")
    else:
        app.logger.error('Student staging schema creation failed.')
        return False
    return True
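Every example in this listing funnels through `resolve_sql_template`, whose body is never shown. A minimal sketch of what such a helper might look like, assuming `{name}` tokens filled from lower-cased app config keys plus per-call keyword overrides (the template directory, merge order, and use of str.format are assumptions, not confirmed by these snippets):

import os

SQL_TEMPLATE_DIR = 'nessie/sql_templates'  # assumed location

def resolve_sql_template_string(template_string, **kwargs):
    # Merge lower-cased app config keys with per-call overrides; kwargs win.
    substitutions = {k.lower(): v for k, v in app.config.items()}
    substitutions.update(kwargs)
    # Note: str.format would require literal braces in the SQL to be escaped;
    # the real helper may use a more forgiving token syntax.
    return template_string.format(**substitutions)

def resolve_sql_template(template_name, **kwargs):
    # Read the template file, then substitute tokens.
    with open(os.path.join(SQL_TEMPLATE_DIR, template_name)) as f:
        return resolve_sql_template_string(f.read(), **kwargs)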
Example No. 2
    def run(self):
        app.logger.info('Starting intermediate table generation job...')

        sis_source_schema = (
            app.config['REDSHIFT_SCHEMA_EDL']
            if app.config['FEATURE_FLAG_EDL_SIS_VIEWS']
            else app.config['REDSHIFT_SCHEMA_SIS']
        )

        resolved_ddl_redshift = resolve_sql_template(
            'create_intermediate_schema.template.sql',
            current_term_id=current_term_id(),
            redshift_schema_sis=sis_source_schema,
        )
        if redshift.execute_ddl_script(resolved_ddl_redshift):
            app.logger.info('Redshift tables generated.')
        else:
            raise BackgroundJobError('Intermediate table creation job failed.')

        resolved_ddl_rds = resolve_sql_template(
            'update_rds_indexes_sis.template.sql')
        if rds.execute(resolved_ddl_rds):
            app.logger.info('RDS indexes updated.')
        else:
            raise BackgroundJobError(
                'Failed to update RDS indexes for intermediate schema.')

        return 'Intermediate table generation job completed.'
Example No. 3
    def run(self):
        app.logger.info('Start generating canvas caliper analytics')
        redshift_schema_caliper_analytics = app.config['REDSHIFT_SCHEMA_CALIPER']
        redshift_schema_lrs_external = app.config['REDSHIFT_SCHEMA_LRS']
        canvas_caliper_explode_table = 'canvas_caliper_explode'

        # Because the Caliper incrementals are provided by a Glue job running on a different schedule, the latest batch
        # may have been delivered before last midnight UTC.
        s3_caliper_daily_path = get_s3_daily_canvas_caliper_explode_path()
        if not s3.get_keys_with_prefix(s3_caliper_daily_path):
            s3_caliper_daily_path = get_s3_daily_canvas_caliper_explode_path(datetime.now() - timedelta(days=1))
            if not s3.get_keys_with_prefix(s3_caliper_daily_path):
                raise BackgroundJobError('No timely S3 Caliper extracts found')
            else:
                app.logger.info("Falling back to yesterday's S3 Caliper extracts")
        s3_caliper_daily_url = s3.build_s3_url(s3_caliper_daily_path)

        resolved_ddl_caliper_explode = resolve_sql_template(
            'create_lrs_canvas_explode_table.template.sql',
            canvas_caliper_explode_table=canvas_caliper_explode_table,
            loch_s3_caliper_explode_url=s3_caliper_daily_url,
        )
        redshift.drop_external_schema(redshift_schema_lrs_external)
        if redshift.execute_ddl_script(resolved_ddl_caliper_explode):
            app.logger.info('Caliper explode schema and table successfully created.')
        else:
            raise BackgroundJobError('Caliper explode schema and table creation failed.')

        # Sanity-check event times from the latest Caliper batch against previously transformed event times.
        def datetime_from_query(query):
            response = redshift.fetch(query)
            timestamp = response and response[0] and response[0].get('timestamp')
            if not timestamp:
                raise BackgroundJobError(f'Timestamp query failed to return data for comparison; aborting job: {query}')
            if isinstance(timestamp, str):
                timestamp = datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')
            return timestamp

        earliest_untransformed = datetime_from_query(
            f'SELECT MIN(timestamp) AS timestamp FROM {redshift_schema_lrs_external}.{canvas_caliper_explode_table}',
        )
        latest_transformed = datetime_from_query(
            f'SELECT MAX(timestamp) AS timestamp FROM {redshift_schema_caliper_analytics}.canvas_caliper_user_requests',
        )
        if not earliest_untransformed or not latest_transformed:
            return False
        timestamp_diff = (earliest_untransformed - latest_transformed).total_seconds()
        lower_bound_tolerance, upper_bound_tolerance = app.config['LOCH_CANVAS_CALIPER_TIMESTAMP_DISCREPANCY_TOLERANCE']
        if timestamp_diff < lower_bound_tolerance or timestamp_diff > upper_bound_tolerance:
            raise BackgroundJobError(
                f'Unexpected difference between Caliper timestamps: latest transformed {latest_transformed}, '
                f'earliest untransformed {earliest_untransformed}',
            )

        resolved_ddl_caliper_analytics = resolve_sql_template('generate_caliper_analytics.template.sql')
        if redshift.execute_ddl_script(resolved_ddl_caliper_analytics):
            return 'Caliper analytics tables successfully created.'
        else:
            raise BackgroundJobError('Caliper analytics tables creation failed.')
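The tolerance check above reads `LOCH_CANVAS_CALIPER_TIMESTAMP_DISCREPANCY_TOLERANCE` as a (lower, upper) pair of seconds. A hypothetical setting (the numbers are illustrative, not taken from the source):

# Tolerate up to 5 minutes of overlap between batches (negative lower bound)
# and up to 24 hours of gap (upper bound), both in seconds.
LOCH_CANVAS_CALIPER_TIMESTAMP_DISCREPANCY_TOLERANCE = [-300, 86400]

# timestamp_diff = (earliest_untransformed - latest_transformed).total_seconds()
# With these values, the job raises BackgroundJobError when
# timestamp_diff < -300 or timestamp_diff > 86400.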
Example No. 4
    def verify_migration(self, incremental_url, incremental_schema):
        redshift.drop_external_schema(incremental_schema)
        resolved_ddl_transient = resolve_sql_template(
            'create_lrs_statements_table.template.sql',
            redshift_schema_lrs_external=incremental_schema,
            loch_s3_lrs_statements_path=incremental_url,
        )
        if redshift.execute_ddl_script(resolved_ddl_transient):
            app.logger.info(
                f"LRS incremental schema '{incremental_schema}' created.")
        else:
            raise BackgroundJobError(
                f"LRS incremental schema '{incremental_schema}' creation failed."
            )

        redshift_response = redshift.fetch(
            f'select count(*) from {incremental_schema}.statements')
        if redshift_response:
            redshift_statement_count = redshift_response[0].get('count')
        else:
            raise BackgroundJobError(
                f"Failed to verify LRS incremental schema '{incremental_schema}'."
            )

        if redshift_statement_count == self.lrs_statement_count:
            app.logger.info(
                f'Verified {redshift_statement_count} rows migrated from LRS to {incremental_url}.'
            )
        else:
            raise BackgroundJobError(
                f'Discrepancy between LRS ({self.lrs_statement_count} statements)'
                f' and {incremental_url} ({redshift_statement_count} statements).'
            )
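This example and Examples 6 and 13 below repeat the same fetch-count-and-compare steps. A hypothetical shared helper (name and signature are not from the source) could factor the pattern out:

def verify_statement_count(schema_table, expected_count, description):
    # Fetch a row count from Redshift and compare it to an expected value.
    response = redshift.fetch(f'select count(*) from {schema_table}')
    if not response:
        raise BackgroundJobError(f'Failed to fetch count from {schema_table}.')
    actual_count = response[0].get('count')
    if actual_count != expected_count:
        raise BackgroundJobError(
            f'Discrepancy for {description}: expected {expected_count} statements,'
            f' found {actual_count}.',
        )
    app.logger.info(f'Verified {actual_count} statements for {description}.')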
Example No. 5
def student_tables(app):
    """Use Postgres to mock the Redshift student schemas on local test runs."""
    import json

    from nessie.externals import rds, redshift
    from nessie.lib.util import resolve_sql_template, resolve_sql_template_string
    rds.execute(resolve_sql_template('create_rds_indexes.template.sql'))
    fixture_path = f"{app.config['BASE_DIR']}/fixtures"
    with open(f'{fixture_path}/students.sql', 'r') as sql_file:
        student_sql = sql_file.read()
    params = {}
    for key in [
            'sis_api_drops_and_midterms_11667051_2178',
            'sis_degree_progress_11667051',
            'sis_student_api_11667051',
            'sis_student_api_2345678901',
    ]:
        with open(f'{fixture_path}/{key}.json', 'r') as f:
            feed = f.read()
            if key.startswith('sis_student_api'):
                feed = json.dumps(json.loads(feed)['apiResponse']['response']['any']['students'][0])
            params[key] = feed
    redshift.execute(resolve_sql_template_string(student_sql), params=params)
    yield
    for schema in ['asc_test', 'coe_test', 'student_test']:
        rds.execute(f'DROP SCHEMA {schema} CASCADE')
        redshift.execute(f'DROP SCHEMA {schema} CASCADE')
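The `yield` separating setup from teardown marks `student_tables` as a pytest fixture; in the original suite it would presumably be registered with a decorator, roughly (the scope is a guess):

import pytest

@pytest.fixture(scope='session')
def student_tables(app):
    ...  # setup, yield, teardown as above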
Example No. 6
    def verify_unloaded_count(self, url):
        url = path.split(url)[0]
        schema = app.config['REDSHIFT_SCHEMA_LRS']
        resolved_ddl_transient_unloaded = resolve_sql_template(
            'create_lrs_statements_unloaded_table.template.sql',
            redshift_schema_lrs_external=schema,
            loch_s3_lrs_statements_unloaded_path=url,
        )
        if redshift.execute_ddl_script(resolved_ddl_transient_unloaded):
            app.logger.info(
                f"statements_unloaded table created in schema '{schema}'.")
        else:
            raise BackgroundJobError(
                f"Failed to create statements_unloaded table in schema '{schema}'."
            )

        redshift_response = redshift.fetch(
            f'select count(*) from {schema}.statements_unloaded')
        if redshift_response:
            unloaded_statement_count = redshift_response[0].get('count')
        else:
            raise BackgroundJobError('Failed to get unloaded statement count.')

        if unloaded_statement_count == self.lrs_statement_count:
            app.logger.info(
                f'Verified {unloaded_statement_count} unloaded from LRS to {url}.'
            )
        else:
            raise BackgroundJobError(
                f'Discrepancy between LRS ({self.lrs_statement_count} statements)'
                f' and {url} ({unloaded_statement_count} statements).')
Example No. 7
 def create_rds_indexes(self):
     resolved_ddl = resolve_sql_template('index_advisors.template.sql')
     if rds.execute(resolved_ddl):
         app.logger.info('Created RDS indexes for advisor schema.')
     else:
         raise BackgroundJobError(
             'Failed to create RDS indexes for advisor schema.')
Example No. 8
 def index_appointment_advisors(self):
     resolved_ddl = resolve_sql_template(
         'index_sis_appointment_advisors.template.sql')
     if rds.execute(resolved_ddl):
         app.logger.info('Indexed appointment advisors.')
     else:
         raise BackgroundJobError('Failed to index appointment advisors.')
Example No. 9
    def run(self):
        app.logger.info('Starting Canvas schema creation job...')

        canvas_path = get_s3_canvas_daily_path()
        if not s3.get_keys_with_prefix(canvas_path):
            canvas_path = get_s3_canvas_daily_path(datetime.now() - timedelta(days=1))
            if not s3.get_keys_with_prefix(canvas_path):
                raise BackgroundJobError(
                    'No timely Canvas data found, aborting')
            else:
                app.logger.info('Falling back to yesterday\'s Canvas data')

        external_schema = app.config['REDSHIFT_SCHEMA_CANVAS']
        s3_prefix = 's3://' + app.config['LOCH_S3_BUCKET'] + '/'
        s3_canvas_data_url = s3_prefix + canvas_path
        s3_canvas_data_path_current_term = s3_prefix + berkeley.s3_canvas_data_path_current_term()

        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template(
            'create_canvas_schema.template.sql',
            loch_s3_canvas_data_path_today=s3_canvas_data_url,
            loch_s3_canvas_data_path_current_term=s3_canvas_data_path_current_term,
        )
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
            return 'Canvas schema creation job completed.'
        else:
            raise BackgroundJobError('Canvas schema creation job failed.')
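Examples 3 and 9 share the same today-or-yesterday S3 fallback. A hypothetical helper factoring that pattern (name and signature are assumptions):

def find_recent_daily_path(get_daily_path, description):
    # Prefer today's S3 prefix; fall back to yesterday's; otherwise abort.
    daily_path = get_daily_path()
    if s3.get_keys_with_prefix(daily_path):
        return daily_path
    daily_path = get_daily_path(datetime.now() - timedelta(days=1))
    if s3.get_keys_with_prefix(daily_path):
        app.logger.info(f"Falling back to yesterday's {description} data")
        return daily_path
    raise BackgroundJobError(f'No timely {description} data found, aborting')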
Example No. 10
 def create_advising_note_authors(self):
     resolved_ddl = resolve_sql_template(
         'create_advising_note_authors.template.sql')
     if rds.execute(resolved_ddl):
         app.logger.info('Created advising note authors.')
     else:
         raise BackgroundJobError('Failed to create advising note authors.')
Example No. 11
    def create_schema(self):
        base_s3_key = app.config['LOCH_S3_E_I_DATA_PATH']
        external_schema = app.config['REDSHIFT_SCHEMA_E_I_ADVISING_NOTES']
        redshift.drop_external_schema(external_schema)
        # Flatten E&I-sourced JSON files into two schema-friendly JSON files.
        notes = []
        topics = []
        for key in s3.get_keys_with_prefix(base_s3_key):
            if key.endswith('.json'):
                notes_json = s3.get_object_json(key)
                if notes_json and 'notes' in notes_json:
                    notes += notes_json['notes']
                    # Extract topics from this file's notes only; looping over
                    # the full accumulated list would duplicate topics.
                    for note in notes_json['notes']:
                        topics += _extract_topics(note)

        if s3.upload_json(obj=notes, s3_key=f'{base_s3_key}/aggregated_notes/data.json') \
                and s3.upload_json(obj=topics, s3_key=f'{base_s3_key}/aggregated_topics/data.json'):
            # Create schema
            app.logger.info('Executing SQL...')
            resolved_ddl = resolve_sql_template('create_e_i_advising_notes_schema.template.sql')
            if redshift.execute_ddl_script(resolved_ddl):
                verify_external_schema(external_schema, resolved_ddl)
            else:
                raise BackgroundJobError('E&I Advising Notes schema creation job failed.')
        else:
            raise BackgroundJobError('Failed to upload aggregated E&I advising notes and topics.')
Example No. 12
 def index_advising_notes(self):
     resolved_ddl = resolve_sql_template(
         'index_advising_notes.template.sql')
     if rds.execute(resolved_ddl):
         app.logger.info('Indexed advising notes.')
     else:
         raise BackgroundJobError('Failed to index advising notes.')
Example No. 13
    def verify_post_transform_statement_count(self, url):
        schema = app.config['REDSHIFT_SCHEMA_LRS']
        resolved_ddl_transient = resolve_sql_template(
            'create_lrs_canvas_explode_table.template.sql',
            redshift_schema_lrs_external=schema,
            canvas_caliper_explode_table='caliper_statements_explode_transient',
            loch_s3_caliper_explode_url=url,
        )
        if redshift.execute_ddl_script(resolved_ddl_transient):
            app.logger.info(f"caliper_statements_explode_transient table created in schema '{schema}'.")
        else:
            raise BackgroundJobError(f"Failed to create caliper_statements_explode_transient table in schema '{schema}'.")

        exploded_statement_response = redshift.fetch(f'select count(*) from {schema}.caliper_statements_explode_transient')
        if exploded_statement_response:
            exploded_statement_count = exploded_statement_response[0].get('count')
        else:
            raise BackgroundJobError(f"Failed to verify caliper_statements_explode_transient table in schema '{schema}'.")

        if exploded_statement_count == self.pre_transform_statement_count:
            app.logger.info(f'Verified {exploded_statement_count} transformed statements migrated to {url}.')
        else:
            raise BackgroundJobError(
                f'Discrepancy between pre-transform statement count ({self.pre_transform_statement_count} statements)'
                f' and transformed statements at {url} ({exploded_statement_count} statements).')
Example No. 14
def run(self):
    app.logger.info('Starting BOAC analytics job...')
    resolved_ddl = resolve_sql_template('create_boac_schema.template.sql')
    if redshift.execute_ddl_script(resolved_ddl):
        return 'BOAC analytics creation job completed.'
    else:
        app.logger.error('BOAC analytics creation job failed.')
        return False
Example No. 15
 def create_internal_schema(self, external_schema, daily_path):
     bucket = app.config['LOCH_S3_PROTECTED_BUCKET']
     s3_data_url = f's3://{bucket}/{daily_path}/advising-notes'
     resolved_ddl = resolve_sql_template('create_sis_advising_notes_schema.template.sql', loch_s3_sis_notes_path_today=s3_data_url)
     if redshift.execute_ddl_script(resolved_ddl):
         verify_external_schema(external_schema, resolved_ddl)
     else:
         raise BackgroundJobError('SIS Advising Notes schema creation job failed to load incremental data and create internal schema.')
Example No. 16
    def create_schema(self):
        app.logger.info('Executing SQL...')
        template_sql = 'create_edl_schema.template.sql'
        resolved_ddl = resolve_sql_template(template_sql)
        if not redshift.execute_ddl_script(resolved_ddl):
            raise BackgroundJobError('EDL SIS schema creation job failed.')
        # Create staging schema
        resolved_ddl_staging = resolve_sql_template(
            template_sql,
            redshift_schema_edl=f'{self.internal_schema}_staging',
        )
        if redshift.execute_ddl_script(resolved_ddl_staging):
            app.logger.info(f"Schema '{self.internal_schema}_staging' found or created.")
        else:
            raise BackgroundJobError(f"Schema '{self.internal_schema}_staging' creation failed.")

        app.logger.info('Redshift EDL schema created.')
Example No. 17
 def create_indexes(self):
     resolved_ddl = resolve_sql_template(
         'index_data_science_advising.template.sql')
     if rds.execute(resolved_ddl):
         app.logger.info('Created Data Science Advising RDS indexes.')
     else:
         raise BackgroundJobError(
             'Data Science Advising schema creation job failed to create indexes.'
         )
Example No. 18
 def run(self):
     app.logger.info('Starting ASC schema creation job...')
     app.logger.info('Executing SQL...')
     resolved_ddl = resolve_sql_template('create_asc_schema.template.sql')
     if redshift.execute_ddl_script(resolved_ddl):
         app.logger.info(f"Schema '{app.config['REDSHIFT_SCHEMA_ASC']}' found or created.")
     else:
         raise BackgroundJobError('ASC schema creation failed.')
     return True
Example No. 19
 def create_schema(self):
     app.logger.info('Executing SQL...')
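     # Note: external_schema is not defined in this snippet; in the original
     # class it is presumably resolved from app config before this point.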
     redshift.drop_external_schema(external_schema)
     resolved_ddl = resolve_sql_template(
         'create_sis_terms_schema.template.sql')
     if redshift.execute_ddl_script(resolved_ddl):
         verify_external_schema(external_schema, resolved_ddl)
     else:
         raise BackgroundJobError('SIS terms schema creation job failed.')
Example No. 20
 def create_historical_tables(self, external_schema):
     resolved_ddl = resolve_sql_template(
         'create_sis_advising_notes_historical_schema.template.sql')
     if redshift.execute_ddl_script(resolved_ddl):
         verify_external_schema(external_schema, resolved_ddl)
     else:
         raise BackgroundJobError(
             'SIS Advising Notes schema creation job failed to load historical data.'
         )
Example No. 21
 def run(self):
     app.logger.info('Starting RDS index creation job...')
     app.logger.info('Executing SQL...')
     resolved_ddl = resolve_sql_template('create_rds_indexes.template.sql')
     if rds.execute(resolved_ddl):
         app.logger.info('RDS indexes found or created.')
         return True
     else:
         raise BackgroundJobError('RDS index creation failed.')
Example No. 22
 def create_indexes(self):
     resolved_ddl = resolve_sql_template(
         'index_e_i_advising_notes.template.sql')
     if rds.execute(resolved_ddl):
         app.logger.info('Created E&I Advising Notes RDS indexes.')
     else:
         raise BackgroundJobError(
             'E&I Advising Notes schema creation job failed to create indexes.'
         )
Example No. 23
def run(self):
    app.logger.info('Starting intermediate table generation job...')
    resolved_ddl = resolve_sql_template('create_intermediate_schema.template.sql')
    if redshift.execute_ddl_script(resolved_ddl):
        return 'Intermediate table creation job completed.'
    else:
        app.logger.error('Intermediate table creation job failed.')
        return False
Example No. 24
    def run(self):
        app.logger.info('Starting intermediate table generation job...')

        if app.config['FEATURE_FLAG_EDL_SIS_VIEWS']:
            sis_source_schema = app.config['REDSHIFT_SCHEMA_EDL']
            where_clause_exclude_withdrawn = "AND en.enrollment_status_reason <> 'WDRW'"
        else:
            sis_source_schema = app.config['REDSHIFT_SCHEMA_SIS']
            where_clause_exclude_withdrawn = f"""/* Enrollment with no primary section is likely a withdrawal. */
                AND EXISTS (
                    SELECT
                        en0.term_id,
                        en0.section_id,
                        en0.ldap_uid
                    FROM {app.config['REDSHIFT_SCHEMA_SIS']}.enrollments en0
                    JOIN {app.config['REDSHIFT_SCHEMA_INTERMEDIATE']}.course_sections crs0
                        ON crs0.sis_section_id = en0.section_id
                        AND crs0.sis_term_id = en0.term_id
                    WHERE en0.term_id = en.term_id
                    AND en0.ldap_uid = en.ldap_uid
                    AND crs0.sis_course_name = crs.sis_course_name
                    AND crs0.sis_primary = TRUE
                    AND en0.enrollment_status != 'D'
                    AND en0.grade != 'W'
                )"""

        resolved_ddl_redshift = resolve_sql_template(
            'create_intermediate_schema.template.sql',
            current_term_id=current_term_id(),
            redshift_schema_sis=sis_source_schema,
            where_clause_exclude_withdrawn=where_clause_exclude_withdrawn,
        )
        if redshift.execute_ddl_script(resolved_ddl_redshift):
            app.logger.info('Redshift tables generated.')
        else:
            raise BackgroundJobError('Intermediate table creation job failed.')

        resolved_ddl_rds = resolve_sql_template('update_rds_indexes_sis.template.sql')
        if rds.execute(resolved_ddl_rds):
            app.logger.info('RDS indexes updated.')
        else:
            raise BackgroundJobError('Failed to update RDS indexes for intermediate schema.')

        return 'Intermediate table generation job completed.'
Example No. 25
    def test_execute_ddl_script(self, app, ensure_drop_schema):
        """Executes filled SQL template files one statement at a time."""
        # TODO Test CREATE EXTERNAL SCHEMA and CREATE EXTERNAL TABLE statements.
        resolved_ddl = resolve_sql_template('test_db.template.sql')
        redshift.execute_ddl_script(resolved_ddl)

        schema = psycopg2.sql.Identifier(app.config['REDSHIFT_SCHEMA_BOAC'])
        result = redshift.fetch('SELECT COUNT(*) FROM {schema}.students', schema=schema)
        assert len(result) == 1
        assert result[0]['count'] == 7
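The `{schema}` placeholder above is not Python string formatting: `redshift.fetch` is handed a `psycopg2.sql.Identifier`, which composes the query and quotes the identifier safely. A standalone illustration of that psycopg2 mechanism (the schema name is hypothetical):

from psycopg2 import sql

query = sql.SQL('SELECT COUNT(*) FROM {schema}.students').format(
    schema=sql.Identifier('boac_test'),  # hypothetical schema name
)
# query.as_string(conn) renders: SELECT COUNT(*) FROM "boac_test".students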
Example No. 26
 def run(self):
     app.logger.info('Starting Redshift user privileges job...')
     app.logger.info('Executing SQL...')
     resolved_ddl = resolve_sql_template('restore_redshift_user_privileges.template.sql')
     if redshift.execute_ddl_script(resolved_ddl):
         app.logger.info('Restored Redshift user privileges successfully.')
     else:
         raise BackgroundJobError('Redshift user privilege restoration failed.')
     return True
Example No. 27
 def create_indexes(self):
     resolved_ddl = resolve_sql_template(
         'index_sis_advising_notes.template.sql',
         redshift_schema=app.config['REDSHIFT_SCHEMA_EDL'] if self.feature_flag_edl
         else app.config['REDSHIFT_SCHEMA_SIS_ADVISING_NOTES_INTERNAL'],
     )
     if rds.execute(resolved_ddl):
         app.logger.info('Created SIS Advising Notes RDS indexes.')
     else:
         raise BackgroundJobError('SIS Advising Notes schema creation job failed to create indexes.')
Example No. 28
 def create_rds_tables_and_indexes(self):
     resolved_ddl = resolve_sql_template(
         'index_oua_admissions.template.sql')
     if rds.execute(resolved_ddl):
         app.logger.info(
             'Created OUA Slate RDS tables and indexes successfully')
     else:
         raise BackgroundJobError(
              'OUA Slate schema creation job failed to create RDS tables and indexes.'
         )
Example No. 29
    def create_schema(self):
        external_schema = app.config['REDSHIFT_SCHEMA_DATA_SCIENCE_ADVISING']
        redshift.drop_external_schema(external_schema)

        resolved_ddl = resolve_sql_template(
            'create_data_science_advising_schema.template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError(
                'Data Science Advising schema creation job failed.')
Example No. 30
 def run(self):
     app.logger.info('Starting EDW schema creation job...')
     app.logger.info('Executing SQL...')
     external_schema = app.config['REDSHIFT_SCHEMA_EDW']
     redshift.drop_external_schema(external_schema)
     resolved_ddl = resolve_sql_template('create_edw_schema.template.sql')
     if redshift.execute_ddl_script(resolved_ddl):
         verify_external_schema(external_schema, resolved_ddl)
         return 'EDW schema creation job completed.'
     else:
         raise BackgroundJobError('EDW schema creation job failed.')
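Many of these jobs call `verify_external_schema(external_schema, resolved_ddl)` after the DDL succeeds, but the listing never shows its body. A plausible sketch, assuming it cross-checks the executed DDL against queryable tables (the regex and warning behavior are guesses):

import re

def verify_external_schema(external_schema, resolved_ddl):
    # Find each CREATE EXTERNAL TABLE in the resolved DDL and warn if the
    # resulting table is missing or empty.
    for match in re.finditer(r'CREATE EXTERNAL TABLE\s+(\S+)', resolved_ddl, re.IGNORECASE):
        table = match.group(1).split('.')[-1].strip('"')
        rows = redshift.fetch(f'SELECT COUNT(*) FROM {external_schema}.{table}')
        if not rows or not rows[0].get('count'):
            app.logger.warning(f'External table {external_schema}.{table} is empty or unreachable.')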