    def create_schema(self):
        base_s3_key = app.config['LOCH_S3_E_I_DATA_PATH']
        external_schema = app.config['REDSHIFT_SCHEMA_E_I_ADVISING_NOTES']
        redshift.drop_external_schema(external_schema)
        # Flatten E&I-sourced JSON files into two schema-friendly JSON files.
        notes = []
        topics = []
        for key in s3.get_keys_with_prefix(base_s3_key):
            if key.endswith('.json'):
                notes_json = s3.get_object_json(key)
                if notes_json and 'notes' in notes_json:
                    notes += notes_json['notes']
                    # Iterate only the newly fetched notes; looping over the full
                    # accumulator here would re-extract topics for earlier notes.
                    for note in notes_json['notes']:
                        topics += _extract_topics(note)

        if s3.upload_json(obj=notes, s3_key=f'{base_s3_key}/aggregated_notes/data.json') \
                and s3.upload_json(obj=topics, s3_key=f'{base_s3_key}/aggregated_topics/data.json'):
            # Create schema
            app.logger.info('Executing SQL...')
            resolved_ddl = resolve_sql_template('create_e_i_advising_notes_schema.template.sql')
            if redshift.execute_ddl_script(resolved_ddl):
                verify_external_schema(external_schema, resolved_ddl)
            else:
                raise BackgroundJobError('E&I Advising Notes schema creation job failed.')
        else:
            raise BackgroundJobError('Failed to upload aggregated E&I advising notes and topics.')
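
The `_extract_topics` helper above is not shown in this listing. A minimal sketch of what it might do, assuming each note feed carries `id` and `topics` fields (both field names are assumptions, not confirmed by the source):

def _extract_topics(note):
    # One row per topic, keyed by note id, so the aggregated topics file
    # can be joined back to the aggregated notes file.
    return [{'id': note.get('id'), 'topic': topic} for topic in note.get('topics', [])]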
    def run(self):
        app.logger.info('Starting Canvas schema creation job...')

        canvas_path = get_s3_canvas_daily_path()
        if not s3.get_keys_with_prefix(canvas_path):
            canvas_path = get_s3_canvas_daily_path(datetime.now() - timedelta(days=1))
            if not s3.get_keys_with_prefix(canvas_path):
                raise BackgroundJobError('No timely Canvas data found, aborting')
            else:
                app.logger.info("Falling back to yesterday's Canvas data")

        external_schema = app.config['REDSHIFT_SCHEMA_CANVAS']
        s3_prefix = 's3://' + app.config['LOCH_S3_BUCKET'] + '/'
        s3_canvas_data_url = s3_prefix + canvas_path
        s3_canvas_data_path_current_term = s3_prefix + berkeley.s3_canvas_data_path_current_term()

        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template(
            'create_canvas_schema.template.sql',
            loch_s3_canvas_data_path_today=s3_canvas_data_url,
            loch_s3_canvas_data_path_current_term=s3_canvas_data_path_current_term,
        )
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
            return 'Canvas schema creation job completed.'
        else:
            raise BackgroundJobError('Canvas schema creation job failed.')
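
The yesterday fallback above relies on `get_s3_canvas_daily_path` accepting an optional date. A hedged sketch of how such a helper could build the dated prefix; the config key and path layout are assumptions, not confirmed by the source:

from datetime import datetime

def get_s3_canvas_daily_path(when=None):
    # Assumed layout: <configured base>/daily/YYYY-MM-DD; the real prefix
    # format may differ.
    when = when or datetime.now()
    return app.config['LOCH_S3_CANVAS_DATA_PATH'] + '/daily/' + when.strftime('%Y-%m-%d')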
    def create_internal_schema(self, external_schema, daily_path):
        bucket = app.config['LOCH_S3_PROTECTED_BUCKET']
        s3_data_url = f's3://{bucket}/{daily_path}/advising-notes'
        resolved_ddl = resolve_sql_template(
            'create_sis_advising_notes_schema.template.sql',
            loch_s3_sis_notes_path_today=s3_data_url,
        )
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError(
                'SIS Advising Notes schema creation job failed to load incremental data and create internal schema.')

    def create_historical_tables(self, external_schema):
        resolved_ddl = resolve_sql_template('create_sis_advising_notes_historical_schema.template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError('SIS Advising Notes schema creation job failed to load historical data.')
Example #5
    def create_schema(self):
        app.logger.info('Executing SQL...')
        # external_schema is defined elsewhere in this job.
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_sis_terms_schema.template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError('SIS terms schema creation job failed.')
Example #6
    def run(self):
        app.logger.info('Starting EDW schema creation job...')
        app.logger.info('Executing SQL...')
        external_schema = app.config['REDSHIFT_SCHEMA_EDW']
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_edw_schema.template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
            return 'EDW schema creation job completed.'
        else:
            raise BackgroundJobError('EDW schema creation job failed.')
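
Every job in these examples repeats the same sequence: drop the external schema, resolve a DDL template, execute it, then verify. A hypothetical consolidation of that pattern into one helper (not part of the source):

def recreate_external_schema(external_schema, sql_template, **substitutions):
    # Drop-resolve-execute-verify, as repeated by each job in this listing.
    redshift.drop_external_schema(external_schema)
    resolved_ddl = resolve_sql_template(sql_template, **substitutions)
    if not redshift.execute_ddl_script(resolved_ddl):
        raise BackgroundJobError(f'{sql_template} execution failed.')
    verify_external_schema(external_schema, resolved_ddl)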
Example #7
    def create_schema(self):
        external_schema = app.config['REDSHIFT_SCHEMA_DATA_SCIENCE_ADVISING']
        redshift.drop_external_schema(external_schema)

        resolved_ddl = resolve_sql_template(
            'create_data_science_advising_schema.template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError(
                'Data Science Advising schema creation job failed.')
Example #8
    def create_schema(self):
        external_schema = app.config['REDSHIFT_SCHEMA_ADVISOR']
        redshift.drop_external_schema(external_schema)

        resolved_ddl = resolve_sql_template(
            'create_advisor_schema.template.sql',
            advisor_data_path=self.s3_path(),
        )
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError('Advisor schema creation job failed.')
Example #9
    def create_external_schema(self):
        if not self.feature_flag_edl:
            # External schema is necessary when pulling term definitions from S3.
            redshift.drop_external_schema(self.redshift_schema)
            sql_template = 'create_terms_schema.template.sql'
            app.logger.info(f'Executing {sql_template}...')
            resolved_ddl = resolve_sql_template(sql_template)
            if redshift.execute_ddl_script(resolved_ddl):
                verify_external_schema(self.redshift_schema, resolved_ddl)
            else:
                raise BackgroundJobError('SIS terms schema creation job failed.')
Example #10
    def run(self):
        app.logger.info('Starting Gradescope schema creation job...')
        app.logger.info('Executing SQL...')
        # Add ingest frequency and dynamic path creation logic once ingest mechanisms are confirmed.
        # For now, add a top-level hierarchy to catalog the bulk snapshot.
        external_schema = app.config['REDSHIFT_SCHEMA_GRADESCOPE']
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_gradescope_schema_template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
            return 'Gradescope schema creation job completed.'
        else:
            raise BackgroundJobError('Gradescope schema creation job failed.')
Example #11
    def run(self):
        app.logger.info('Starting SIS schema creation job...')
        if not self.update_manifests():
            app.logger.info('Error updating manifests, will not execute schema creation SQL')
            return False
        app.logger.info('Executing SQL...')
        external_schema = app.config['REDSHIFT_SCHEMA_SIS']
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_sis_schema.template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError('SIS schema creation job failed.')
        return 'SIS schema creation job completed.'
Example #12
    def run(self):
        app.logger.info('Starting OUA Slate schema creation job...')
        app.logger.info('Executing SQL...')
        self.migrate_oua_sftp_data()
        external_schema = app.config['REDSHIFT_SCHEMA_OUA']
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_oua_schema_template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
            self.create_rds_tables_and_indexes()
            app.logger.info('OUA Slate RDS indexes created.')
            return 'OUA schema creation job completed.'
        else:
            raise BackgroundJobError('OUA Slate schema creation job failed.')
Example #13
    def create_schema(self):
        external_schema = app.config['REDSHIFT_SCHEMA_ASC_ADVISING_NOTES']
        redshift.drop_external_schema(external_schema)
        asc_data_sftp_path = '/'.join([
            f"s3://{app.config['LOCH_S3_BUCKET']}",
            app.config['LOCH_S3_ASC_DATA_SFTP_PATH'],
        ])
        resolved_ddl = resolve_sql_template(
            'create_asc_advising_notes_schema.template.sql',
            asc_data_sftp_path=asc_data_sftp_path,
        )
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError('ASC Advising Notes schema creation job failed.')
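
Several examples pass keyword substitutions (like `asc_data_sftp_path` above) into `resolve_sql_template`. A minimal sketch of how such a resolver might work, assuming simple `{name}` placeholders and an assumed `sql_templates/` directory; the real resolver likely merges app config values as well:

def resolve_sql_template(sql_filename, **kwargs):
    # Read the template and fill {placeholder} tokens from keyword arguments.
    with open(f'sql_templates/{sql_filename}') as f:
        return f.read().format_map(kwargs)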
Example #14
    def create_schema(self):
        app.logger.info('Executing SQL...')
        redshift.drop_external_schema(self.external_schema)

        s3_sis_daily = get_s3_sis_sysadm_daily_path()
        if not s3.get_keys_with_prefix(s3_sis_daily):
            s3_sis_daily = _get_yesterdays_advisor_data()
        s3_path = '/'.join([f"s3://{app.config['LOCH_S3_BUCKET']}", s3_sis_daily, 'advisors'])

        sql_filename = 'edl_create_advisor_schema.template.sql' if self.feature_flag_edl else 'create_advisor_schema.template.sql'
        resolved_ddl = resolve_sql_template(sql_filename, advisor_data_path=s3_path)
        if not redshift.execute_ddl_script(resolved_ddl):
            raise BackgroundJobError(f'Redshift execute_ddl_script failed on {sql_filename}')

        verify_external_schema(self.external_schema, resolved_ddl)
        app.logger.info('Redshift schema created.')
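
`_get_yesterdays_advisor_data` is referenced above but not shown. A hypothetical fallback mirroring the Canvas job earlier in this listing, assuming the daily-path helper accepts a date argument:

from datetime import datetime, timedelta

def _get_yesterdays_advisor_data():
    # Retry the daily path with yesterday's date; abort if that is empty too.
    s3_sis_daily = get_s3_sis_sysadm_daily_path(datetime.now() - timedelta(days=1))
    if not s3.get_keys_with_prefix(s3_sis_daily):
        raise BackgroundJobError('No timely SIS advisor data found, aborting')
    app.logger.info("Falling back to yesterday's advisor data")
    return s3_sis_daily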
Example #15
    def run(self):
        app.logger.info('Starting Canvas API schema creation job...')
        external_schema = app.config['REDSHIFT_SCHEMA_CANVAS_API']
        s3_prefix = 's3://' + app.config['LOCH_S3_BUCKET'] + '/'
        s3_canvas_api_data_url = s3_prefix + get_s3_canvas_api_path(transformed=True)

        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template(
            'create_canvas_api_schema.template.sql',
            loch_s3_canvas_api_data_path=s3_canvas_api_data_url,
        )
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
            return 'Canvas API schema creation job completed.'
        else:
            raise BackgroundJobError('Canvas API schema creation job failed.')
Example #16
    def run(self):
        app.logger.info('Starting YCBM schema creation job...')

        external_schema = app.config['REDSHIFT_SCHEMA_YCBM']
        redshift.drop_external_schema(external_schema)

        sis_source_schema = app.config['REDSHIFT_SCHEMA_EDL'] if app.config['FEATURE_FLAG_EDL_SIS_VIEWS'] else app.config['REDSHIFT_SCHEMA_SIS']
        resolved_ddl = resolve_sql_template(
            'create_ycbm_schema.template.sql',
            redshift_schema_sis=sis_source_schema,
        )

        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
            return 'YCBM schema creation job completed.'
        else:
            raise BackgroundJobError('YCBM schema creation job failed.')
Example #17
    def run(self):
        app.logger.info('Starting SIS schema creation job...')
        if not self.update_manifests():
            app.logger.info('Error updating manifests, will not execute schema creation SQL')
            return False
        app.logger.info('Executing SQL...')
        # external_schema is defined elsewhere in this job.
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_sis_schema.template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl, is_zero_count_acceptable=feature_flag_edl())
        else:
            raise BackgroundJobError('SIS schema creation job failed.')
        return 'SIS schema creation job completed.'
Example #18
    def run(self):
        app.logger.info('Starting CalNet schema creation job...')
        external_schema = app.config['REDSHIFT_SCHEMA_CALNET']
        redshift.drop_external_schema(external_schema)
        sid_snapshot_path = '/'.join([
            f"s3://{app.config['LOCH_S3_BUCKET']}",
            app.config['LOCH_S3_CALNET_DATA_PATH'],
            'sids',
        ])
        resolved_ddl = resolve_sql_template(
            'create_calnet_schema.template.sql',
            sid_snapshot_path=sid_snapshot_path,
        )

        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
            return 'CalNet schema creation job completed.'
        else:
            raise BackgroundJobError('CalNet schema creation job failed.')
Example #19
    def run(self):
        app.logger.info('Starting Canvas schema creation job...')
        external_schema = app.config['REDSHIFT_SCHEMA_CANVAS']
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_canvas_schema.template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            app.logger.info('Canvas schema creation job completed.')
            return verify_external_schema(external_schema, resolved_ddl)
        else:
            app.logger.error('Canvas schema creation job failed.')
            return False
Example #20
    def run(self):
        app.logger.info('Starting Undergrads schema creation job...')
        # external_schema is defined elsewhere in this job.
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_undergrads_schema.template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            app.logger.info('Undergrads external schema created.')
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError('Undergrads external schema creation failed.')
        undergrads_rows = redshift.fetch(f'SELECT * FROM {external_schema}.students ORDER by sid')

        with rds.transaction() as transaction:
            if self.refresh_rds_indexes(undergrads_rows, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Error refreshing RDS indexes.')

        return 'Undergrads internal schema created.'
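
`refresh_rds_indexes` is not shown; a rough sketch of the contract it appears to satisfy (truthy return so the caller commits, falsy so it rolls back). The table name and the `transaction.execute` API are assumptions:

def refresh_rds_indexes(self, rows, transaction):
    # Rebuild the RDS copy of the student list inside the caller's transaction.
    if not transaction.execute('DELETE FROM undergrads.students'):
        return False
    for row in rows:
        if not transaction.execute('INSERT INTO undergrads.students (sid) VALUES (%s)', (row['sid'],)):
            return False
    return True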
Example #21
    def run(self):
        app.logger.info('Starting OUA Slate schema creation job...')
        app.logger.info('Executing SQL...')

        s3_protected_bucket = app.config['LOCH_S3_PROTECTED_BUCKET']
        oua_slate_sftp_path = app.config['LOCH_S3_SLATE_DATA_SFTP_PATH'] + '/' + self.get_sftp_date_offset() + '/'
        oua_daily_dest_path = get_s3_oua_daily_path() + '/admissions/'

        # Get the list of keys under the SFTP prefix; any CSV files found are
        # migrated to the daily OUA location.
        keys = s3.get_keys_with_prefix(oua_slate_sftp_path, full_objects=False, bucket=s3_protected_bucket)

        if keys:
            for source_key in keys:
                if source_key.endswith('.csv'):
                    destination_key = source_key.replace(oua_slate_sftp_path, oua_daily_dest_path)
                    if not s3.copy(s3_protected_bucket, source_key, s3_protected_bucket, destination_key):
                        raise BackgroundJobError(
                            f'Copy from SFTP location {source_key} to daily OUA destination {destination_key} failed.')
            external_schema = app.config['REDSHIFT_SCHEMA_OUA']
            redshift.drop_external_schema(external_schema)
            resolved_ddl = resolve_sql_template('create_oua_schema_template.sql')
            if redshift.execute_ddl_script(resolved_ddl):
                verify_external_schema(external_schema, resolved_ddl)
                self.create_rds_tables_and_indexes()
                app.logger.info('OUA Slate RDS indexes created.')
                return 'OUA schema creation job completed.'
            else:
                raise BackgroundJobError('OUA Slate schema creation job failed.')
        else:
            return 'No OUA files found in SFTP location today. Skipping OUA data refresh.'
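
`get_sftp_date_offset` is referenced above but not shown; a hypothetical version, assuming OUA drops land under a folder named for the current date (the real format and offset are not visible in the source):

from datetime import datetime

def get_sftp_date_offset(self):
    # Assumed YYYY/MM/DD folder naming for today's SFTP drop.
    return datetime.now().strftime('%Y/%m/%d')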
Example #22
    def run(self):
        app.logger.info('Starting COE schema creation job...')
        # external_schema and internal_schema_identifier are defined elsewhere in this job.
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_coe_schema.template.sql')
        # TODO This DDL drops and recreates the internal schema before the external schema is verified. We
        # ought to set up proper staging in conjunction with verification. It's also possible that a persistent
        # external schema isn't needed.
        if redshift.execute_ddl_script(resolved_ddl):
            app.logger.info('COE external schema created.')
            verify_external_schema(external_schema, resolved_ddl)
        else:
            raise BackgroundJobError('COE external schema creation failed.')
        coe_rows = redshift.fetch(
            'SELECT * FROM {schema}.students ORDER by sid',
            schema=internal_schema_identifier,
        )

        profile_rows = []
        index = 1
        for sid, rows_for_student in groupby(coe_rows, operator.itemgetter('sid')):
            app.logger.info(f'Generating COE profile for SID {sid} ({index} of {len(coe_rows)})')
            index += 1
            row_for_student = list(rows_for_student)[0]
            coe_profile = {
                'advisorUid': row_for_student.get('advisor_ldap_uid'),
                'gender': row_for_student.get('gender'),
                'ethnicity': row_for_student.get('ethnicity'),
                'minority': row_for_student.get('minority'),
                'didPrep': row_for_student.get('did_prep'),
                'prepEligible': row_for_student.get('prep_eligible'),
                'didTprep': row_for_student.get('did_tprep'),
                'tprepEligible': row_for_student.get('tprep_eligible'),
                'sat1read': row_for_student.get('sat1read'),
                'sat1math': row_for_student.get('sat1math'),
                'sat2math': row_for_student.get('sat2math'),
                'inMet': row_for_student.get('in_met'),
                'gradTerm': row_for_student.get('grad_term'),
                'gradYear': row_for_student.get('grad_year'),
                'probation': row_for_student.get('probation'),
                'status': row_for_student.get('status'),
            }
            profile_rows.append(encoded_tsv_row([sid, json.dumps(coe_profile)]))

        s3_key = f'{get_s3_coe_daily_path()}/coe_profiles.tsv'
        app.logger.info(f'Will stash {len(profile_rows)} feeds in S3: {s3_key}')
        if not s3.upload_tsv_rows(profile_rows, s3_key):
            raise BackgroundJobError('Error on S3 upload: aborting job.')

        app.logger.info('Will copy S3 feeds into Redshift...')
        query = resolve_sql_template_string(
            """
            COPY {redshift_schema_coe}.student_profiles
                FROM '{loch_s3_coe_data_path}/coe_profiles.tsv'
                IAM_ROLE '{redshift_iam_role}'
                DELIMITER '\\t';
            """, )
        if not redshift.execute(query):
            raise BackgroundJobError('Error on Redshift copy: aborting job.')

        with rds.transaction() as transaction:
            if self.refresh_rds_indexes(coe_rows, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Error refreshing RDS indexes.')

        return 'COE internal schema created.'
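
`encoded_tsv_row` is used above to build the upload feed; a minimal sketch of what it might do, assuming plain tab-joined text rows (the real implementation may also escape embedded tabs and newlines):

def encoded_tsv_row(values):
    # Join column values with tabs, mapping None to the empty string, so each
    # entry becomes one line of the uploaded TSV feed.
    return '\t'.join('' if v is None else str(v) for v in values)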