def import_appointment_advisors(self):
    """Import CalNet attributes for advisors who appear in SIS advising appointments.

    Unions advisor SIDs from the RDS advising_appointments table with SIDs from the
    Redshift advisor_departments table, looks up their CalNet attributes, then
    bulk-inserts one row per unique UID into advising_appointment_advisors.

    Raises:
        BackgroundJobError: if the CalNet lookup returns nothing or the bulk insert fails.
    """
    sis_notes_schema = app.config['RDS_SCHEMA_SIS_ADVISING_NOTES']
    advisor_schema_redshift = app.config['REDSHIFT_SCHEMA_ADVISOR_INTERNAL']
    # Set comprehensions (was set([...]) — C403) to de-duplicate SIDs from each source.
    advisor_sids_from_sis_appointments = {
        r['advisor_sid']
        for r in rds.fetch(f'SELECT DISTINCT advisor_sid FROM {sis_notes_schema}.advising_appointments')
    }
    advisor_sids_from_advisors = {
        r['sid']
        for r in redshift.fetch(f'SELECT DISTINCT sid FROM {advisor_schema_redshift}.advisor_departments')
    }
    advisor_sids = list(advisor_sids_from_sis_appointments | advisor_sids_from_advisors)
    advisor_attributes = calnet.client(app).search_csids(advisor_sids)
    if not advisor_attributes:
        raise BackgroundJobError('Failed to fetch note author attributes.')
    # De-duplicate CalNet results by UID; later entries win.
    unique_advisor_attributes = list({adv['uid']: adv for adv in advisor_attributes}.values())
    with rds.transaction() as transaction:
        insertable_rows = []
        for entry in unique_advisor_attributes:
            first_name, last_name = calnet.split_sortable_name(entry)
            # Plain tuple literal (was the redundant tuple((...)) double wrap).
            insertable_rows.append((entry.get('uid'), entry.get('csid'), first_name, last_name))
        result = transaction.insert_bulk(
            f'INSERT INTO {sis_notes_schema}.advising_appointment_advisors (uid, sid, first_name, last_name) VALUES %s',
            insertable_rows,
        )
        if result:
            transaction.commit()
            app.logger.info('Imported appointment advisor attributes.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Failed to import appointment advisor attributes.')
def _put_instructor_data_to_s3(uids):
    """Fetch CalNet attributes for instructor UIDs and upload them to S3 as JSON lines.

    One JSON record per line (JsonSerDe in the Redshift schema requires exactly one
    record per line). UIDs missing from LDAP are logged but do not abort the upload.
    """
    app.logger.info(f'Starting CalNet import job for {len(uids)} instructors...')
    all_attributes = calnet.client(app).search_uids(uids)
    if len(uids) != len(all_attributes):
        ldap_uids = [person['uid'] for person in all_attributes]
        missing = set(uids) - set(ldap_uids)
        app.logger.warning(f'Looked for {len(uids)} instructor UIDs but only found {len(all_attributes)} : missing {missing}')
    # Collect lines and join once (was a quadratic `serialized_data +=` loop);
    # the unused enumerate() index is also gone.
    rows = []
    for a in all_attributes:
        affiliations = a['affiliations']
        first_name, last_name = calnet.split_sortable_name(a)
        rows.append(json.dumps({
            'affiliations': ','.join(affiliations) if isinstance(affiliations, list) else affiliations,
            'campus_email': a['campus_email'],
            'dept_code': calnet.get_dept_code(a),
            'email': a['email'],
            'first_name': first_name,
            'last_name': last_name,
            'ldap_uid': a['uid'],
            'csid': a['csid'],
            'title': a['title'],
        }) + '\n')
    s3.upload_data(''.join(rows), f'{get_s3_calnet_daily_path()}/instructors/instructors.json')
    app.logger.info(f'Uploaded data for {len(all_attributes)} instructors')
def _put_advisee_data_to_s3(sids):
    """Fetch CalNet attributes for advisee SIDs and upload them to S3 as JSON lines.

    One JSON record per line (JsonSerDe in the Redshift schema requires exactly one
    record per line). SIDs missing from LDAP are logged but do not abort the upload.
    """
    app.logger.info(f'Starting CalNet import job for {len(sids)} advisees...')
    all_attributes = calnet.client(app).search_csids(sids)
    if len(sids) != len(all_attributes):
        ldap_sids = [person['csid'] for person in all_attributes]
        missing = set(sids) - set(ldap_sids)
        app.logger.warning(f'Looked for {len(sids)} advisee SIDs but only found {len(all_attributes)} : missing {missing}')
    # Collect lines and join once (was a quadratic `serialized_data +=` loop);
    # the unused enumerate() index is also gone.
    rows = []
    for a in all_attributes:
        affiliations = a['affiliations']
        first_name, last_name = calnet.split_sortable_name(a)
        rows.append(json.dumps({
            'affiliations': ','.join(affiliations) if isinstance(affiliations, list) else affiliations,
            'campus_email': a['campus_email'],
            'email': a['email'],
            'first_name': first_name,
            'last_name': last_name,
            'ldap_uid': a['uid'],
            'sid': a['csid'],
        }) + '\n')
    s3.upload_data(''.join(rows), f'{get_s3_calnet_daily_path()}/advisees/advisees.json')
    app.logger.info(f'Uploaded data for {len(all_attributes)} advisees')
def _advisor_attributes_by_email(self):
    """Return CalNet attributes for the distinct advisor emails found in Data Science notes."""
    data_science_schema = app.config['RDS_SCHEMA_DATA_SCIENCE']
    sql = f"""
        SELECT DISTINCT advisor_email FROM {data_science_schema}.advising_notes
        WHERE advisor_email IS NOT NULL
    """
    # Set comprehension (was set([...]) — C403) to de-duplicate emails.
    advisor_emails = {r['advisor_email'] for r in rds.fetch(sql)}
    return calnet.client(app).search_emails(list(advisor_emails))
def import_advisor_attributes(self):
    """Fetch CalNet attributes for all advisor CSIDs, stash them in S3 as TSV, and copy into Redshift.

    Logs a warning for CSIDs that CalNet cannot resolve; raises on S3 upload failure;
    returns False if the Redshift copy fails.
    """
    csid_results = redshift.fetch(
        resolve_sql_template_string(
            'SELECT DISTINCT advisor_sid FROM {redshift_schema_advisor_internal}.advisor_students'
        ),
    )
    csids = [row['advisor_sid'] for row in csid_results]
    all_attributes = calnet.client(app).search_csids(csids)
    if len(all_attributes) != len(csids):
        found_csids = {person['csid'] for person in all_attributes}
        missing = set(csids) - found_csids
        app.logger.warning(
            f'Looked for {len(csids)} advisor CSIDs but only found {len(all_attributes)} : missing {missing}'
        )
    total_count = len(all_attributes)
    advisor_rows = []
    for position, attrs in enumerate(all_attributes, start=1):
        sid = attrs['csid']
        app.logger.info(
            f'CalNet import: Fetch attributes of advisor {sid} ({position} of {total_count})'
        )
        first_name, last_name = calnet.split_sortable_name(attrs)
        advisor_rows.append(encoded_tsv_row([
            attrs['uid'],
            sid,
            first_name,
            last_name,
            attrs['title'],
            calnet.get_dept_code(attrs),
            attrs['email'],
            attrs['campus_email'],
        ]))
    s3_key = f'{get_s3_calnet_daily_path()}/advisors/advisors.tsv'
    app.logger.info(f'Will stash {len(advisor_rows)} feeds in S3: {s3_key}')
    if not s3.upload_tsv_rows(advisor_rows, s3_key):
        raise BackgroundJobError('Error on S3 upload: aborting job.')
    app.logger.info('Will copy S3 feeds into Redshift...')
    query = resolve_sql_template_string(
        """
        TRUNCATE {redshift_schema_advisor_internal}.advisor_attributes;
        COPY {redshift_schema_advisor_internal}.advisor_attributes
            FROM '{loch_s3_calnet_data_path}/advisors/advisors.tsv'
            IAM_ROLE '{redshift_iam_role}'
            DELIMITER '\\t';
        """,
    )
    if not redshift.execute(query):
        app.logger.error('Error on Redshift copy: aborting job.')
        return False
def _advisor_attributes_by_uid(self):
    """Return CalNet attributes for the distinct advisor UIDs found in ASC and E&I notes."""
    asc_schema = app.config['RDS_SCHEMA_ASC']
    e_i_schema = app.config['RDS_SCHEMA_E_I']
    # Set comprehensions (was set([...]) — C403) to de-duplicate UIDs from each source.
    advisor_uids_from_asc_notes = {
        r['advisor_uid']
        for r in rds.fetch(f'SELECT DISTINCT advisor_uid FROM {asc_schema}.advising_notes')
    }
    advisor_uids_from_e_i_notes = {
        r['advisor_uid']
        for r in rds.fetch(f'SELECT DISTINCT advisor_uid FROM {e_i_schema}.advising_notes')
    }
    advisor_uids = list(advisor_uids_from_asc_notes | advisor_uids_from_e_i_notes)
    return calnet.client(app).search_uids(advisor_uids)
def _advisor_attributes_by_sid(self):
    """Return CalNet attributes for advisor SIDs found in SIS notes and the advisor tables."""
    sis_notes_schema = app.config['RDS_SCHEMA_SIS_ADVISING_NOTES']
    advisor_schema_redshift = app.config['REDSHIFT_SCHEMA_ADVISOR_INTERNAL']
    # Set comprehensions (was set([...]) — C403) to de-duplicate SIDs from each source.
    advisor_sids_from_sis_notes = {
        r['advisor_sid']
        for r in rds.fetch(f'SELECT DISTINCT advisor_sid FROM {sis_notes_schema}.advising_notes')
    }
    advisor_sids_from_advisors = {
        r['sid']
        for r in redshift.fetch(f'SELECT DISTINCT sid FROM {advisor_schema_redshift}.advisor_departments')
    }
    advisor_sids = list(advisor_sids_from_sis_notes | advisor_sids_from_advisors)
    return calnet.client(app).search_csids(advisor_sids)
def _put_calnet_data_to_s3(sids):
    """Fetch CalNet attributes for student SIDs and upload them to S3 as JSON lines.

    One JSON record per line (JsonSerDe in the Redshift schema requires exactly one
    record per line). SIDs missing from LDAP are logged but do not abort the upload.
    """
    all_attributes = calnet.client(app).search_csids(sids)
    if len(sids) != len(all_attributes):
        # Renamed loop variable from the ambiguous `l` (PEP 8 / E741).
        ldap_sids = [person['csid'] for person in all_attributes]
        missing = set(sids) - set(ldap_sids)
        app.logger.warning(
            f'Looked for {len(sids)} SIDs but only found {len(all_attributes)} : missing {missing}'
        )
    # Collect lines and join once (was a quadratic `serialized_data +=` loop).
    rows = []
    total_count = len(all_attributes)
    for index, a in enumerate(all_attributes):
        sid = a['csid']
        app.logger.info(
            f'CalNet import: Fetch attributes of student {sid} ({index + 1} of {total_count})'
        )
        affiliations = a['affiliations']
        first_name, last_name = _split_sortable_name(a)
        rows.append(json.dumps({
            'affiliations': ','.join(affiliations) if isinstance(affiliations, list) else affiliations,
            'campus_email': a['campus_email'],
            'email': a['email'],
            'first_name': first_name,
            'last_name': last_name,
            'ldap_uid': a['uid'],
            'sid': sid,
        }) + '\n')
    s3.upload_data(''.join(rows), f'{get_s3_calnet_daily_path()}/persons.json')