Exemplo n.º 1
0
def get_all_instructor_uids():
    sql = f"""SELECT DISTINCT instructor_uid
              FROM {sis_schema()}.courses
              WHERE instructor_uid IS NOT NULL AND instructor_uid != ''
        """
    return redshift.fetch(sql)
Exemplo n.º 2
0
def get_edl_student_registrations(sids):
    # TODO: After move to EDL is done, compact the SQL below. For now, it is one column name per line, for readability.
    results = []
    count_per_chunk = 10000
    for chunk in range(0, len(sids), count_per_chunk):
        sids_subset = sids[chunk:chunk + count_per_chunk]
        # TODO: This query must follow the 'as_of=near_future' logic used by _get_api_v2_registrations (see NS-1105).
        sql = f"""SELECT
                    r.academic_career_cd,
                    r.academic_level_beginning_of_term_cd,
                    r.academic_level_beginning_of_term_desc,
                    r.academic_level_end_of_term_cd,
                    r.academic_level_end_of_term_desc,
                    r.current_term_gpa_nbr AS current_term_gpa,
                    r.eligibility_status_desc,
                    r.eligible_to_enroll_flag,
                    r.expected_graduation_term,
                    r.intends_to_graduate_flag,
                    r.load_dt AS edl_load_date,
                    r.maximum_term_enrollment_units_limit AS units_term_enrollment_max,
                    r.minimum_term_enrollment_units_limit AS units_term_enrollment_min,
                    r.registered_flag,
                    r.registrn_eligibility_status_cd,
                    r.semester_year_term_cd AS term_id,
                    r.student_id,
                    r.term_enrolled_units AS units_term_enrolled,
                    r.terms_in_attendance,
                    r.total_cumulative_gpa_nbr,
                    r.total_units_completed_qty AS units_term_completed,
                    s.acad_career,
                    s.acad_career_first,
                    s.acad_group_advis,
                    s.acad_level_bot,
                    s.acad_level_eot,
                    s.acad_level_proj,
                    s.acad_load_appr,
                    s.acad_prog_primary,
                    s.acad_year,
                    s.academic_load,
                    s.academic_load_dt,
                    s.billing_career,
                    s.class_rank_nbr,
                    s.class_rank_tot,
                    s.country,
                    s.cum_gpa as gpa_cumulative,
                    s.cum_resident_terms,
                    s.cur_gpa,
                    s.cur_resident_terms,
                    s.elig_to_enroll,
                    s.end_date,
                    s.enrl_on_trans_dt,
                    s.ext_org_id,
                    s.fa_eligibility,
                    s.fa_load,
                    s.fa_stats_calc_dttm,
                    s.fa_stats_calc_req,
                    s.form_of_study,
                    s.fully_enrl_dt,
                    s.fully_graded_dt,
                    s.grade_points,
                    s.grade_points_fa,
                    s.institution,
                    s.last_date_attended,
                    s.lock_in_amt,
                    s.lock_in_dt,
                    s.max_audit_unit,
                    s.max_crse_count,
                    s.max_nogpa_unit,
                    s.max_total_unit,
                    s.max_wait_unit,
                    s.min_total_unit,
                    s.nslds_loan_year,
                    s.ovrd_acad_lvl_all,
                    s.ovrd_acad_lvl_proj,
                    s.ovrd_bill_units,
                    s.ovrd_init_add_fee,
                    s.ovrd_init_enr_fee,
                    s.ovrd_max_units,
                    s.ovrd_tuit_group,
                    s.ovrd_wdrw_sched,
                    s.pro_rata_eligible,
                    s.proj_bill_unt,
                    s.refund_pct,
                    s.refund_scheme,
                    s.reg_card_date,
                    s.registered,
                    s.reset_cum_stats,
                    s.sel_group,
                    s.ssr_activation_dt,
                    s.ssr_comb_cur_gpa,
                    s.ssr_cum_en_gpa,
                    s.ssr_cum_tr_gpa,
                    s.ssr_tot_en_grdpts,
                    s.ssr_tot_en_tkngpa,
                    s.ssr_tot_tr_grdpts,
                    s.ssr_tot_tr_tkngpa,
                    s.ssr_trf_cur_gpa,
                    s.start_date,
                    s.stats_on_trans_dt,
                    s.stdnt_car_nbr,
                    s.study_agreement,
                    s.tc_units_adjust,
                    s.term_type,
                    s.tot_audit,
                    s.tot_cumulative AS units_cumulative,
                    s.tot_grade_points,
                    s.tot_grd_points_fa,
                    s.tot_inprog_gpa,
                    s.tot_inprog_nogpa,
                    s.tot_other,
                    s.tot_passd_fa,
                    s.tot_passd_gpa,
                    s.tot_passd_nogpa,
                    s.tot_passd_prgrss,
                    s.tot_taken_fa,
                    s.tot_taken_fa_gpa,
                    s.tot_taken_gpa,
                    s.tot_taken_nogpa,
                    s.tot_taken_prgrss,
                    s.tot_test_credit,
                    s.tot_trnsfr,
                    s.trf_grade_points,
                    s.trf_passed_gpa,
                    s.trf_passed_nogpa,
                    s.trf_resident_terms,
                    s.trf_taken_gpa,
                    s.trf_taken_nogpa,
                    s.tuit_calc_dttm,
                    s.tuit_calc_req,
                    s.tuition_res_terms,
                    s.unit_multiplier,
                    s.unt_audit,
                    s.unt_inprog_gpa,
                    s.unt_inprog_nogpa,
                    s.unt_other,
                    s.unt_passd_fa,
                    s.unt_passd_gpa,
                    s.unt_passd_nogpa,
                    s.unt_passd_prgrss,
                    s.unt_taken_fa,
                    s.unt_taken_fa_gpa,
                    s.unt_taken_gpa,
                    s.unt_taken_nogpa,
                    s.unt_taken_prgrss,
                    s.unt_term_tot,
                    s.unt_test_credit,
                    s.unt_trnsfr,
                    s.untprg_chg_nslc_dt,
                    s.withdraw_code,
                    s.withdraw_date,
                    s.withdraw_reason
                  FROM {edl_external_schema()}.student_registration_term_data r
                  JOIN {edl_external_schema_staging()}.cs_ps_stdnt_car_term s
                    ON r.student_id = s.emplid AND r.semester_year_term_cd = s.strm
                  WHERE r.student_id=ANY('{{{','.join(sids_subset)}}}')
                  ORDER BY r.student_id, r.semester_year_term_cd DESC
            """
        results += redshift.fetch(sql)
    return results
Exemplo n.º 3
0
    def test_generate_demographics_feeds(self, app, student_tables):
        """Builds JSON feeds and uploads to S3."""
        from nessie.jobs.create_edl_schema import CreateEdlSchema
        with override_config(app, 'FEATURE_FLAG_EDL_STUDENT_PROFILES', True):
            with override_config(app, 'FEATURE_FLAG_EDL_DEMOGRAPHICS', True):
                with mock_s3(app):
                    CreateEdlSchema().generate_demographics_feeds()

            rows = redshift.fetch(f'SELECT * FROM {student_schema()}.student_demographics')
            assert len(rows) == 11
            assert rows[0]['sid'] == '11667051'
            feed = json.loads(rows[0]['feed'])
            assert feed['gender'] == 'Female'
            assert feed['ethnicities'] == ['African-American / Black', 'Chinese / Chinese-American', 'East Indian / Pakistani']
            assert feed['nationalities'] == ['Singapore']
            assert feed['underrepresented'] is True
            assert feed['visa']['type'] == 'PR'
            assert feed['visa']['status'] == 'A'

            assert rows[1]['sid'] == '1234567890'
            feed = json.loads(rows[1]['feed'])
            assert feed['gender'] == 'Male'
            assert feed['ethnicities'] == ['Mexican / Mexican-American / Chicano', 'White']
            assert feed['nationalities'] == ['Iran (Islamic Republic Of)']
            assert feed['underrepresented'] is True
            assert feed['visa']['type'] == 'F1'
            assert feed['visa']['status'] == 'A'

            assert rows[2]['sid'] == '2345678901'
            feed = json.loads(rows[2]['feed'])
            assert feed['gender'] == 'Female'
            assert feed['ethnicities'] == ['White']
            assert feed['nationalities'] == ['Taiwan']
            assert feed['underrepresented'] is False
            assert feed['visa'] is None

            assert rows[3]['sid'] == '3456789012'
            feed = json.loads(rows[3]['feed'])
            assert feed['gender'] == 'Decline to State'
            assert feed['ethnicities'] == ['American Indian / Alaska Native', 'Filipino / Filipino-American']
            assert feed['nationalities'] == ['Korea, Republic of']
            assert feed['underrepresented'] is True
            assert feed['visa']['type'] == 'J1'
            assert feed['visa']['status'] == 'G'

            assert rows[4]['sid'] == '5000000000'
            feed = json.loads(rows[4]['feed'])
            assert feed['gender'] == 'Female'
            assert feed['ethnicities'] == ['Not Specified']
            assert feed['nationalities'] == []
            assert feed['underrepresented'] is False
            assert feed['visa'] is None

            assert rows[7]['sid'] == '8901234567'
            feed = json.loads(rows[7]['feed'])
            assert feed['gender'] == 'Decline to State'
            assert feed['ethnicities'] == ['Not Specified']
            assert feed['nationalities'] == []
            assert feed['underrepresented'] is False
            assert feed['visa'] is None

            assert rows[9]['sid'] == '9000000000'
            feed = json.loads(rows[9]['feed'])
            assert feed['gender'] == 'Nonbinary'
            assert feed['ethnicities'] == ['African-American / Black', 'Other Asian', 'Pacific Islander']
            assert feed['nationalities'] == ["Lao People's Democratic Rep", 'Saint Kitts and Nevis']
            assert feed['underrepresented'] is True
            assert feed['visa'] is None
Exemplo n.º 4
0
def get_all_student_ids():
    sql = f"""SELECT sid FROM {asc_schema()}.students
        UNION SELECT sid FROM {coe_schema()}.students
        UNION SELECT sid FROM {undergrads_schema()}.students
        UNION SELECT sid FROM {advisee_schema()}.non_current_students"""
    return redshift.fetch(sql)
Exemplo n.º 5
0
def get_sis_api_drops_and_midterms(csid, term_ids):
    sql = f"""SELECT term_id, feed from {student_schema()}.sis_api_drops_and_midterms
              WHERE sid='{csid}' AND term_id = ANY('{{{','.join(term_ids)}}}')"""
    return redshift.fetch(sql)
Exemplo n.º 6
0
 def get_imported(self, query):
     schema = app.config['REDSHIFT_SCHEMA_CANVAS_API']
     results = redshift.fetch(query.format(schema))
     return {r['course_id']: r['assignment_count'] for r in results}
Exemplo n.º 7
0
def get_all_student_ids():
    sql = f"""SELECT sid FROM {asc_schema()}.students
        UNION SELECT sid FROM {coe_schema()}.students"""
    return redshift.fetch(sql)
Exemplo n.º 8
0
def get_sis_api_degree_progress(csid):
    sql = f"""SELECT feed from {student_schema()}.sis_api_degree_progress WHERE sid='{csid}'"""
    return redshift.fetch(sql)
Exemplo n.º 9
0
def get_user_for_uid(uid):
    sql = f"""SELECT canvas_id, name, uid
        FROM {intermediate_schema()}.users
        WHERE uid = {uid}
        """
    return redshift.fetch(sql)
Exemplo n.º 10
0
def get_successfully_backfilled_students():
    sql = f"""SELECT sid
        FROM {metadata_schema()}.merged_feed_status
        WHERE term_id = 'all' AND status = 'success'"""
    return redshift.fetch(sql)
Exemplo n.º 11
0
def get_submissions_turned_in_relative_to_user(user_id):
    sql = f"""SELECT course_id, canvas_user_id, submissions_turned_in
              FROM {boac_schema()}.assignment_submissions_relative
              WHERE reference_user_id = {user_id}"""
    return redshift.fetch(sql)
Exemplo n.º 12
0
    def test_resync_canvas_snapshots(self, app, metadata_db, caplog):
        """Dispatches a complete resync job against fixtures."""
        caplog.set_level(logging.INFO)
        snapshots = canvas_data.get_snapshots()['files']

        def mock_metadata(job_id, snapshot, status, destination_size):
            metadata.create_canvas_sync_status(job_id, snapshot['filename'],
                                               snapshot['table'],
                                               snapshot['url'])
            key = '/'.join([
                get_s3_canvas_daily_path(), snapshot['table'],
                snapshot['filename']
            ])
            metadata.update_canvas_sync_status(
                job_id,
                key,
                status,
                source_size=1048576,
                destination_size=destination_size)

        old_sync_job = 'sync_152550000'
        latest_sync_job = 'sync_152560000'

        # The older job should be ignored by the resync.
        for snapshot in snapshots[0:5]:
            mock_metadata(old_sync_job, snapshot, 'complete', 1048576)
        for snapshot in snapshots[5:10]:
            mock_metadata(old_sync_job, snapshot, 'error', None)

        # The latest job synced five files successfully and ran into three problems.
        for snapshot in snapshots[10:15]:
            mock_metadata(latest_sync_job, snapshot, 'complete', 1048576)
        stalled = snapshots[15]
        errored = snapshots[16]
        size_discrepancy = snapshots[17]
        mock_metadata(latest_sync_job, stalled, 'streaming', None)
        mock_metadata(latest_sync_job, errored, 'error', None)
        mock_metadata(latest_sync_job, size_discrepancy, 'complete', 65536)

        schema = app.config['REDSHIFT_SCHEMA_METADATA']

        with capture_app_logs(app):
            assert redshift.fetch(
                f'SELECT count(*) FROM {schema}.canvas_sync_job_status'
            )[0]['count'] == 18
            with mock_s3(app):
                result = ResyncCanvasSnapshots().run_wrapped()
            assert 'Canvas snapshot resync job dispatched to workers' in result
            assert_background_job_status('resync')
            assert f"Dispatched S3 resync of snapshot {stalled['filename']}" in caplog.text
            assert f"Dispatched S3 resync of snapshot {errored['filename']}" in caplog.text
            assert f"Dispatched S3 resync of snapshot {size_discrepancy['filename']}" in caplog.text
            assert '3 successful dispatches, 0 failures' in caplog.text

        assert redshift.fetch(
            f'SELECT count(*) FROM {schema}.canvas_sync_job_status'
        )[0]['count'] == 21
        resync_results = redshift.fetch(
            f"SELECT * FROM {schema}.canvas_sync_job_status WHERE job_id LIKE 'resync%'"
        )
        assert len(resync_results) == 3

        urls = []
        for r in resync_results:
            assert r['job_id'].startswith('resync_')
            assert r['filename']
            assert r['canvas_table']
            assert r['created_at']
            assert r['updated_at']
            urls.append(r['source_url'])
        assert stalled['url'] in urls
        assert errored['url'] in urls
        assert size_discrepancy['url'] in urls
Exemplo n.º 13
0
def sis_api_degree_progress(app, student_tables):
    from nessie.externals import redshift
    sql = """SELECT sid, feed FROM student_test.sis_api_degree_progress"""
    return redshift.fetch(sql)