Example #1
0
    def get_integration_demographic_counts(self):
        """Count demographic flags in the integration outcome table.

        Returns a dict mapping each demographic column name to the number of
        rows where that flag is true, plus key 'dmg_eth_derived' with the
        count of rows whose derived-ethnicity column is populated.
        """
        demographics = [
            'dmg_eth_hsp', 'dmg_eth_ami', 'dmg_eth_asn', 'dmg_eth_blk',
            'dmg_eth_pcf', 'dmg_eth_wht', 'dmg_eth_2om', 'dmg_prg_iep',
            'dmg_prg_lep', 'dmg_prg_504', 'dmg_sts_ecd', 'dmg_sts_mig'
        ]
        results_dict = {}
        with get_udl_connection() as conn:
            int_outcome = conn.get_table('int_sbac_asmt_outcome')
            for entry in demographics:
                query = select(
                    [func.count(int_outcome.c[entry])],
                    from_obj=int_outcome).where(int_outcome.c[entry] == true())
                result = conn.execute(query)
                for row in result:
                    demo_count = row[0]

                results_dict[entry] = demo_count

            # Get derived ethnicity.
            # BUG FIX: the original used ``int_outcome.c[entry] is not None``,
            # a Python identity test that is always True, so the WHERE clause
            # never filtered and every row was counted; ``isnot(None)`` emits
            # the intended SQL ``IS NOT NULL``.  It also reused the
            # loop-leaked ``entry`` (dmg_sts_mig); the result key indicates
            # 'dmg_eth_derived' is the intended column — TODO confirm the
            # column name against the table schema.
            eth_query = select(
                [func.count(int_outcome.c['dmg_eth_derived'])],
                from_obj=int_outcome).where(
                    int_outcome.c['dmg_eth_derived'].isnot(None))
            result = conn.execute(eth_query)
            for row in result:
                derived_count = row[0]
            results_dict['dmg_eth_derived'] = derived_count

        return results_dict
Example #2
0
    def get_integration_asmt_score_avgs(self):
        """Return one row of AVG() values for every assessment/claim score
        column in the integration outcome table."""
        score_columns = [
            'score_asmt', 'score_asmt_min', 'score_asmt_max',
            'score_claim_1', 'score_claim_1_min', 'score_claim_1_max',
            'score_claim_2', 'score_claim_2_min', 'score_claim_2_max',
            'score_claim_3', 'score_claim_3_min', 'score_claim_3_max',
            'score_claim_4', 'score_claim_4_min', 'score_claim_4_max',
        ]
        with get_udl_connection() as conn:
            int_outcome = conn.get_table('int_sbac_asmt_outcome')
            query = select(
                [func.avg(int_outcome.c[name]) for name in score_columns],
                from_obj=int_outcome)
            for row in conn.execute(query):
                asmt_avgs = row
            return asmt_avgs
Example #3
0
 def truncate_udl_tables(self):
     """Delete every row from the integration (int_*), staging (stg_*),
     err_list and udl_batch tables."""
     with get_udl_connection() as conn:
         metadata = conn.get_metadata()
         table_names = (get_tables_starting_with(metadata, 'int_')
                        + get_tables_starting_with(metadata, 'stg_')
                        + ['err_list', 'udl_batch'])
         for name in table_names:
             conn.execute(conn.get_table(name).delete())
    def empty_table(self):
        """Delete every row from the UDL batch, err_list and udl_stats
        tables, asserting each table is empty afterwards."""
        # Delete all data from batch_table
        with get_udl_connection() as connector:
            batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
            connector.execute(batch_table.delete())
            remaining = connector.execute(select([batch_table])).fetchall()
            self.assertEqual(len(remaining), 0)

            # Delete all data from err_list
            err_list_table = connector.get_table('err_list')
            connector.execute(err_list_table.delete())
            remaining = connector.execute(select([err_list_table])).fetchall()
            self.assertEqual(len(remaining), 0)

        # Delete all data from udl_stats table
        with StatsDBConnection() as conn:
            table = conn.get_table('udl_stats')
            conn.execute(table.delete())
            remaining = conn.execute(select([table])).fetchall()
            # BUG FIX: the original computed this row count but never
            # asserted it, so a non-empty udl_stats table went undetected.
            self.assertEqual(len(remaining), 0)
def get_transformation_rule_names(ref_table_name):
    '''
    Get a list of all used transformation rule names from the database
    @param ref_table_name: the name of the reference table containing the column mapping info
    @return: The list of transformation rules without duplicates
    @rtype: list
    '''
    with get_udl_connection() as conn:
        # Column-mapping table holding one transformation rule per row
        col_map_table = conn.get_table(ref_table_name)
        # Select the distinct rules and keep only the non-empty ones
        select_stmt = select([col_map_table.c.transformation_rule]).distinct()
        return [row[0] for row in conn.execute(select_stmt) if row[0]]
def move_data_from_staging_to_integration_one_by_one(
        source_table_name, target_table_name, err_list_table_name, guid_batch,
        target_columns, source_columns_with_tran_rule):
    """Migrate one batch from staging to integration one record at a time.

    Each record is migrated individually so that one bad record only fails
    itself rather than the whole batch.

    @return: tuple ``(success, fail)`` — the number of records migrated and
        the number that raised during migration
    """
    success = 0
    fail = 0
    with get_udl_connection() as conn:
        source_table = conn.get_table(source_table_name)
        select_source_table = select(
            [source_table.c.record_sid.label('record_sid')],
            from_obj=[source_table
                      ]).where(source_table.c.guid_batch == guid_batch)
        results = conn.get_result(select_source_table)
        for result in results:
            # BUG FIX: extract the id before the try block — the original did
            # it inside, so a failure in ``result.get`` would raise NameError
            # while building the log message in the handler.
            record_sid = result.get('record_sid')
            try:
                query_result = move_data_from_staging_to_integration_all(
                    source_table_name,
                    target_table_name,
                    err_list_table_name,
                    guid_batch,
                    target_columns,
                    source_columns_with_tran_rule,
                    record_sid=record_sid)
                success += query_result
            except Exception:
                # BUG FIX: bare ``except:`` also swallowed SystemExit and
                # KeyboardInterrupt; catch Exception and log the traceback
                # with lazy %-formatting.
                logger.exception(
                    'Failed to integrate record: batch_guid[%s] record_sid[%s]',
                    guid_batch, str(record_sid))
                fail += 1
    return success, fail
def get_asmt_rec_id(guid_batch, tenant_name, asmt_rec_id_info):
    '''
    Returns asmt_rec_id from dim_asmt table (or None when not found)
    Steps:
    1. Get guid_asmt from integration table INT_SBAC_ASMT
    2. Select asmt_rec_id from dim_asmt by the same guid_asmt got from 1. It should have 1 value
    '''
    source_table_name = asmt_rec_id_info['source_table']
    guid_column_name_in_source = asmt_rec_id_info['guid_column_in_source']
    target_table_name = asmt_rec_id_info['target_table']
    guid_column_name_in_target = asmt_rec_id_info['guid_column_name']
    rec_id_column_name = asmt_rec_id_info['rec_id']

    # BUG FIX: both lookups below only bound their variable on a non-empty
    # result, so an empty result set raised NameError; default to None.
    guid_column_value = None
    asmt_rec_id = None

    # connect to integration table, to get the value of guid_asmt
    with get_udl_connection() as udl_conn:
        int_table = udl_conn.get_table(source_table_name)
        query = select([int_table.c[guid_column_name_in_source]], from_obj=int_table, limit=1)
        query = query.where(int_table.c['guid_batch'] == guid_batch)
        results = udl_conn.get_result(query)
        if results:
            guid_column_value = results[0][guid_column_name_in_source]

    # connect to target table, to get the value of asmt_rec_id
    with get_target_connection(tenant_name, guid_batch) as target_conn:
        dim_asmt = target_conn.get_table(target_table_name)
        query = select([dim_asmt.c[rec_id_column_name]], from_obj=dim_asmt, limit=1)
        query = query.where(dim_asmt.c[guid_column_name_in_target] == guid_column_value)
        query = query.where(and_(dim_asmt.c['batch_guid'] == guid_batch))
        results = target_conn.get_result(query)
        if results:
            asmt_rec_id = results[0][rec_id_column_name]

    return asmt_rec_id
Example #8
0
    def empty_table(self):
        """Delete all rows from the batch, err_list and udl_stats tables and
        assert that each table is empty afterwards."""
        # Delete all data from batch_table
        with get_udl_connection() as connector:
            batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
            connector.execute(batch_table.delete())
            rows = connector.execute(select([batch_table])).fetchall()
            self.assertEqual(len(rows), 0)

            # Delete all data from err_list
            err_list_table = connector.get_table('err_list')
            connector.execute(err_list_table.delete())
            rows = connector.execute(select([err_list_table])).fetchall()
            self.assertEqual(len(rows), 0)

        # Delete all data from udl_stats table
        with StatsDBConnection() as conn:
            table = conn.get_table('udl_stats')
            conn.execute(table.delete())
            rows = conn.execute(select([table])).fetchall()
            # BUG FIX: the row count was computed but never asserted, so a
            # non-empty udl_stats table went undetected.
            self.assertEqual(len(rows), 0)
Example #9
0
    def get_staging_demographic_counts(self):
        """Count affirmative demographic answers in the staging outcome table.

        Returns a dict keyed by the integration-side demographic column name
        ('dmg_*'), each value being the number of staging rows whose source
        column holds an affirmative flag ('Y', 'y' or 'yes').
        """
        # (integration key, staging column) pairs, in reporting order
        column_map = [
            ('dmg_eth_hsp', 'hispanicorlatinoethnicity'),
            ('dmg_eth_ami', 'americanindianoralaskanative'),
            ('dmg_eth_asn', 'asian'),
            ('dmg_eth_blk', 'blackorafricanamerican'),
            ('dmg_eth_pcf', 'nativehawaiianorotherpacificislander'),
            ('dmg_eth_wht', 'white'),
            ('dmg_eth_2om', 'demographicracetwoormoreraces'),
            ('dmg_prg_iep', 'ideaindicator'),
            ('dmg_prg_lep', 'lepstatus'),
            ('dmg_prg_504', 'section504status'),
            ('dmg_sts_ecd', 'economicdisadvantagestatus'),
            ('dmg_sts_mig', 'migrantstatus'),
        ]
        affirmative = ['Y', 'y', 'yes']
        counts = {}
        with get_udl_connection() as conn:
            stg_outcome = conn.get_table('stg_sbac_asmt_outcome')
            for demo_key, column in column_map:
                query = select([func.count(stg_outcome.c[column])],
                               from_obj=stg_outcome).where(
                                   stg_outcome.c[column].in_(affirmative))
                for row in conn.execute(query):
                    count = row[0]
                counts[demo_key] = count
        return counts
Example #10
0
def drop_schema(schema_name):
    '''
    Drop the given schema, cascading to everything it contains.
    @param schema_name: name of the schema to drop
    '''
    with get_udl_connection() as conn:
        conn.execute(DropSchema(schema_name, cascade=True))
def update_column_mappings(rule_map_list, ref_table_name):
    '''
    loop through the column mapping rows in the database and populate the
    stored procedure column based on the transformation name
    @param rule_map_list: A list of tuples containing mapping info. Tuples should be: (rule_name, proc_name)
    @param ref_table_name: the name of the reference table containing the column mapping info
    '''

    # check that list is not empty before proceeding.
    if not rule_map_list:
        print('NO FUNCTIONS ADDED TO DATABASE')
        return
    with get_udl_connection() as conn:
        # get column_mapping table object
        col_map_table = conn.get_table(ref_table_name)

        # Generate sql to perform update
        update_stmt = col_map_table.update().where(col_map_table.c.transformation_rule == bindparam('rule_name'))
        update_stmt = update_stmt.values(stored_proc_name=bindparam('proc_name'), stored_proc_created_date=datetime.datetime.now())

        # Create list of dicts that sqlalchemy will recognize
        # to update all rules with corresponding stored procedure.
        # IMPROVED: executed as a single executemany call (one round trip)
        # instead of one execute per rule.
        params = [{'rule_name': rule_name, 'proc_name': proc_name}
                  for rule_name, proc_name in rule_map_list]
        conn.execute(update_stmt, params)
 def get_err_list(self):
     """Return every err_list row recorded for this batch."""
     with get_udl_connection() as conn:
         err_table = conn.get_table('err_list')
         # ``==`` builds the same SQL expression as the explicit __eq__ call
         stmt = select([err_table]).where(
             err_table.c['guid_batch'] == self.guid_batch)
         return conn.get_result(stmt)
def move_data_from_staging_to_integration_all(source_table_name,
                                              target_table_name,
                                              err_list_table_name,
                                              guid_batch,
                                              target_columns,
                                              source_columns_with_tran_rule,
                                              record_sid=None):
    """Build and run the staging-to-integration migration query for a batch
    (or a single record when ``record_sid`` is given); return the affected
    row count."""
    with get_udl_connection() as conn:
        migration_query = create_migration_query(
            conn,
            source_table_name,
            target_table_name,
            err_list_table_name,
            guid_batch,
            target_columns,
            source_columns_with_tran_rule,
            record_sid=record_sid)
        # tries=-1: retry behavior is delegated entirely to execute_udl_queries
        query_result = execute_udl_queries(
            conn,
            [migration_query],
            "problem when load data from staging table to integration table",
            'move_to_integration',
            'move_data_from_staging_to_integration',
            tries=-1)
    return query_result[0]
    def setUp(self):
        """Initialise the UDL database and insert one ref-mapping row for
        every testable transformation rule."""
        try:
            config_path = dict(os.environ)['UDL2_CONF']
        except Exception:
            config_path = UDL2_DEFAULT_CONFIG_PATH_FILE

        udl2_conf = read_ini_file(config_path)[0]
        initialize_db_udl(udl2_conf)
        self.ref_schema = udl2_conf['udl2_db_conn']['db_schema']
        self.ref_table_name = Constants.UDL2_REF_MAPPING_TABLE(Constants.LOAD_TYPE_ASSESSMENT)

        # Testable Rules
        self.rule_names = transform_rules.keys()
        self.rule_conf = transform_rules
        self.rule_list = transformation_code_generator.generate_transformations(
            self.rule_names, rule_conf=self.rule_conf)
        self.testable_rules = [rule[0] for rule in self.rule_list]

        # One fixture row per testable rule; phase -999 marks test data
        test_rows = [{'phase': -999,
                      'source_table': 'ftest_table',
                      'source_column': 'ftest_column',
                      'target_table': 'ftest_table1',
                      'target_column': 'ftest_column1',
                      'transformation_rule': rule}
                     for rule in self.testable_rules]
        with get_udl_connection() as conn:
            self.ref_table = conn.get_table(self.ref_table_name)
            conn.execute(self.ref_table.insert(test_rows))
    def compare_csv_table_data(self, csv_file, key_column):
        """Compare every row loaded for this batch against the expected rows
        from *csv_file*.

        Rows on both sides are sorted by student identifier / *key_column*
        before being compared column by column (0 and '' are normalized to
        None via change_empty_vals_to_none).
        """
        table_name = self.conf[mk.TARGET_DB_TABLE]
        guid_batch = self.conf['guid_batch']
        result_key = 'studentidentifier' if table_name == 'stg_sbac_asmt_outcome' else 'guid_student'
        with get_udl_connection() as conn:
            table = conn.get_table(table_name)
            query = select([table]).where(table.c.guid_batch == guid_batch)
            results = conn.execute(query)
            result_list = results.fetchall()
            expected_rows = self.get_clean_rows_from_file(csv_file)
            # sort rows
            student_id_index = results.keys().index(result_key)  # Determine index of guid_student in results
            result_list = sorted(result_list, key=lambda i: i[student_id_index])  # sort results using this index
            expected_rows = sorted(expected_rows, key=lambda k: k[key_column])  # sort expected based on the key
            # BUG FIX: the column loop was dedented outside the row loop, so
            # only the LAST row was ever compared; nest it so every row is
            # checked against its expected counterpart.
            for res_row, expect_row in zip(result_list, expected_rows):
                for ci in range(len(res_row)):
                    if results.keys()[ci] in expect_row:
                        # if column is in the expected data
                        # change_empty_vals_to_none() converts all 0's and empty strings to None
                        self.assertEqual(self.change_empty_vals_to_none(res_row[ci]),
                                         self.change_empty_vals_to_none(expect_row[results.keys()[ci]]),
                                         'Values are not the same for column %s' % results.keys()[ci])
 def get_row_number_in_table(self):
     """Count the rows in the target table that belong to this batch."""
     with get_udl_connection() as conn:
         target = conn.get_table(self.conf[mk.TARGET_DB_TABLE])
         stmt = select([target]).where(
             target.c.guid_batch == self.conf['guid_batch'])
         return conn.execute(stmt).rowcount
def get_transformation_rule_names(ref_table_name):
    '''
    Get a list of all used transformation rule names from the database
    @param ref_table_name: the name of the reference table containing the column mapping info
    @return: The list of transformation rules without duplicates
    @rtype: list
    '''
    with get_udl_connection() as conn:
        # Reference table that maps columns to transformation rules
        col_map_table = conn.get_table(ref_table_name)

        # DISTINCT keeps each rule name only once
        distinct_rules = select([col_map_table.c.transformation_rule]).distinct()

        trans_rules = []
        for (rule,) in conn.execute(distinct_rules):
            # skip NULL / empty rule names
            if rule:
                trans_rules.append(rule)
        return trans_rules
def update_column_mappings(rule_map_list, ref_table_name):
    '''
    loop through the column mapping rows in the database and populate the
    stored procedure column based on the transformation name
    @param rule_map_list: A list of tuples containing mapping info. Tuples should be: (rule_name, proc_name)
    @param ref_table_name: the name of the reference table containing the column mapping info
    '''

    # check that list is not empty before proceeding.
    if not rule_map_list:
        print('NO FUNCTIONS ADDED TO DATABASE')
        return
    with get_udl_connection() as conn:
        # get column_mapping table object
        col_map_table = conn.get_table(ref_table_name)

        # Generate sql to perform update
        update_stmt = col_map_table.update().where(
            col_map_table.c.transformation_rule == bindparam('rule_name'))
        update_stmt = update_stmt.values(
            stored_proc_name=bindparam('proc_name'),
            stored_proc_created_date=datetime.datetime.now())

        # Create list of dicts that sqlalchemy will recognize
        # to update all rules with corresponding stored procedure.
        # IMPROVED: one executemany call instead of one execute per rule.
        params = [{'rule_name': rule_name, 'proc_name': proc_name}
                  for rule_name, proc_name in rule_map_list]
        conn.execute(update_stmt, params)
Example #19
0
 def truncate_udl_tables(self):
     """Empty all int_* and stg_* tables plus err_list and udl_batch."""
     with get_udl_connection() as conn:
         meta = conn.get_metadata()
         for name in (get_tables_starting_with(meta, 'int_')
                      + get_tables_starting_with(meta, 'stg_')
                      + ['err_list', 'udl_batch']):
             conn.execute(conn.get_table(name).delete())
Example #20
0
 def get_row_number_in_table(self):
     """Return how many rows of the current batch are in the target table."""
     with get_udl_connection() as conn:
         table = conn.get_table(self.conf[mk.TARGET_DB_TABLE])
         batch_filter = table.c.guid_batch == self.conf['guid_batch']
         result = conn.execute(select([table]).where(batch_filter))
         return result.rowcount
Example #21
0
def drop_foreign_data_wrapper_extension():
    '''
    Drop the file_fdw foreign-data-wrapper extension if it is installed,
    cascading to dependent objects.
    '''
    print('drop foreign data wrapper extension')
    with get_udl_connection() as conn:
        conn.execute(text("DROP EXTENSION IF EXISTS file_fdw CASCADE"))
Example #22
0
    def compare_csv_table_data(self, csv_file, key_column):
        """Compare every loaded row for this batch against the expected rows
        from *csv_file*, column by column (0 and '' normalized to None)."""
        table_name = self.conf[mk.TARGET_DB_TABLE]
        guid_batch = self.conf['guid_batch']
        result_key = 'studentidentifier' if table_name == 'stg_sbac_asmt_outcome' else 'guid_student'
        with get_udl_connection() as conn:
            table = conn.get_table(table_name)
            query = select([table]).where(table.c.guid_batch == guid_batch)
            results = conn.execute(query)
            result_list = results.fetchall()
            expected_rows = self.get_clean_rows_from_file(csv_file)
            # sort rows
            student_id_index = results.keys().index(
                result_key)  # Determine index of guid_student in results
            result_list = sorted(result_list,
                                 key=lambda i: i[student_id_index]
                                 )  # sort results using this index
            expected_rows = sorted(
                expected_rows,
                key=lambda k: k[key_column])  # sort expected based on the key
            # BUG FIX: the column loop sat outside the row loop, so only the
            # LAST row was compared; nest it so every row is verified.
            for res_row, expect_row in zip(result_list, expected_rows):
                for ci in range(len(res_row)):
                    if results.keys()[ci] in expect_row:
                        # if column is in the expected data
                        # change_empty_vals_to_none() converts all 0's and empty strings to None
                        self.assertEqual(
                            self.change_empty_vals_to_none(res_row[ci]),
                            self.change_empty_vals_to_none(
                                expect_row[results.keys()[ci]]),
                            'Values are not the same for column %s' %
                            results.keys()[ci])
Example #23
0
 def test_derive_eth_function(self):
     """Exercise the deriveEthnicity SQL function with every input pattern
     and check the derived ethnicity code it returns."""
     function_name = sr.special_rules['deriveEthnicity'][0]
     # dmg_eth_blk, dmg_eth_asn, dmg_eth_hsp, dmg_eth_ami, dmg_eth_pcf, dmg_eth_wht
     prepare_data = {'exception': {'src_column': "'sda', 'dg', 'a', 'q', 't', 'fff', 'z'", 'expected_code': -1},
                     'not stated 1': {'src_column': "NULL, NULL, NULL, NULL, NULL, NULL, NULL", 'expected_code': 0},
                     'not stated 2': {'src_column': "'f', NULL, NULL, 'f', NULL, 'f', NULL", 'expected_code': 0},
                     'african american': {'src_column': "'y', 'n', 'n', 'n', 'n', 'n', 'n'", 'expected_code': 1},
                     'asian': {'src_column': "'n', 'y', 'n', 'n', 'n', 'n', 'n'", 'expected_code': 2},
                     'hispanic 1': {'src_column': "'n', 'n', 'y', 'n', 'n', 'n', 'n'", 'expected_code': 3},
                     'hispanic 2': {'src_column': "'n', 'n', 'y', 'y', 'n', 'y', 'n'", 'expected_code': 3},
                     'hispanic 3': {'src_column': "'n', 'n', 'y', 'n', 'n', 'n', 'y'", 'expected_code': 3},
                     'native american': {'src_column': "'n', 'n', 'n', 'y', 'n', 'n', 'n'", 'expected_code': 4},
                     'pacific islander': {'src_column': "'n', 'n', 'n', 'n', 'y', 'n', 'n'", 'expected_code': 5},
                     'white': {'src_column': "'n', 'n', 'n', 'n', 'n', 'y', 'n'", 'expected_code': 6},
                     'two or more races 1': {'src_column': "'y', 'n', 'n', 'n', 'n', 'y', 'n'", 'expected_code': 1},
                     'two or more races 2': {'src_column': "'n', 'y', 'n', 'n', NULL, 'y', 'n'", 'expected_code': 2},
                     'two or more races 3': {'src_column': "'y', 'y', 'n', 'y', 'y', 'y', 'y'", 'expected_code': 7},
                     'two or more races 4': {'src_column': "'n', 'n', 'n', 'n', 'n', 'n', 'y'", 'expected_code': 7}
                     }
     sql_template = 'SELECT %s;' % function_name
     with get_udl_connection() as conn:
         for value in prepare_data.values():
             result = conn.execute(sql_template.format(src_column=value['src_column']))
             first_row = result.fetchone()
             # mirror the original: '' when the query yields no rows
             actual_value = first_row[0] if first_row is not None else ''
             self.assertEqual(actual_value, value['expected_code'])
 def get_udl_batch(self):
     """Fetch the udl_batch rows recorded for this batch guid."""
     with get_udl_connection() as conn:
         batch_table = conn.get_table('udl_batch')
         stmt = select([batch_table]).where(
             batch_table.c['guid_batch'] == self.guid_batch)
         return conn.get_result(stmt)
Example #25
0
 def test_get_column_mapping_from_stg_to_int(self):
     """Verify the staging-to-integration column mapping for the student
     registration load: target column names and their source expressions
     (substr truncations and ''-to-null bool casts) must match exactly."""
     expected_target_columns = ['name_state', 'code_state', 'guid_district', 'name_district', 'guid_school', 'name_school',
                                'guid_student', 'external_ssid_student', 'name_student_first', 'name_student_middle', 'name_student_last',
                                'birthdate_student', 'sex_student', 'grade_enrolled', 'dmg_eth_hsp', 'dmg_eth_ami', 'dmg_eth_asn',
                                'dmg_eth_blk', 'dmg_eth_pcf', 'dmg_eth_wht', 'dmg_multi_race', 'dmg_prg_iep', 'dmg_prg_lep', 'dmg_prg_504', 'dmg_sts_ecd',
                                'dmg_sts_mig', 'code_language', 'eng_prof_lvl', 'us_school_entry_date', 'lep_entry_date',
                                'lep_exit_date', 't3_program_type', 'prim_disability_type', 'created_date', 'guid_batch']
     # "A" is the staging-table alias used by the generated migration SQL
     expected_source_columns_with_tran_rule = ['substr("A".name_state, 1, 50)', 'substr("A".code_state, 1, 2)', 'substr("A".guid_district, 1, 40)',
                                               'substr("A".name_district, 1, 60)', 'substr("A".guid_school, 1, 40)', 'substr("A".name_school, 1, 60)',
                                               'substr("A".guid_student, 1, 40)', 'substr("A".external_ssid_student, 1, 40)', 'substr("A".name_student_first, 1, 35)',
                                               'substr("A".name_student_middle, 1, 35)', 'substr("A".name_student_last, 1, 35)', 'substr("A".birthdate_student, 1, 10)',
                                               'substr("A".sex_student, 1, 10)', 'substr("A".grade_enrolled, 1, 2)',
                                               'case "A".dmg_eth_hsp when \'\' then null else cast("A".dmg_eth_hsp as bool) end',
                                               'case "A".dmg_eth_ami when \'\' then null else cast("A".dmg_eth_ami as bool) end',
                                               'case "A".dmg_eth_asn when \'\' then null else cast("A".dmg_eth_asn as bool) end',
                                               'case "A".dmg_eth_blk when \'\' then null else cast("A".dmg_eth_blk as bool) end',
                                               'case "A".dmg_eth_pcf when \'\' then null else cast("A".dmg_eth_pcf as bool) end',
                                               'case "A".dmg_eth_wht when \'\' then null else cast("A".dmg_eth_wht as bool) end',
                                               'case "A".dmg_multi_race when \'\' then null else cast("A".dmg_multi_race as bool) end',
                                               'case "A".dmg_prg_iep when \'\' then null else cast("A".dmg_prg_iep as bool) end',
                                               'case "A".dmg_prg_lep when \'\' then null else cast("A".dmg_prg_lep as bool) end',
                                               'case "A".dmg_prg_504 when \'\' then null else cast("A".dmg_prg_504 as bool) end',
                                               'case "A".dmg_sts_ecd when \'\' then null else cast("A".dmg_sts_ecd as bool) end',
                                               'case "A".dmg_sts_mig when \'\' then null else cast("A".dmg_sts_mig as bool) end',
                                               'substr("A".code_language, 1, 3)', 'substr("A".eng_prof_lvl, 1, 20)', 'substr("A".us_school_entry_date, 1, 10)',
                                               'substr("A".lep_entry_date, 1, 10)', 'substr("A".lep_exit_date, 1, 10)', 'substr("A".t3_program_type, 1, 27)',
                                               'substr("A".prim_disability_type, 1, 3)', '"A".created_date', '"A".guid_batch']
     with get_udl_connection() as conn:
         target_columns, source_columns_with_tran_rule = get_column_mapping_from_stg_to_int(conn,
                                                                                            Constants.UDL2_REF_MAPPING_TABLE(Constants.LOAD_TYPE_STUDENT_REGISTRATION),
                                                                                            'stg_sbac_stu_reg', 'int_sbac_stu_reg')
         self.assertEqual(expected_target_columns, target_columns)
         self.assertEqual(expected_source_columns_with_tran_rule, source_columns_with_tran_rule)
 def validate_err_table(self, guid_batch_id):
     """Assert that deleting the same fact record twice logged exactly one
     DELETE_FACT_ASMT_OUTCOME_RECORD_MORE_THAN_ONCE entry for this batch."""
     with get_udl_connection() as connector:
         error_table = connector.get_table('err_list')
         error_record = select([error_table.c.err_source_text]).where(error_table.c.guid_batch == guid_batch_id)
         error_result = connector.execute(error_record).fetchall()
         expected_result = [('DELETE_FACT_ASMT_OUTCOME_RECORD_MORE_THAN_ONCE',)]
         # FIX: assertEquals is a deprecated alias of assertEqual
         self.assertEqual(error_result, expected_result, "Error has not been logged for deleting the same data twice into ERR_LIST table")
 def validate_err_list_table(self, guid_batch_id):
     """Assert that deleting a missing record logged exactly one
     DELETE_RECORD_NOT_FOUND entry for this batch."""
     with get_udl_connection() as connector:
         error_table = connector.get_table('err_list')
         error_record = select([error_table.c.err_code_text]).where(error_table.c.guid_batch == guid_batch_id)
         error_result = connector.execute(error_record).fetchall()
         expected_result = [('DELETE_RECORD_NOT_FOUND',)]
         # FIX: assertEquals is a deprecated alias of assertEqual
         self.assertEqual(error_result, expected_result, "Error has not been logged into ERR_LIST table")
Example #28
0
    def record_benchmark(self):
        '''
        Persist this instance's benchmark information into the UDL batch table.
        '''
        with get_udl_connection() as connector:
            batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
            connector.execute(batch_table.insert(), self.get_result_dict())
Example #29
0
def cleanup_udl_tables(guid_batch):
    """Delete the given batch's rows from every integration (int_*) and
    staging (stg_*) table."""
    with get_udl_connection() as connector:
        for prefix in ('int_', 'stg_'):
            cleanup_all_tables(connector=connector,
                               column_name='guid_batch',
                               value=guid_batch,
                               batch_delete=True,
                               table_name_prefix=prefix)
Example #30
0
 def test_rule_with_inlist_outlist(self):
     """For every rule that defines an inlist/outlist pair, check that the
     generated SQL function maps each input value to its expected output."""
     for rule in self.rule_list:
         rule_def = self.rule_conf[rule[0]]
         # guard clause: only rules with a declared input/output mapping
         if 'inlist' not in rule_def or 'outlist' not in rule_def:
             continue
         with get_udl_connection() as conn:
             for input_val, output_val in zip(rule_def['inlist'], rule_def['outlist']):
                 result = conn.execute("SELECT %s('%s')" % (rule[1], input_val))
                 self.assertEqual(result.fetchone()[0], output_val)
Example #31
0
def drop_foreign_data_wrapper_server(fdw_server):
    '''
    Drop the named foreign data wrapper server, if it exists, with CASCADE.
    @param fdw_server: name of the FDW server to remove
    '''
    print('drop foreign data wrapper server')
    statement = "DROP SERVER IF EXISTS %s CASCADE" % (fdw_server)
    with get_udl_connection() as conn:
        conn.execute(text(statement))
 def empty_batch_table(self):
     """Delete every row from the UDL batch table, then verify the table is empty."""
     with get_udl_connection() as conn:
         batch = conn.get_table(Constants.UDL2_BATCH_TABLE)
         conn.execute(batch.delete())
         remaining = conn.execute(select([batch])).fetchall()
         self.assertEqual(len(remaining), 0)
Пример #33
0
def create_foreign_data_wrapper_extension(schema_name):
    '''
    Create the file_fdw extension if it does not already exist.
    @param schema_name: schema name (not referenced by the statement itself)
    '''
    print('create foreign data wrapper extension')
    statement = text("CREATE EXTENSION IF NOT EXISTS file_fdw")
    with get_udl_connection() as conn:
        conn.execute(statement)
Пример #34
0
    def record_benchmark(self):
        '''
        Insert this instance's benchmark results into the UDL batch table.

        The row data comes from self.get_result_dict(), which is assumed to
        map batch-table column names to values -- TODO confirm against the
        table definition.
        '''

        with get_udl_connection() as connector:
            batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
            connector.execute(batch_table.insert(), self.get_result_dict())
Пример #35
0
def create_foreign_data_wrapper_server(fdw_server):
    '''
    Create a foreign data wrapper server backed by file_fdw.
    @param fdw_server: name for the new FDW server
    '''
    print('create foreign data wrapper server')
    statement = "CREATE SERVER %s FOREIGN DATA WRAPPER file_fdw" % (fdw_server)
    with get_udl_connection() as conn:
        conn.execute(text(statement))
Пример #36
0
def load_to_table(data_dict, guid_batch, int_table, tenant_name, udl_schema):
    '''
    Insert one record of json-sourced data into the given integration table.

    Looks up per-column transformations in ref_column_mapping (stored
    procedures or format templates), builds an INSERT ... FROM SELECT whose
    select list applies those transformations, and executes it.

    @param data_dict: the dictionary containing the data to be loaded
    @param guid_batch: the id for the batch
    @param int_table: the name of the integration table
    @param tenant_name: name of the tenant (selects the record_sid sequence)
    @param udl_schema: udl schema name (qualifies the sequence)
    @return: number of rows inserted (first element of the query result counts)
    '''
    # Create sqlalchemy connection and get table information from sqlalchemy
    # target_column -> SQL expression (stored-proc call, formatted template, or raw value)
    ref_column_mapping_columns = {}
    with get_udl_connection() as conn:
        data_dict[mk.GUID_BATCH] = guid_batch
        data_dict = fix_empty_strings(data_dict)
        ref_table = conn.get_table('ref_column_mapping')
        s_int_table = conn.get_table(int_table)
        # Only mappings for json-sourced data targeting this integration table.
        column_mapping_query = select([ref_table.c.target_column,
                                       ref_table.c.stored_proc_name],
                                      from_obj=ref_table).where(and_(ref_table.c.source_table == 'lz_json',
                                                                     ref_table.c.target_table == int_table))
        results = conn.get_result(column_mapping_query)
        for result in results:
            target_column = result['target_column']
            stored_proc_name = result['stored_proc_name']
            value = data_dict.get(target_column)
            # NOTE(review): truthiness test -- falsy values such as 0 or ''
            # skip the stored-proc/template path and fall through to the raw
            # assignment below. Presumably intentional; confirm for numeric 0.
            if value:
                if stored_proc_name:
                    if stored_proc_name.startswith('sp_'):
                        # 'sp_'-prefixed names are called as SQL functions with the
                        # (safely quoted) value as their single argument.
                        ref_column_mapping_columns[target_column] = stored_proc_name + '(' + QuotedString(value if type(value) is str else str(value)).getquoted().decode('utf-8') + ')'
                    else:
                        # Otherwise stored_proc_name is a str.format template that
                        # receives the quoted value and, for string columns, the
                        # target column's declared length.
                        format_value = dict()
                        format_value['value'] = QuotedString(value if type(value) is str
                                                             else str(value)).getquoted().decode('utf-8')
                        if s_int_table.c[target_column].type.python_type is str:
                            format_value['length'] = s_int_table.c[target_column].type.length
                        ref_column_mapping_columns[target_column] = stored_proc_name.format(**format_value)
                    continue
            ref_column_mapping_columns[target_column] = value

        # record_sid comes from the tenant-specific sequence in the udl schema.
        record_sid = 'nextval(\'{schema_name}.{tenant_sequence_name}\')'.\
            format(schema_name=udl_schema, tenant_sequence_name=Constants.TENANT_SEQUENCE_NAME(tenant_name))
        from_select_column_names = ['record_sid']
        from_select_select_values = [record_sid]
        # Build the insert column list / select values only for columns present
        # in data_dict; unmapped columns get the raw quoted value.
        for column in s_int_table.c:
            value = data_dict.get(column.name)
            if value is not None:
                from_select_column_names.append(column.name)
                from_select_select_values.append(
                    ref_column_mapping_columns.get(column.name,
                                                   QuotedString(value if type(value) is str else str(value)).getquoted().decode('utf-8')))
        insert_into_int_table = s_int_table.insert().from_select(from_select_column_names,
                                                                 select(from_select_select_values))
        # create insert statement and execute
        affected_row = db_util.execute_udl_queries(conn, [insert_into_int_table],
                                                   'Exception in loading json data -- ',
                                                   'json_loader', 'load_to_table')

    return affected_row[0]
    def test_stored_procedures_exist_in_db(self):
        """After populating stored procedures, verify each testable rule has an sp_<rule> function in pg_proc."""
        populate_stored_proc(self.ref_table_name)
        with get_udl_connection() as conn:
            for rule in self.testable_rules:
                query = "SELECT proname FROM pg_proc WHERE proname = 'sp_{0}';".format(rule.lower())
                found = conn.execute(query).fetchall()[0][0]
                self.assertEqual(found.lower(), 'sp_{0}'.format(rule).lower())
Пример #38
0
 def tearDown(self):
     """Remove this test's rows (matched by guid_batch) from the target table."""
     table_name = self.conf[mk.TARGET_DB_TABLE]
     guid_batch = self.conf['guid_batch']
     with get_udl_connection() as conn:
         table = conn.get_table(table_name)
         try:
             # BUG FIX: the delete statement was constructed but never executed,
             # so teardown silently left the rows behind. It must be passed to
             # conn.execute() to actually run.
             conn.execute(delete(table).where(table.c.guid_batch == guid_batch))
         except Exception as e:
             # best-effort cleanup: report but do not fail the test run
             print('Exception -- ', e)
Пример #39
0
 def get_rows_in_table(self, columns):
     """Fetch the requested columns for this batch, ordered by source-file record number."""
     with get_udl_connection() as conn:
         target = conn.get_table(self.conf[mk.TARGET_DB_TABLE])
         guid_batch = self.conf['guid_batch']
         wanted = [target.c[name] for name in columns]
         stmt = select(wanted).where(target.c.guid_batch == guid_batch).order_by(target.c.src_file_rec_num)
         return conn.execute(stmt).fetchall()
Пример #40
0
def load_file(conf):
    '''
    Entry point for the file loader: load csv data into staging and log the time taken.
    '''
    logger.info("Starting data load from csv to staging")
    with get_udl_connection() as conn:
        elapsed_seconds = load_data_process(conn, conf)
    logger.info("Data Loaded from csv to Staging in %s seconds" % elapsed_seconds)
 def validate_successful_job_completion(self):
     """Assert the UDL job for self.batch_id reached UDL_COMPLETE with SUCCESS status.

     Fails if no UDL_COMPLETE row exists for the batch, or if any such row's
     step status is not SUCCESS.
     """
     with get_udl_connection() as connector:
         batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
         query = select([batch_table.c.udl_phase_step_status],
                        and_(batch_table.c.guid_batch == self.batch_id,
                             batch_table.c.udl_phase == 'UDL_COMPLETE'))
         result = connector.execute(query).fetchall()
         # at least one UDL_COMPLETE row must exist for the batch
         self.assertNotEqual(result, [])
         for row in result:
             status = row['udl_phase_step_status']
             # BUG FIX: the assertion message is shown when the check FAILS, so it
             # must describe the failure rather than claim success.
             self.assertEqual(status, NotificationConstants.SUCCESS, 'UDL process did not complete successfully')
def get_intput_file(batch_guid):
    """Return the input file recorded at the file-arrival phase for the batch, or '' if none.

    NOTE: the misspelled name ('intput') is kept as-is for caller compatibility.
    """
    with get_udl_connection() as connector:
        batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
        query = select([batch_table.c.input_file.label('input_file')])\
            .where(and_(batch_table.c.udl_phase == 'udl2.W_file_arrived.task',
                        batch_table.c.guid_batch == batch_guid))
        rows = connector.get_result(query)
    return rows[0]['input_file'] if rows else ''
 def get_asmt_and_outcome_result(self, conf):
     """Return (assessment guid rows, distinct outcome assessment guid rows) for the configured batch."""
     with get_udl_connection() as conn:
         asmt = conn.get_table(conf.get(mk.ASMT_TABLE))
         outcome = conn.get_table(conf.get(mk.ASMT_OUTCOME_TABLE))
         asmt_rows = conn.get_result(
             select([asmt.c.guid_asmt]).where(asmt.c.guid_batch == conf.get(mk.GUID_BATCH)))
         outcome_rows = conn.get_result(
             select([outcome.c.assessmentguid], distinct=True).where(outcome.c.guid_batch == conf.get(mk.GUID_BATCH)))
     return asmt_rows, outcome_rows
 def connect_verify_db(self):
     """Verify via the batch table that this batch's UDL_COMPLETE phase finished with status SUCCESS."""
     with get_udl_connection() as connector:
         batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
         # FIX: removed a dead full-table fetch (select([batch_table]) executed and
         # fetched but never used) -- it only added a pointless DB round trip.
         output = select([batch_table.c.udl_phase_step_status],
                         and_(batch_table.c.udl_phase == 'UDL_COMPLETE',
                              batch_table.c.guid_batch == self.guid_batch_id))
         output_data = connector.execute(output).fetchall()
         # exactly one UDL_COMPLETE row with SUCCESS status is expected
         self.assertEqual([('SUCCESS',)], output_data)
    def load_csv_data_to_integration(self, data_file, metadata_file, data_table_name, meta_table_name):
        """Bulk-insert rows from the csv fixtures into the integration data and metadata tables (metadata first)."""
        with get_udl_connection() as conn:
            data_table = conn.get_table(data_table_name)
            metadata_table = conn.get_table(meta_table_name)
            data_rows = self.get_csv_dict_list(data_file)
            metadata_rows = self.get_csv_dict_list(metadata_file)
            conn.execute(metadata_table.insert(), metadata_rows)
            conn.execute(data_table.insert(), data_rows)
 def get_rows_in_table(self, columns):
     """Return the requested columns of this batch's rows, ordered by src_file_rec_num."""
     with get_udl_connection() as conn:
         tbl = conn.get_table(self.conf[mk.TARGET_DB_TABLE])
         batch_guid = self.conf['guid_batch']
         query = select([tbl.c[name] for name in columns])\
             .where(tbl.c.guid_batch == batch_guid)\
             .order_by(tbl.c.src_file_rec_num)
         return conn.execute(query).fetchall()
 def tearDown(self):
     """Remove this test's rows (matched by guid_batch) from the target table."""
     table_name = self.conf[mk.TARGET_DB_TABLE]
     guid_batch = self.conf['guid_batch']
     with get_udl_connection() as conn:
         table = conn.get_table(table_name)
         try:
             # BUG FIX: the delete statement was constructed but never executed,
             # so teardown silently left the rows behind. It must be passed to
             # conn.execute() to actually run.
             conn.execute(delete(table).where(table.c.guid_batch == guid_batch))
         except Exception as e:
             # best-effort cleanup: report but do not fail the test run
             print('Exception -- ', e)
Пример #48
0
    def get_staging_asmt_score_avgs(self):
        """Compute averages of all assessment/claim score columns in stg_sbac_asmt_outcome.

        Staging columns are stored as text, so each is cast to Integer before
        averaging. Returns the single result row of averages: overall score
        value/min/max followed by claims 1-4 value/min/max, in that order.
        """
        # Column names in the exact order callers expect the averages.
        score_columns = [
            'assessmentsubtestresultscorevalue',
            'assessmentsubtestminimumvalue',
            'assessmentsubtestmaximumvalue',
            'assessmentsubtestresultscoreclaim1value',
            'assessmentsubtestclaim1minimumvalue',
            'assessmentsubtestclaim1maximumvalue',
            'assessmentsubtestresultscoreclaim2value',
            'assessmentsubtestclaim2minimumvalue',
            'assessmentsubtestclaim2maximumvalue',
            'assessmentsubtestresultscoreclaim3value',
            'assessmentsubtestclaim3minimumvalue',
            'assessmentsubtestclaim3maximumvalue',
            'assessmentsubtestresultscoreclaim4value',
            'assessmentsubtestclaim4minimumvalue',
            'assessmentsubtestclaim4maximumvalue',
        ]
        with get_udl_connection() as conn:
            stg_outcome = conn.get_table('stg_sbac_asmt_outcome')
            # Build the 15 avg(cast(...)) expressions from the list instead of
            # 45 lines of copy-pasted column expressions.
            query = select([func.avg(cast(stg_outcome.c[name], Integer)) for name in score_columns],
                           from_obj=stg_outcome)
            # An aggregate-only select always yields exactly one row; fetchone()
            # also avoids a NameError that the old `for row in result` loop would
            # raise if the result were empty.
            return conn.execute(query).fetchone()
 def check_job_completion(self, max_wait=30):
     """Poll the batch table (every 0.25s, up to max_wait seconds) until a UDL_COMPLETE row appears for this batch."""
     with get_udl_connection() as conn:
         batch = conn.get_table(Constants.UDL2_BATCH_TABLE)
         done_query = select([batch.c.udl_phase],
                             and_(batch.c.guid_batch == self.guid_batch_id,
                                  batch.c.udl_phase == 'UDL_COMPLETE'))
         elapsed = 0
         rows = conn.execute(done_query).fetchall()
         while rows == [] and elapsed < max_wait:
             sleep(0.25)
             elapsed += 0.25
             rows = conn.execute(done_query).fetchall()
Пример #50
0
def create_udl2_sequence(schema_name):
    '''
    Create every UDL2 sequence defined for the given schema.
    @param schema_name: schema in which the sequences are created
    '''
    print("create sequences")
    with get_udl_connection() as conn:
        md = conn.get_metadata()
        for seq in generate_udl2_sequences(schema_name, md):
            conn.execute(CreateSequence(seq))
 def test_rule_with_inlist_outlist(self):
     """For each rule defining both 'inlist' and 'outlist', check every input maps to its expected output."""
     for rule in self.rule_list:
         rule_def = self.rule_conf[rule[0]]
         if 'inlist' in rule_def and 'outlist' in rule_def:
             with get_udl_connection() as conn:
                 # inlist/outlist are parallel: input i should produce output i
                 for (input_val, output_val) in zip(rule_def['inlist'],
                                                    rule_def['outlist']):
                     # rule[1] is presumably the DB function name for the rule -- TODO confirm
                     result = conn.execute("SELECT %s('%s')" %
                                           (rule[1], input_val))
                     self.assertEqual(result.fetchone()[0], output_val)
Пример #52
0
 def validate_err_list_table(self, guid_batch_id):
     """Assert that a DELETE_RECORD_NOT_FOUND error was logged in ERR_LIST for the batch."""
     with get_udl_connection() as connector:
         error_table = connector.get_table('err_list')
         error_record = select([
             error_table.c.err_code_text
         ]).where(error_table.c.guid_batch == guid_batch_id)
         error_result = connector.execute(error_record).fetchall()
         # exactly one error row with this code is expected for the batch
         expected_result = [('DELETE_RECORD_NOT_FOUND', )]
         self.assertEquals(error_result, expected_result,
                           "Error has not been logged into ERR_LIST table")