def get_integration_demographic_counts(self):
    """Return counts of truthy demographic flags from int_sbac_asmt_outcome.

    Also counts rows whose ethnicity source column is non-null and stores the
    result under the key 'dmg_eth_derived'.

    @return: dict mapping demographic column name -> count
    """
    demographics = ['dmg_eth_hsp', 'dmg_eth_ami', 'dmg_eth_asn', 'dmg_eth_blk',
                    'dmg_eth_pcf', 'dmg_eth_wht', 'dmg_eth_2om', 'dmg_prg_iep',
                    'dmg_prg_lep', 'dmg_prg_504', 'dmg_sts_ecd', 'dmg_sts_mig']
    results_dict = {}
    with get_udl_connection() as conn:
        int_outcome = conn.get_table('int_sbac_asmt_outcome')
        for entry in demographics:
            query = select([func.count(int_outcome.c[entry])],
                           from_obj=int_outcome).where(int_outcome.c[entry] == true())
            result = conn.execute(query)
            for row in result:
                demo_count = row[0]
            results_dict[entry] = demo_count
        # get derived ethnicity
        # BUG FIX: the original used `int_outcome.c[entry] is not None`, a Python
        # identity test on the Column object that is always True, so no SQL
        # "IS NOT NULL" filter was ever emitted. Use SQLAlchemy's isnot(None).
        # NOTE(review): `entry` here is the last loop value ('dmg_sts_mig'),
        # exactly as in the original — confirm this is the intended column for
        # the derived-ethnicity count.
        eth_query = select([func.count(int_outcome.c[entry])],
                           from_obj=int_outcome).where(int_outcome.c[entry].isnot(None))
        result = conn.execute(eth_query)
        for row in result:
            derived_count = row[0]
        results_dict['dmg_eth_derived'] = derived_count
    return results_dict
def get_integration_asmt_score_avgs(self):
    """Return one row of averages over the overall and per-claim score,
    min and max columns of int_sbac_asmt_outcome."""
    score_columns = ['score_asmt', 'score_asmt_min', 'score_asmt_max']
    for claim in range(1, 5):
        score_columns += ['score_claim_%d' % claim,
                          'score_claim_%d_min' % claim,
                          'score_claim_%d_max' % claim]
    with get_udl_connection() as conn:
        int_outcome = conn.get_table('int_sbac_asmt_outcome')
        avg_query = select([func.avg(int_outcome.c[name]) for name in score_columns],
                           from_obj=int_outcome)
        for row in conn.execute(avg_query):
            asmt_avgs = row
    return asmt_avgs
def truncate_udl_tables(self):
    """Delete every row from the UDL integration, staging, error and batch tables."""
    with get_udl_connection() as conn:
        metadata = conn.get_metadata()
        table_names = (get_tables_starting_with(metadata, 'int_') +
                       get_tables_starting_with(metadata, 'stg_') +
                       ['err_list', 'udl_batch'])
        for name in table_names:
            conn.execute(conn.get_table(name).delete())
def empty_table(self):
    """Empty the batch, err_list and udl_stats tables, asserting each is empty.

    BUG FIX: the original computed the udl_stats row count (`no_rows`) but the
    final assertEqual on it was missing; the check is now actually performed.
    """
    # Delete all data from batch_table
    with get_udl_connection() as connector:
        batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
        connector.execute(batch_table.delete())
        remaining = connector.execute(select([batch_table])).fetchall()
        self.assertEqual(len(remaining), 0)
        # Delete all data from err_list
        err_list_table = connector.get_table('err_list')
        connector.execute(err_list_table.delete())
        remaining = connector.execute(select([err_list_table])).fetchall()
        self.assertEqual(len(remaining), 0)
    # Delete all data from udl_stats table
    with StatsDBConnection() as conn:
        table = conn.get_table('udl_stats')
        conn.execute(table.delete())
        remaining = conn.execute(select([table])).fetchall()
        self.assertEqual(len(remaining), 0)
def get_transformation_rule_names(ref_table_name):
    '''
    Get a list of all used transformation rule names from the database
    @param ref_table_name: the name of the reference table containing the column mapping info
    @return: The list of transformations rules without duplicates
    @rtype: list
    '''
    with get_udl_connection() as conn:
        # get column_mapping table object
        mapping_table = conn.get_table(ref_table_name)
        # Distinct transformation rules; drop empty/NULL entries.
        distinct_stmt = select([mapping_table.c.transformation_rule]).distinct()
        return [row[0] for row in conn.execute(distinct_stmt) if row[0]]
def move_data_from_staging_to_integration_one_by_one(source_table_name, target_table_name,
                                                     err_list_table_name, guid_batch,
                                                     target_columns,
                                                     source_columns_with_tran_rule):
    """Migrate the batch from staging to integration one record at a time.

    Each record_sid is migrated individually so that one failing row does not
    abort the rest of the batch; failures are logged and counted.

    @return: tuple of (successful row count, failed record count)
    """
    success = 0
    fail = 0
    with get_udl_connection() as conn:
        source_table = conn.get_table(source_table_name)
        select_source_table = select([source_table.c.record_sid.label('record_sid')],
                                     from_obj=[source_table]).where(
            source_table.c.guid_batch == guid_batch)
        results = conn.get_result(select_source_table)
        for result in results:
            # Read record_sid before the try so the error message below can
            # never reference an unbound local.
            record_sid = result.get('record_sid')
            try:
                query_result = move_data_from_staging_to_integration_all(
                    source_table_name, target_table_name, err_list_table_name,
                    guid_batch, target_columns, source_columns_with_tran_rule,
                    record_sid=record_sid)
                success += query_result
            # BUG FIX: the original bare `except:` also swallowed SystemExit and
            # KeyboardInterrupt; catch Exception instead.
            except Exception:
                logger.error('Failed to integrate record: batch_guid[' + guid_batch +
                             '] record_sid[' + str(record_sid) + ']')
                fail += 1
    return success, fail
def get_asmt_rec_id(guid_batch, tenant_name, asmt_rec_id_info):
    '''
    Returns asmt_rec_id from dim_asmt table
    Steps:
    1. Get guid_asmt from integration table INT_SBAC_ASMT
    2. Select asmt_rec_id from dim_asmt by the same guid_amst got from 1. It should have 1 value

    @param guid_batch: batch guid used to scope both lookups
    @param tenant_name: tenant whose target schema is queried
    @param asmt_rec_id_info: dict with keys 'source_table', 'guid_column_in_source',
        'target_table', 'guid_column_name' and 'rec_id' describing the lookup
    @return: the rec-id value found in the target table

    NOTE(review): if either query returns no rows the corresponding local
    (guid_column_value / asmt_rec_id) is never bound and a NameError is raised;
    confirm callers guarantee the batch exists in both tables.
    '''
    source_table_name = asmt_rec_id_info['source_table']
    guid_column_name_in_source = asmt_rec_id_info['guid_column_in_source']
    target_table_name = asmt_rec_id_info['target_table']
    guid_column_name_in_target = asmt_rec_id_info['guid_column_name']
    rec_id_column_name = asmt_rec_id_info['rec_id']
    # connect to integration table, to get the value of guid_asmt
    with get_udl_connection() as udl_conn:
        int_table = udl_conn.get_table(source_table_name)
        query = select([int_table.c[guid_column_name_in_source]], from_obj=int_table, limit=1)
        query = query.where(int_table.c['guid_batch'] == guid_batch)
        results = udl_conn.get_result(query)
        if results:
            guid_column_value = results[0][guid_column_name_in_source]
    # connect to target table, to get the value of asmt_rec_id
    with get_target_connection(tenant_name, guid_batch) as target_conn:
        dim_asmt = target_conn.get_table(target_table_name)
        query = select([dim_asmt.c[rec_id_column_name]], from_obj=dim_asmt, limit=1)
        query = query.where(dim_asmt.c[guid_column_name_in_target] == guid_column_value)
        query = query.where(and_(dim_asmt.c['batch_guid'] == guid_batch))
        results = target_conn.get_result(query)
        if results:
            asmt_rec_id = results[0][rec_id_column_name]
    return asmt_rec_id
def empty_table(self):
    """Empty the batch, err_list and udl_stats tables, asserting each is empty.

    BUG FIX: the original computed the udl_stats row count (`no_rows`) but the
    final assertEqual on it was missing; the check is now actually performed.
    """
    # Delete all data from batch_table
    with get_udl_connection() as connector:
        batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
        connector.execute(batch_table.delete())
        remaining = connector.execute(select([batch_table])).fetchall()
        self.assertEqual(len(remaining), 0)
        # Delete all data from err_list
        err_list_table = connector.get_table('err_list')
        connector.execute(err_list_table.delete())
        remaining = connector.execute(select([err_list_table])).fetchall()
        self.assertEqual(len(remaining), 0)
    # Delete all data from udl_stats table
    with StatsDBConnection() as conn:
        table = conn.get_table('udl_stats')
        conn.execute(table.delete())
        remaining = conn.execute(select([table])).fetchall()
        self.assertEqual(len(remaining), 0)
def get_staging_demographic_counts(self):
    """Count affirmative ('Y'/'y'/'yes') demographic flags in
    stg_sbac_asmt_outcome and return them keyed by the corresponding
    integration-table column names."""
    # staging column name -> integration column name used in the result
    stg_to_int_names = {
        'hispanicorlatinoethnicity': 'dmg_eth_hsp',
        'americanindianoralaskanative': 'dmg_eth_ami',
        'asian': 'dmg_eth_asn',
        'blackorafricanamerican': 'dmg_eth_blk',
        'nativehawaiianorotherpacificislander': 'dmg_eth_pcf',
        'white': 'dmg_eth_wht',
        'demographicracetwoormoreraces': 'dmg_eth_2om',
        'ideaindicator': 'dmg_prg_iep',
        'lepstatus': 'dmg_prg_lep',
        'section504status': 'dmg_prg_504',
        'economicdisadvantagestatus': 'dmg_sts_ecd',
        'migrantstatus': 'dmg_sts_mig',
    }
    correlated_results = {}
    with get_udl_connection() as conn:
        stg_outcome = conn.get_table('stg_sbac_asmt_outcome')
        for stg_name, int_name in stg_to_int_names.items():
            count_query = select([func.count(stg_outcome.c[stg_name])],
                                 from_obj=stg_outcome).where(
                stg_outcome.c[stg_name].in_(['Y', 'y', 'yes']))
            for row in conn.execute(count_query):
                correlated_results[int_name] = row[0]
    return correlated_results
def drop_schema(schema_name):
    '''
    Drop the named schema (with CASCADE) from the UDL database.
    @param schema_name: name of the schema to drop
    '''
    with get_udl_connection() as conn:
        conn.execute(DropSchema(schema_name, cascade=True))
def update_column_mappings(rule_map_list, ref_table_name):
    '''
    Populate the stored-procedure columns of the column-mapping table.

    For each (rule_name, proc_name) pair, set stored_proc_name and stamp
    stored_proc_created_date on every row whose transformation_rule matches.

    @param rule_map_list: A list of (rule_name, proc_name) tuples
    @param ref_table_name: the name of the reference table containing the column mapping info
    '''
    # check that list is not empty before proceeding.
    if not rule_map_list:
        print('NO FUNCTIONS ADDED TO DATABASE')
        return
    with get_udl_connection() as conn:
        col_map_table = conn.get_table(ref_table_name)
        # Parameterized update, bound per-pair below.
        update_stmt = (col_map_table.update()
                       .where(col_map_table.c.transformation_rule == bindparam('rule_name'))
                       .values(stored_proc_name=bindparam('proc_name'),
                               stored_proc_created_date=datetime.datetime.now()))
        for rule_name, proc_name in rule_map_list:
            conn.execute(update_stmt, rule_name=rule_name, proc_name=proc_name)
def get_err_list(self):
    """Return all err_list rows recorded for this test's batch guid."""
    with get_udl_connection() as conn:
        err_list_table = conn.get_table('err_list')
        query = select([err_list_table]).where(
            err_list_table.c['guid_batch'] == self.guid_batch)
        return conn.get_result(query)
def move_data_from_staging_to_integration_all(source_table_name, target_table_name,
                                              err_list_table_name, guid_batch,
                                              target_columns,
                                              source_columns_with_tran_rule,
                                              record_sid=None):
    """Migrate a whole batch (or a single record when record_sid is given)
    from the staging table to the integration table.

    @return: number of affected rows reported by the migration query
    """
    with get_udl_connection() as conn:
        migration_query = create_migration_query(conn, source_table_name,
                                                 target_table_name,
                                                 err_list_table_name, guid_batch,
                                                 target_columns,
                                                 source_columns_with_tran_rule,
                                                 record_sid=record_sid)
        failure_msg = "problem when load data from staging table to integration table"
        affected = execute_udl_queries(conn, [migration_query], failure_msg,
                                       'move_to_integration',
                                       'move_data_from_staging_to_integration',
                                       tries=-1)
    return affected[0]
def setUp(self):
    """Initialize the UDL database and seed one ref-mapping row per testable rule.

    IDIOM FIX: the original wrapped `dict(os.environ)['UDL2_CONF']` in a broad
    try/except to fall back to the default path; `os.environ.get` with a
    default expresses the same behavior directly.
    """
    config_path = os.environ.get('UDL2_CONF', UDL2_DEFAULT_CONFIG_PATH_FILE)
    conf_tup = read_ini_file(config_path)
    udl2_conf = conf_tup[0]
    initialize_db_udl(udl2_conf)
    self.ref_schema = udl2_conf['udl2_db_conn']['db_schema']
    self.ref_table_name = Constants.UDL2_REF_MAPPING_TABLE(Constants.LOAD_TYPE_ASSESSMENT)
    # Testable Rules
    self.rule_names = transform_rules.keys()
    self.rule_conf = transform_rules
    self.rule_list = transformation_code_generator.generate_transformations(
        self.rule_names, rule_conf=self.rule_conf)
    self.testable_rules = [rule[0] for rule in self.rule_list]
    # One placeholder mapping row per testable rule; phase -999 marks test data.
    test_rows = [{'phase': -999,
                  'source_table': 'ftest_table',
                  'source_column': 'ftest_column',
                  'target_table': 'ftest_table1',
                  'target_column': 'ftest_column1',
                  'transformation_rule': rule}
                 for rule in self.testable_rules]
    with get_udl_connection() as conn:
        self.ref_table = conn.get_table(self.ref_table_name)
        conn.execute(self.ref_table.insert(test_rows))
def compare_csv_table_data(self, csv_file, key_column):
    """Compare the rows loaded into the target table for this batch against the
    expected rows parsed from csv_file.

    Rows on both sides are sorted by the student identifier before a
    column-by-column comparison; only columns present in the expected CSV
    data are checked.

    @param csv_file: path of the CSV file holding the expected data
    @param key_column: CSV column name used to sort the expected rows
    """
    table_name = self.conf[mk.TARGET_DB_TABLE]
    guid_batch = self.conf['guid_batch']
    # The staging table keys students by 'studentidentifier'; other tables use 'guid_student'.
    result_key = 'studentidentifier' if table_name == 'stg_sbac_asmt_outcome' else 'guid_student'
    with get_udl_connection() as conn:
        table = conn.get_table(table_name)
        query = select([table]).where(table.c.guid_batch == guid_batch)
        results = conn.execute(query)
        result_list = results.fetchall()
        expected_rows = self.get_clean_rows_from_file(csv_file)
        # sort rows
        student_id_index = results.keys().index(result_key)  # Determine index of guid_student in results
        result_list = sorted(result_list, key=lambda i: i[student_id_index])  # sort results using this index
        expected_rows = sorted(expected_rows, key=lambda k: k[key_column])  # sort expected based on the key
        # Loop through rows
        for i in range(len(result_list)):
            res_row = result_list[i]
            expect_row = expected_rows[i]
            # Loop through columns
            for ci in range(len(res_row)):
                if results.keys()[ci] in expect_row:  # if column is in the expected data
                    # change_empty_vals_to_none() converts all 0's and empty strings to None
                    self.assertEqual(self.change_empty_vals_to_none(res_row[ci]),
                                     self.change_empty_vals_to_none(expect_row[results.keys()[ci]]),
                                     'Values are not the same for column %s' % results.keys()[ci])
def get_row_number_in_table(self):
    """Return how many rows the target table holds for this test's batch guid."""
    with get_udl_connection() as conn:
        target_table = conn.get_table(self.conf[mk.TARGET_DB_TABLE])
        batch_guid = self.conf['guid_batch']
        rows = conn.execute(select([target_table]).where(
            target_table.c.guid_batch == batch_guid))
        return rows.rowcount
def get_transformation_rule_names(ref_table_name):
    '''
    Get a list of all used transformation rule names from the database
    @param ref_table_name: the name of the reference table containing the column mapping info
    @return: The list of transformations rules without duplicates
    @rtype: list
    '''
    with get_udl_connection() as conn:
        # get column_mapping table object
        mapping_table = conn.get_table(ref_table_name)
        # Distinct transformation rules; drop empty/NULL entries.
        distinct_stmt = select([mapping_table.c.transformation_rule]).distinct()
        return [row[0] for row in conn.execute(distinct_stmt) if row[0]]
def update_column_mappings(rule_map_list, ref_table_name):
    '''
    Populate the stored-procedure columns of the column-mapping table.

    For each (rule_name, proc_name) pair, set stored_proc_name and stamp
    stored_proc_created_date on every row whose transformation_rule matches.

    @param rule_map_list: A list of (rule_name, proc_name) tuples
    @param ref_table_name: the name of the reference table containing the column mapping info
    '''
    # check that list is not empty before proceeding.
    if not rule_map_list:
        print('NO FUNCTIONS ADDED TO DATABASE')
        return
    with get_udl_connection() as conn:
        col_map_table = conn.get_table(ref_table_name)
        # Parameterized update, bound per-pair below.
        update_stmt = (col_map_table.update()
                       .where(col_map_table.c.transformation_rule == bindparam('rule_name'))
                       .values(stored_proc_name=bindparam('proc_name'),
                               stored_proc_created_date=datetime.datetime.now()))
        for rule_name, proc_name in rule_map_list:
            conn.execute(update_stmt, rule_name=rule_name, proc_name=proc_name)
def truncate_udl_tables(self):
    """Delete every row from the UDL integration, staging, error and batch tables."""
    with get_udl_connection() as conn:
        metadata = conn.get_metadata()
        table_names = (get_tables_starting_with(metadata, 'int_') +
                       get_tables_starting_with(metadata, 'stg_') +
                       ['err_list', 'udl_batch'])
        for name in table_names:
            conn.execute(conn.get_table(name).delete())
def get_row_number_in_table(self):
    """Return how many rows the target table holds for this test's batch guid."""
    with get_udl_connection() as conn:
        target_table = conn.get_table(self.conf[mk.TARGET_DB_TABLE])
        batch_guid = self.conf['guid_batch']
        rows = conn.execute(select([target_table]).where(
            target_table.c.guid_batch == batch_guid))
        return rows.rowcount
def drop_foreign_data_wrapper_extension():
    '''
    Drop the file_fdw foreign-data-wrapper extension if it exists.
    '''
    print('drop foreign data wrapper extension')
    with get_udl_connection() as conn:
        conn.execute(text("DROP EXTENSION IF EXISTS file_fdw CASCADE"))
def compare_csv_table_data(self, csv_file, key_column):
    """Compare the rows loaded into the target table for this batch against the
    expected rows parsed from csv_file.

    Rows on both sides are sorted by the student identifier before a
    column-by-column comparison; only columns present in the expected CSV
    data are checked.

    @param csv_file: path of the CSV file holding the expected data
    @param key_column: CSV column name used to sort the expected rows
    """
    table_name = self.conf[mk.TARGET_DB_TABLE]
    guid_batch = self.conf['guid_batch']
    # The staging table keys students by 'studentidentifier'; other tables use 'guid_student'.
    result_key = 'studentidentifier' if table_name == 'stg_sbac_asmt_outcome' else 'guid_student'
    with get_udl_connection() as conn:
        table = conn.get_table(table_name)
        query = select([table]).where(table.c.guid_batch == guid_batch)
        results = conn.execute(query)
        result_list = results.fetchall()
        expected_rows = self.get_clean_rows_from_file(csv_file)
        # sort rows
        student_id_index = results.keys().index(result_key)  # Determine index of guid_student in results
        result_list = sorted(result_list, key=lambda i: i[student_id_index])  # sort results using this index
        expected_rows = sorted(expected_rows, key=lambda k: k[key_column])  # sort expected based on the key
        # Loop through rows
        for i in range(len(result_list)):
            res_row = result_list[i]
            expect_row = expected_rows[i]
            # Loop through columns
            for ci in range(len(res_row)):
                if results.keys()[ci] in expect_row:  # if column is in the expected data
                    # change_empty_vals_to_none() converts all 0's and empty strings to None
                    self.assertEqual(self.change_empty_vals_to_none(res_row[ci]),
                                     self.change_empty_vals_to_none(expect_row[results.keys()[ci]]),
                                     'Values are not the same for column %s' % results.keys()[ci])
def test_derive_eth_function(self): function_name = sr.special_rules['deriveEthnicity'][0] # dmg_eth_blk, dmg_eth_asn, dmg_eth_hsp, dmg_eth_ami, dmg_eth_pcf, dmg_eth_wht prepare_data = {'exception': {'src_column': "'sda', 'dg', 'a', 'q', 't', 'fff', 'z'", 'expected_code': -1}, 'not stated 1': {'src_column': "NULL, NULL, NULL, NULL, NULL, NULL, NULL", 'expected_code': 0}, 'not stated 2': {'src_column': "'f', NULL, NULL, 'f', NULL, 'f', NULL", 'expected_code': 0}, 'african american': {'src_column': "'y', 'n', 'n', 'n', 'n', 'n', 'n'", 'expected_code': 1}, 'asian': {'src_column': "'n', 'y', 'n', 'n', 'n', 'n', 'n'", 'expected_code': 2}, 'hispanic 1': {'src_column': "'n', 'n', 'y', 'n', 'n', 'n', 'n'", 'expected_code': 3}, 'hispanic 2': {'src_column': "'n', 'n', 'y', 'y', 'n', 'y', 'n'", 'expected_code': 3}, 'hispanic 3': {'src_column': "'n', 'n', 'y', 'n', 'n', 'n', 'y'", 'expected_code': 3}, 'native american': {'src_column': "'n', 'n', 'n', 'y', 'n', 'n', 'n'", 'expected_code': 4}, 'pacific islander': {'src_column': "'n', 'n', 'n', 'n', 'y', 'n', 'n'", 'expected_code': 5}, 'white': {'src_column': "'n', 'n', 'n', 'n', 'n', 'y', 'n'", 'expected_code': 6}, 'two or more races 1': {'src_column': "'y', 'n', 'n', 'n', 'n', 'y', 'n'", 'expected_code': 1}, 'two or more races 2': {'src_column': "'n', 'y', 'n', 'n', NULL, 'y', 'n'", 'expected_code': 2}, 'two or more races 3': {'src_column': "'y', 'y', 'n', 'y', 'y', 'y', 'y'", 'expected_code': 7}, 'two or more races 4': {'src_column': "'n', 'n', 'n', 'n', 'n', 'n', 'y'", 'expected_code': 7} } sql_template = 'SELECT %s;' % function_name with get_udl_connection() as conn: for _key, value in prepare_data.items(): sql = sql_template.format(src_column=value['src_column']) result = conn.execute(sql) actual_value = '' for r in result: actual_value = r[0] break self.assertEqual(actual_value, value['expected_code'])
def get_udl_batch(self):
    """Return all udl_batch rows recorded for this test's batch guid."""
    with get_udl_connection() as conn:
        batch_table = conn.get_table('udl_batch')
        query = select([batch_table]).where(
            batch_table.c['guid_batch'] == self.guid_batch)
        return conn.get_result(query)
def test_get_column_mapping_from_stg_to_int(self):
    """Verify get_column_mapping_from_stg_to_int returns the expected target
    columns and transformation-rule expressions for the student-registration
    staging-to-integration mapping."""
    expected_target_columns = ['name_state', 'code_state', 'guid_district', 'name_district',
                               'guid_school', 'name_school', 'guid_student', 'external_ssid_student',
                               'name_student_first', 'name_student_middle', 'name_student_last',
                               'birthdate_student', 'sex_student', 'grade_enrolled', 'dmg_eth_hsp',
                               'dmg_eth_ami', 'dmg_eth_asn', 'dmg_eth_blk', 'dmg_eth_pcf',
                               'dmg_eth_wht', 'dmg_multi_race', 'dmg_prg_iep', 'dmg_prg_lep',
                               'dmg_prg_504', 'dmg_sts_ecd', 'dmg_sts_mig', 'code_language',
                               'eng_prof_lvl', 'us_school_entry_date', 'lep_entry_date',
                               'lep_exit_date', 't3_program_type', 'prim_disability_type',
                               'created_date', 'guid_batch']
    # Expected SQL expressions ("A" aliases the staging table): substr() with
    # the target column width for text columns, and ''-to-null bool casts for
    # the demographic flags.
    expected_source_columns_with_tran_rule = ['substr("A".name_state, 1, 50)',
                                              'substr("A".code_state, 1, 2)',
                                              'substr("A".guid_district, 1, 40)',
                                              'substr("A".name_district, 1, 60)',
                                              'substr("A".guid_school, 1, 40)',
                                              'substr("A".name_school, 1, 60)',
                                              'substr("A".guid_student, 1, 40)',
                                              'substr("A".external_ssid_student, 1, 40)',
                                              'substr("A".name_student_first, 1, 35)',
                                              'substr("A".name_student_middle, 1, 35)',
                                              'substr("A".name_student_last, 1, 35)',
                                              'substr("A".birthdate_student, 1, 10)',
                                              'substr("A".sex_student, 1, 10)',
                                              'substr("A".grade_enrolled, 1, 2)',
                                              'case "A".dmg_eth_hsp when \'\' then null else cast("A".dmg_eth_hsp as bool) end',
                                              'case "A".dmg_eth_ami when \'\' then null else cast("A".dmg_eth_ami as bool) end',
                                              'case "A".dmg_eth_asn when \'\' then null else cast("A".dmg_eth_asn as bool) end',
                                              'case "A".dmg_eth_blk when \'\' then null else cast("A".dmg_eth_blk as bool) end',
                                              'case "A".dmg_eth_pcf when \'\' then null else cast("A".dmg_eth_pcf as bool) end',
                                              'case "A".dmg_eth_wht when \'\' then null else cast("A".dmg_eth_wht as bool) end',
                                              'case "A".dmg_multi_race when \'\' then null else cast("A".dmg_multi_race as bool) end',
                                              'case "A".dmg_prg_iep when \'\' then null else cast("A".dmg_prg_iep as bool) end',
                                              'case "A".dmg_prg_lep when \'\' then null else cast("A".dmg_prg_lep as bool) end',
                                              'case "A".dmg_prg_504 when \'\' then null else cast("A".dmg_prg_504 as bool) end',
                                              'case "A".dmg_sts_ecd when \'\' then null else cast("A".dmg_sts_ecd as bool) end',
                                              'case "A".dmg_sts_mig when \'\' then null else cast("A".dmg_sts_mig as bool) end',
                                              'substr("A".code_language, 1, 3)',
                                              'substr("A".eng_prof_lvl, 1, 20)',
                                              'substr("A".us_school_entry_date, 1, 10)',
                                              'substr("A".lep_entry_date, 1, 10)',
                                              'substr("A".lep_exit_date, 1, 10)',
                                              'substr("A".t3_program_type, 1, 27)',
                                              'substr("A".prim_disability_type, 1, 3)',
                                              '"A".created_date',
                                              '"A".guid_batch']
    with get_udl_connection() as conn:
        target_columns, source_columns_with_tran_rule = get_column_mapping_from_stg_to_int(conn,
                                                                                           Constants.UDL2_REF_MAPPING_TABLE(Constants.LOAD_TYPE_STUDENT_REGISTRATION),
                                                                                           'stg_sbac_stu_reg',
                                                                                           'int_sbac_stu_reg')
        self.assertEqual(expected_target_columns, target_columns)
        self.assertEqual(expected_source_columns_with_tran_rule, source_columns_with_tran_rule)
def validate_err_table(self, guid_batch_id):
    """Assert that deleting the same fact record twice logged the expected
    error text into err_list for the given batch.

    FIX: assertEquals is a deprecated alias of assertEqual.
    """
    with get_udl_connection() as connector:
        error_table = connector.get_table('err_list')
        error_record = select([error_table.c.err_source_text]).where(
            error_table.c.guid_batch == guid_batch_id)
        error_result = connector.execute(error_record).fetchall()
        expected_result = [('DELETE_FACT_ASMT_OUTCOME_RECORD_MORE_THAN_ONCE',)]
        self.assertEqual(error_result, expected_result,
                         "Error has not been logged for deleting the same data twice into ERR_LIST table")
def validate_err_list_table(self, guid_batch_id):
    """Assert that a delete of a missing record logged DELETE_RECORD_NOT_FOUND
    into err_list for the given batch.

    FIX: assertEquals is a deprecated alias of assertEqual.
    """
    with get_udl_connection() as connector:
        error_table = connector.get_table('err_list')
        error_record = select([error_table.c.err_code_text]).where(
            error_table.c.guid_batch == guid_batch_id)
        error_result = connector.execute(error_record).fetchall()
        expected_result = [('DELETE_RECORD_NOT_FOUND',)]
        self.assertEqual(error_result, expected_result,
                         "Error has not been logged into ERR_LIST table")
def record_benchmark(self):
    '''
    Insert this benchmark run's result dict as a row into the UDL batch table.
    '''
    with get_udl_connection() as connector:
        udl_batch = connector.get_table(Constants.UDL2_BATCH_TABLE)
        connector.execute(udl_batch.insert(), self.get_result_dict())
def cleanup_udl_tables(guid_batch):
    """Remove the given batch's rows from every int_* and stg_* UDL table."""
    with get_udl_connection() as connector:
        for prefix in ('int_', 'stg_'):
            cleanup_all_tables(connector=connector, column_name='guid_batch',
                               value=guid_batch, batch_delete=True,
                               table_name_prefix=prefix)
def test_rule_with_inlist_outlist(self):
    """For every rule declaring inlist/outlist, each input value must map to
    its paired output value via the generated SQL function."""
    for rule in self.rule_list:
        rule_def = self.rule_conf[rule[0]]
        if 'inlist' not in rule_def or 'outlist' not in rule_def:
            continue
        with get_udl_connection() as conn:
            for input_val, output_val in zip(rule_def['inlist'], rule_def['outlist']):
                # rule[1] is the generated function name; call it directly.
                result = conn.execute("SELECT %s('%s')" % (rule[1], input_val))
                self.assertEqual(result.fetchone()[0], output_val)
def drop_foreign_data_wrapper_server(fdw_server):
    '''
    Drop the named foreign-data-wrapper server (with CASCADE) if it exists.
    @param fdw_server: name of the foreign data wrapper server to drop
    '''
    print('drop foreign data wrapper server')
    with get_udl_connection() as conn:
        conn.execute(text("DROP SERVER IF EXISTS %s CASCADE" % (fdw_server)))
def empty_batch_table(self):
    """Delete every row from the UDL batch table and assert it is empty."""
    with get_udl_connection() as connector:
        batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
        connector.execute(batch_table.delete())
        remaining = connector.execute(select([batch_table])).fetchall()
        self.assertEqual(len(remaining), 0)
def create_foreign_data_wrapper_extension(schema_name):
    '''
    Create the file_fdw foreign-data-wrapper extension if it does not exist.
    @param schema_name: unused here; kept for interface compatibility
    '''
    print('create foreign data wrapper extension')
    with get_udl_connection() as conn:
        conn.execute(text("CREATE EXTENSION IF NOT EXISTS file_fdw"))
def record_benchmark(self):
    '''
    Insert this benchmark run's result dict as a row into the UDL batch table.
    '''
    with get_udl_connection() as connector:
        udl_batch = connector.get_table(Constants.UDL2_BATCH_TABLE)
        connector.execute(udl_batch.insert(), self.get_result_dict())
def create_foreign_data_wrapper_server(fdw_server):
    '''
    Create the named server for the file_fdw foreign data wrapper.
    @param fdw_server: name of the foreign data wrapper server to create
    '''
    print('create foreign data wrapper server')
    with get_udl_connection() as conn:
        conn.execute(text("CREATE SERVER %s FOREIGN DATA WRAPPER file_fdw" % (fdw_server)))
def load_to_table(data_dict, guid_batch, int_table, tenant_name, udl_schema):
    '''
    Load the table into the proper table
    @param data_dict: the dictionary containing the data to be loaded
    @param guid_batch: the id for the batch
    @param int_table: the name of the integration table
    @param tenant_name: name of the tenant
    @param udl_schema: udl schema name
    @return: number of rows inserted into the integration table
    '''
    # Create sqlalchemy connection and get table information from sqlalchemy
    ref_column_mapping_columns = {}
    with get_udl_connection() as conn:
        data_dict[mk.GUID_BATCH] = guid_batch
        data_dict = fix_empty_strings(data_dict)
        ref_table = conn.get_table('ref_column_mapping')
        s_int_table = conn.get_table(int_table)
        # Stored-proc names configured for the lz_json -> int_table mapping.
        column_mapping_query = select([ref_table.c.target_column,
                                       ref_table.c.stored_proc_name],
                                      from_obj=ref_table).where(and_(ref_table.c.source_table == 'lz_json',
                                                                     ref_table.c.target_table == int_table))
        results = conn.get_result(column_mapping_query)
        for result in results:
            target_column = result['target_column']
            stored_proc_name = result['stored_proc_name']
            value = data_dict.get(target_column)
            if value:
                if stored_proc_name:
                    if stored_proc_name.startswith('sp_'):
                        # sp_* procs become SQL function calls wrapping the quoted value.
                        ref_column_mapping_columns[target_column] = stored_proc_name + '(' + QuotedString(value if type(value) is str else str(value)).getquoted().decode('utf-8') + ')'
                    else:
                        # Otherwise the proc name is a str.format template taking
                        # the quoted value and, for string columns, the column length.
                        format_value = dict()
                        format_value['value'] = QuotedString(value if type(value) is str else str(value)).getquoted().decode('utf-8')
                        if s_int_table.c[target_column].type.python_type is str:
                            format_value['length'] = s_int_table.c[target_column].type.length
                        ref_column_mapping_columns[target_column] = stored_proc_name.format(**format_value)
                    continue
                # No stored proc configured: pass the raw value through.
                ref_column_mapping_columns[target_column] = value
        # record_sid comes from the tenant's sequence in the UDL schema.
        record_sid = 'nextval(\'{schema_name}.{tenant_sequence_name}\')'.\
            format(schema_name=udl_schema, tenant_sequence_name=Constants.TENANT_SEQUENCE_NAME(tenant_name))
        from_select_column_names = ['record_sid']
        from_select_select_values = [record_sid]
        # Build the INSERT ... FROM SELECT column/value lists for every integration
        # column present in the incoming data.
        for column in s_int_table.c:
            value = data_dict.get(column.name)
            if value is not None:
                from_select_column_names.append(column.name)
                from_select_select_values.append(
                    ref_column_mapping_columns.get(column.name,
                                                   QuotedString(value if type(value) is str else str(value)).getquoted().decode('utf-8')))
        insert_into_int_table = s_int_table.insert().from_select(from_select_column_names,
                                                                 select(from_select_select_values))
        # create insert statement and execute
        affected_row = db_util.execute_udl_queries(conn, [insert_into_int_table],
                                                   'Exception in loading json data -- ',
                                                   'json_loader', 'load_to_table')
    return affected_row[0]
def test_stored_procedures_exist_in_db(self):
    """After populate_stored_proc, every testable rule must have a matching
    sp_<rule> procedure registered in pg_proc."""
    populate_stored_proc(self.ref_table_name)
    with get_udl_connection() as conn:
        for rule in self.testable_rules:
            proc_query = "SELECT proname FROM pg_proc WHERE proname = 'sp_{0}';".format(rule.lower())
            found = conn.execute(proc_query).fetchall()[0][0]
            self.assertEqual(found.lower(), 'sp_{0}'.format(rule).lower())
def tearDown(self):
    """Delete this test's batch rows from the target table.

    BUG FIX: the original built `delete(table).where(...)` but never executed
    the statement, so no rows were ever removed; it is now executed on the
    connection.
    """
    table_name = self.conf[mk.TARGET_DB_TABLE]
    guid_batch = self.conf['guid_batch']
    with get_udl_connection() as conn:
        table = conn.get_table(table_name)
        try:
            conn.execute(delete(table).where(table.c.guid_batch == guid_batch))
        except Exception as e:
            # Best-effort cleanup: report but don't fail the test run.
            print('Exception -- ', e)
def get_rows_in_table(self, columns):
    """Return the requested columns for this batch, ordered by src_file_rec_num.

    @param columns: iterable of column names to select
    """
    with get_udl_connection() as conn:
        table = conn.get_table(self.conf[mk.TARGET_DB_TABLE])
        batch_guid = self.conf['guid_batch']
        selected = [table.c[name] for name in columns]
        query = (select(selected)
                 .where(table.c.guid_batch == batch_guid)
                 .order_by(table.c.src_file_rec_num))
        return conn.execute(query).fetchall()
def load_file(conf):
    '''
    Main function to initiate file loader
    '''
    logger.info("Starting data load from csv to staging")
    with get_udl_connection() as conn:
        # start loading file process
        elapsed_seconds = load_data_process(conn, conf)
        logger.info("Data Loaded from csv to Staging in %s seconds" % elapsed_seconds)
def validate_successful_job_completion(self):
    """Assert the batch reached the UDL_COMPLETE phase with SUCCESS status."""
    with get_udl_connection() as connector:
        batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
        query = select([batch_table.c.udl_phase_step_status],
                       and_(batch_table.c.guid_batch == self.batch_id,
                            batch_table.c.udl_phase == 'UDL_COMPLETE'))
        rows = connector.execute(query).fetchall()
        # There must be at least one UDL_COMPLETE row for the batch.
        self.assertNotEqual(rows, [])
        for row in rows:
            self.assertEqual(row['udl_phase_step_status'],
                             NotificationConstants.SUCCESS,
                             'UDL process completed successfully')
def get_intput_file(batch_guid):
    """Return the input file recorded for the batch's file-arrival phase,
    or '' when no such row exists.

    NOTE(review): 'intput' in the function name looks like a typo, kept
    because callers reference this exact name.
    """
    input_file = ''
    with get_udl_connection() as connector:
        batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
        query = select([batch_table.c.input_file.label('input_file')]).where(
            and_(batch_table.c.udl_phase == 'udl2.W_file_arrived.task',
                 batch_table.c.guid_batch == batch_guid))
        results = connector.get_result(query)
        if results:
            input_file = results[0]['input_file']
    return input_file
def get_asmt_and_outcome_result(self, conf):
    """Return (assessment guids, distinct outcome assessment guids) for the batch."""
    batch_guid = conf.get(mk.GUID_BATCH)
    with get_udl_connection() as conn:
        asmt_table = conn.get_table(conf.get(mk.ASMT_TABLE))
        outcome_table = conn.get_table(conf.get(mk.ASMT_OUTCOME_TABLE))
        asmt_result = conn.get_result(
            select([asmt_table.c.guid_asmt]).where(
                asmt_table.c.guid_batch == batch_guid))
        asmt_outcome_result = conn.get_result(
            select([outcome_table.c.assessmentguid], distinct=True).where(
                outcome_table.c.guid_batch == batch_guid))
    return asmt_result, asmt_outcome_result
def connect_verify_db(self):
    """Assert the batch finished the UDL_COMPLETE phase with status SUCCESS.

    FIX: the original also executed `select([batch_table])` and fetched the
    whole table into an unused local; that dead read is removed.
    """
    with get_udl_connection() as connector:
        batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
        output = select([batch_table.c.udl_phase_step_status],
                        and_(batch_table.c.udl_phase == 'UDL_COMPLETE',
                             batch_table.c.guid_batch == self.guid_batch_id))
        output_data = connector.execute(output).fetchall()
        self.assertEqual([('SUCCESS',)], output_data)
def load_csv_data_to_integration(self, data_file, metadata_file, data_table_name, meta_table_name):
    """Bulk-insert csv rows into the given integration data and metadata tables.

    Metadata rows are inserted before data rows, as in the original load order.
    """
    with get_udl_connection() as udl2_conn:
        meta_rows = self.get_csv_dict_list(metadata_file)
        data_rows = self.get_csv_dict_list(data_file)
        udl2_conn.execute(udl2_conn.get_table(meta_table_name).insert(), meta_rows)
        udl2_conn.execute(udl2_conn.get_table(data_table_name).insert(), data_rows)
def get_rows_in_table(self, columns):
    """Return all rows for this batch restricted to *columns*, ordered by source record number.

    @param columns: iterable of column names to select from the target table
    """
    with get_udl_connection() as conn:
        target = conn.get_table(self.conf[mk.TARGET_DB_TABLE])
        query = (select([target.c[name] for name in columns])
                 .where(target.c.guid_batch == self.conf['guid_batch'])
                 .order_by(target.c.src_file_rec_num))
        return conn.execute(query).fetchall()
def tearDown(self):
    """Best-effort cleanup: delete this test's batch rows from the target table."""
    table_name = self.conf[mk.TARGET_DB_TABLE]
    guid_batch = self.conf['guid_batch']
    with get_udl_connection() as conn:
        table = conn.get_table(table_name)
        try:
            # BUG FIX: the delete statement was constructed but never executed,
            # so test rows were never actually removed. Execute it on the connection.
            conn.execute(delete(table).where(table.c.guid_batch == guid_batch))
        except Exception as e:
            # Deliberately best-effort: report and continue so teardown never fails the run.
            print('Exception -- ', e)
def get_staging_asmt_score_avgs(self):
    """Average the staged assessment and per-claim scores.

    Staging columns are stored as text, so each is cast to Integer before
    averaging. Column order matches the original hand-written select:
    overall score/min/max, then claim 1..4 score/min/max.
    """
    column_names = ['assessmentsubtestresultscorevalue',
                    'assessmentsubtestminimumvalue',
                    'assessmentsubtestmaximumvalue']
    for claim in range(1, 5):
        column_names += ['assessmentsubtestresultscoreclaim%dvalue' % claim,
                         'assessmentsubtestclaim%dminimumvalue' % claim,
                         'assessmentsubtestclaim%dmaximumvalue' % claim]
    with get_udl_connection() as conn:
        stg_outcome = conn.get_table('stg_sbac_asmt_outcome')
        query = select([func.avg(cast(stg_outcome.c[name], Integer))
                        for name in column_names],
                       from_obj=stg_outcome)
        for row in conn.execute(query):
            asmt_avgs = row
        return asmt_avgs
def check_job_completion(self, max_wait=30):
    """Poll the batch table until UDL_COMPLETE appears for this batch.

    @param max_wait: maximum seconds to keep polling (checked every 0.25s)
    """
    with get_udl_connection() as connector:
        batch_table = connector.get_table(Constants.UDL2_BATCH_TABLE)
        query = select([batch_table.c.udl_phase],
                       and_(batch_table.c.guid_batch == self.guid_batch_id,
                            batch_table.c.udl_phase == 'UDL_COMPLETE'))
        elapsed = 0
        rows = connector.execute(query).fetchall()
        while elapsed < max_wait and rows == []:
            sleep(0.25)
            elapsed += 0.25
            rows = connector.execute(query).fetchall()
def create_udl2_sequence(schema_name):
    """Create every UDL2 sequence defined for the given schema.

    @param schema_name: schema in which the sequences are created
    """
    print("create sequences")
    with get_udl_connection() as conn:
        for seq in generate_udl2_sequences(schema_name, conn.get_metadata()):
            conn.execute(CreateSequence(seq))
def test_rule_with_inlist_outlist(self):
    """For each rule with an inlist/outlist pair, check every input maps to its output."""
    for rule in self.rule_list:
        rule_def = self.rule_conf[rule[0]]
        if 'inlist' not in rule_def or 'outlist' not in rule_def:
            continue
        with get_udl_connection() as conn:
            for input_val, expected in zip(rule_def['inlist'], rule_def['outlist']):
                # NOTE(review): SQL built by string interpolation — acceptable only
                # because rule names and inlist values come from trusted test config.
                result = conn.execute("SELECT %s('%s')" % (rule[1], input_val))
                self.assertEqual(result.fetchone()[0], expected)
def validate_err_list_table(self, guid_batch_id):
    """Assert err_list holds the expected DELETE_RECORD_NOT_FOUND error for the batch.

    @param guid_batch_id: batch guid whose error records are checked
    """
    with get_udl_connection() as connector:
        error_table = connector.get_table('err_list')
        error_record = select([error_table.c.err_code_text]).where(
            error_table.c.guid_batch == guid_batch_id)
        error_result = connector.execute(error_record).fetchall()
        expected_result = [('DELETE_RECORD_NOT_FOUND', )]
        # FIX: assertEquals is a deprecated alias (removed in Python 3.12);
        # use the canonical assertEqual.
        self.assertEqual(error_result, expected_result,
                         "Error has not been logged into ERR_LIST table")