def import_via_fdw(conn, stg_columns, csv_table_columns, transformation_rules, apply_rules,
                   staging_schema, staging_table, csv_schema, csv_table, start_seq, tenant_name):
    '''
    Load data from foreign table to staging table
    '''
    # create sequence name, use table_name and a random number combination.
    # This sequence is used for column src_file_rec_num
    seq_name = (csv_table + '_' + str(start_seq)).lower()
    global_tenant_seq_name = Constants.TENANT_SEQUENCE_NAME(tenant_name)
    # query 1 -- create query to create sequence
    create_sequence = queries.create_sequence_query(staging_schema, seq_name, start_seq)
    # query 2 -- create query to load data from fdw to staging table
    insert_into_staging_table = queries.create_inserting_into_staging_query(
        stg_columns, apply_rules, csv_table_columns, staging_schema, staging_table,
        csv_schema, csv_table, seq_name, global_tenant_seq_name, transformation_rules)
    # query 3 -- create query to drop sequence
    drop_sequence = queries.drop_sequence_query(staging_schema, seq_name)
    # logger.debug('@@@@@@@', insert_into_staging_table)
    # execute 3 queries in order
    execute_udl_queries(conn, [create_sequence, insert_into_staging_table, drop_sequence],
                        'Exception in loading data -- ', 'file_loader', 'import_via_fdw')
def create_fdw_tables(conn, header_names, header_types, csv_file, csv_schema, csv_table, fdw_server):
    '''
    Create one foreign table which maps to the given csv_file on the given fdw_server
    '''
    create_csv_ddl = queries.create_ddl_csv_query(header_names, header_types, csv_file,
                                                  csv_schema, csv_table, fdw_server)
    drop_csv_ddl = queries.drop_ddl_csv_query(csv_schema, csv_table)
    # First drop the fdw table if it exists, then create a new one
    execute_udl_queries(conn, [drop_csv_ddl, create_csv_ddl],
                        'Exception in creating fdw tables --', 'file_loader', 'create_fdw_tables')
def drop_fdw_tables(conn, csv_schema, csv_table):
    '''
    Drop foreign table
    '''
    drop_csv_ddl = queries.drop_ddl_csv_query(csv_schema, csv_table)
    execute_udl_queries(conn, [drop_csv_ddl],
                        'Exception in drop fdw table -- ', 'file_loader', 'drop_fdw_tables')
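# Usage sketch (assumption, not part of the module): the three FDW helpers above are typically
# chained -- create the foreign table, bulk-load it into the staging table, then drop it.
# The schema/table names, server name, and parameter values used below are illustrative placeholders.
def load_csv_via_fdw_sketch(conn, header_names, header_types, csv_file, stg_columns,
                            csv_table_columns, transformation_rules, apply_rules):
    create_fdw_tables(conn, header_names, header_types, csv_file,
                      'udl2', 'csv_tmp_table', 'udl2_fdw_server')
    try:
        import_via_fdw(conn, stg_columns, csv_table_columns, transformation_rules, apply_rules,
                       'udl2', 'stg_sbac_asmt_outcome', 'udl2', 'csv_tmp_table',
                       start_seq=1, tenant_name='cat')
    finally:
        # drop the foreign table even if the staging load fails
        drop_fdw_tables(conn, 'udl2', 'csv_tmp_table')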
def move_data_from_staging_to_integration_all(source_table_name, target_table_name, err_list_table_name,
                                              guid_batch, target_columns, source_columns_with_tran_rule,
                                              record_sid=None):
    '''
    Move one batch of data from the staging table to the integration table
    '''
    with get_udl_connection() as conn:
        sql_query = create_migration_query(conn, source_table_name, target_table_name, err_list_table_name,
                                           guid_batch, target_columns, source_columns_with_tran_rule,
                                           record_sid=record_sid)
        except_msg = "problem when loading data from staging table to integration table"
        query_result = execute_udl_queries(conn, [sql_query], except_msg, 'move_to_integration',
                                           'move_data_from_staging_to_integration', tries=-1)
    return query_result[0]
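# Usage sketch (assumption): moving one batch from a staging table to its integration table.
# The table names, guid_batch, and column lists below are placeholders; the real values come
# from the UDL configuration and the batch being processed.
rows_moved = move_data_from_staging_to_integration_all(
    source_table_name='stg_sbac_asmt_outcome',
    target_table_name='int_sbac_asmt_outcome',
    err_list_table_name='err_list',
    guid_batch='00000000-0000-0000-0000-000000000000',
    target_columns=['guid_batch', 'name_state'],
    source_columns_with_tran_rule=['guid_batch', 'name_state'])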
def explode_data_to_dim_table(conf, source_table, target_table, column_mapping, column_types):
    '''
    Main function to move data from source table to target tables.
    Source table can be INT_SBAC_ASMT, and INT_SBAC_ASMT_OUTCOME.
    Target table can be any dim tables in star schema.
    @param conf: one dictionary which has database settings, and guid_batch
    @param source_table: name of the source table where the data comes from
    @param target_table: name of the target table where the data should be moved to
    @param column_mapping: list of tuple of: column_name_in_target, column_name_in_source
    @param column_types: data types of all columns in one target table
    '''
    # create database connection to target
    with get_target_connection(conf[mk.TENANT_NAME], conf[mk.GUID_BATCH]) as conn:
        # create insertion query
        # TODO: find out if the affected rows, time can be returned, so that the returned info can be put in the log
        # send only data that needs to be inserted (such as insert, update) to the dimension table
        query = create_insert_query(conf, source_table, target_table, column_mapping, column_types, True,
                                    'C' if source_table in op_table_conf else None)
        logger.info(compile_query_to_sql_text(query))
        # execute the query
        affected_rows = execute_udl_queries(
            conn, [query],
            'Exception -- exploding data from integration to target {target_table}'.format(
                target_table=target_table),
            'move_to_target', 'explode_data_to_dim_table')
    return affected_rows
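# Usage sketch (assumption): exploding one integration table into a dimension table.
# The conf keys follow the mk constants used above; the table names, column mapping, and
# column types below are illustrative placeholders.
conf = {mk.TENANT_NAME: 'cat', mk.GUID_BATCH: '00000000-0000-0000-0000-000000000000'}
column_mapping = [('student_guid', 'guid_student'), ('first_name', 'name_student_first')]
column_types = {'student_guid': 'character varying(50)', 'first_name': 'character varying(35)'}
rows_inserted = explode_data_to_dim_table(conf, 'int_sbac_asmt_outcome', 'dim_student',
                                          column_mapping, column_types)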
def move_data_from_int_tables_to_target_table(conf, task_name, source_tables, target_table):
    """Move student registration data from source integration tables to target table.

    Source tables are INT_STU_REG and INT_STU_REG_META. Target table is student_registration.

    @param conf: Configuration for particular load type (assessment or studentregistration)
    @param task_name: Name of the celery task invoking this method
    @param source_tables: Names of the source tables from where the data comes
    @param target_table: Name of the target table to where the data should be moved

    @return: Number of inserted rows
    """
    column_and_type_mapping = get_column_and_type_mapping(conf, task_name, target_table, source_tables)
    with get_target_connection(conf[mk.TENANT_NAME], conf[mk.GUID_BATCH]) as conn_to_target_db:
        insert_query = create_sr_table_select_insert_query(conf, target_table, column_and_type_mapping)
        logger.info(insert_query)
        affected_rows = execute_udl_queries(
            conn_to_target_db, [insert_query],
            'Exception -- moving data from integration {int_table} to target {target_table}'.format(
                int_table=source_tables[0], target_table=target_table),
            'move_to_target', 'move_data_from_int_tables_to_target_table')
    return affected_rows
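# Usage sketch (assumption): loading student registration data after the integration step.
# The task name shown is a placeholder for whatever the invoking celery task passes in.
conf = {mk.TENANT_NAME: 'cat', mk.GUID_BATCH: '00000000-0000-0000-0000-000000000000'}
rows_inserted = move_data_from_int_tables_to_target_table(conf, 'load_sr_to_target',
                                                          ['int_stu_reg', 'int_stu_reg_meta'],
                                                          'student_registration')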
def load_to_table(data_dict, guid_batch, int_table, tenant_name, udl_schema):
    '''
    Load the data into the proper integration table
    @param data_dict: the dictionary containing the data to be loaded
    @param guid_batch: the id for the batch
    @param int_table: the name of the integration table
    @param tenant_name: name of the tenant
    @param udl_schema: udl schema name
    '''
    # Create sqlalchemy connection and get table information from sqlalchemy
    ref_column_mapping_columns = {}
    with get_udl_connection() as conn:
        data_dict[mk.GUID_BATCH] = guid_batch
        data_dict = fix_empty_strings(data_dict)
        ref_table = conn.get_table('ref_column_mapping')
        s_int_table = conn.get_table(int_table)
        column_mapping_query = select([ref_table.c.target_column, ref_table.c.stored_proc_name],
                                      from_obj=ref_table).where(and_(ref_table.c.source_table == 'lz_json',
                                                                     ref_table.c.target_table == int_table))
        results = conn.get_result(column_mapping_query)
        for result in results:
            target_column = result['target_column']
            stored_proc_name = result['stored_proc_name']
            value = data_dict.get(target_column)
            if value:
                if stored_proc_name:
                    if stored_proc_name.startswith('sp_'):
                        ref_column_mapping_columns[target_column] = \
                            stored_proc_name + '(' + \
                            QuotedString(value if type(value) is str else str(value)).getquoted().decode('utf-8') + ')'
                    else:
                        format_value = dict()
                        format_value['value'] = \
                            QuotedString(value if type(value) is str else str(value)).getquoted().decode('utf-8')
                        if s_int_table.c[target_column].type.python_type is str:
                            format_value['length'] = s_int_table.c[target_column].type.length
                        ref_column_mapping_columns[target_column] = stored_proc_name.format(**format_value)
                    continue
            ref_column_mapping_columns[target_column] = value
        record_sid = "nextval('{schema_name}.{tenant_sequence_name}')".format(
            schema_name=udl_schema, tenant_sequence_name=Constants.TENANT_SEQUENCE_NAME(tenant_name))
        from_select_column_names = ['record_sid']
        from_select_select_values = [record_sid]
        for column in s_int_table.c:
            value = data_dict.get(column.name)
            if value is not None:
                from_select_column_names.append(column.name)
                from_select_select_values.append(
                    ref_column_mapping_columns.get(column.name,
                                                   QuotedString(value if type(value) is str
                                                                else str(value)).getquoted().decode('utf-8')))
        insert_into_int_table = s_int_table.insert().from_select(from_select_column_names,
                                                                 select(from_select_select_values))
        # create insert statement and execute
        affected_row = db_util.execute_udl_queries(conn, [insert_into_int_table],
                                                   'Exception in loading json data -- ',
                                                   'json_loader', 'load_to_table')
    return affected_row[0]
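# Usage sketch (assumption): loading one parsed JSON record into an integration table.
# The column names in the record and the tenant/schema values below are illustrative only.
record = {'guid_asmt': 'a0000000-0000-0000-0000-000000000000', 'type': 'SUMMATIVE'}
rows_inserted = load_to_table(record, guid_batch='00000000-0000-0000-0000-000000000000',
                              int_table='int_sbac_asmt', tenant_name='cat', udl_schema='udl2')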