def import_via_fdw(conn, stg_columns, csv_table_columns, transformation_rules,
                   apply_rules, staging_schema, staging_table, csv_schema,
                   csv_table, start_seq, tenant_name):
    '''
    Load data from foreign table to staging table
    '''
    # Create the sequence name from the table name and start_seq (a random number).
    # The sequence populates the src_file_rec_num column.
    seq_name = (csv_table + '_' + str(start_seq)).lower()

    global_tenant_seq_name = Constants.TENANT_SEQUENCE_NAME(tenant_name)

    # query 1 -- create query to create sequence
    create_sequence = queries.create_sequence_query(staging_schema, seq_name,
                                                    start_seq)
    # query 2 -- create query to load data from fdw to staging table
    insert_into_staging_table = queries.create_inserting_into_staging_query(
        stg_columns, apply_rules, csv_table_columns, staging_schema,
        staging_table, csv_schema, csv_table, seq_name, global_tenant_seq_name,
        transformation_rules)
    # query 3 -- create query to drop sequence
    drop_sequence = queries.drop_sequence_query(staging_schema, seq_name)
    # logger.debug('@@@@@@@', insert_into_staging_table)

    # execute 3 queries in order
    execute_udl_queries(
        conn, [create_sequence, insert_into_staging_table, drop_sequence],
        'Exception in loading data -- ', 'file_loader', 'import_via_fdw')
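# Rough sketch (an assumption, not taken from the queries module) of what the three
# statements built above are expected to amount to on PostgreSQL:
#
#   CREATE SEQUENCE "<staging_schema>"."<seq_name>" START WITH <start_seq>;
#   INSERT INTO "<staging_schema>"."<staging_table>" (src_file_rec_num, <stg_columns>, ...)
#       SELECT nextval('<staging_schema>.<seq_name>'), <csv_table_columns with rules applied>, ...
#       FROM "<csv_schema>"."<csv_table>";
#   DROP SEQUENCE "<staging_schema>"."<seq_name>";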
def create_fdw_tables(conn, header_names, header_types, csv_file, csv_schema, csv_table, fdw_server):
    '''
    Create one foreign table which maps to the given csv_file on the given fdw_server
    '''
    create_csv_ddl = queries.create_ddl_csv_query(header_names, header_types, csv_file, csv_schema, csv_table, fdw_server)
    drop_csv_ddl = queries.drop_ddl_csv_query(csv_schema, csv_table)
    # First drop the fdw table if exists, then create a new one
    execute_udl_queries(conn, [drop_csv_ddl, create_csv_ddl], 'Exception in creating fdw tables --', 'file_loader', 'create_fdw_tables')
def drop_fdw_tables(conn, csv_schema, csv_table):
    '''
    Drop foreign table
    '''
    drop_csv_ddl = queries.drop_ddl_csv_query(csv_schema, csv_table)
    execute_udl_queries(conn, [drop_csv_ddl],
                        'Exception in drop fdw table -- ', 'file_loader',
                        'drop_fdw_tables')
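# A minimal usage sketch, with hypothetical values throughout: the expected call order
# is create_fdw_tables -> import_via_fdw -> drop_fdw_tables on a single UDL connection,
# reusing the same get_udl_connection helper the loaders below rely on.
def example_csv_to_staging_load():
    with get_udl_connection() as conn:
        create_fdw_tables(conn,
                          header_names=['student_guid', 'asmt_score'],  # hypothetical CSV headers
                          header_types=['text', 'text'],
                          csv_file='/opt/edware/landing/asmt.csv',      # hypothetical path
                          csv_schema='udl_csv', csv_table='csv_asmt_1',
                          fdw_server='udl_import')                      # hypothetical fdw server name
        import_via_fdw(conn,
                       stg_columns=['student_guid', 'asmt_score'],
                       csv_table_columns=['student_guid', 'asmt_score'],
                       transformation_rules=['', ''],                   # rule format is a guess
                       apply_rules=False,
                       staging_schema='udl2', staging_table='stg_sbac_asmt_outcome',
                       csv_schema='udl_csv', csv_table='csv_asmt_1',
                       start_seq=100, tenant_name='cat')
        drop_fdw_tables(conn, 'udl_csv', 'csv_asmt_1')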
def move_data_from_staging_to_integration_all(source_table_name,
                                              target_table_name,
                                              err_list_table_name,
                                              guid_batch,
                                              target_columns,
                                              source_columns_with_tran_rule,
                                              record_sid=None):
    '''
    Move data for the given batch from the staging table to the integration
    table by building and executing a migration query.
    '''
    with get_udl_connection() as conn:
        sql_query = create_migration_query(conn,
                                           source_table_name,
                                           target_table_name,
                                           err_list_table_name,
                                           guid_batch,
                                           target_columns,
                                           source_columns_with_tran_rule,
                                           record_sid=record_sid)
        except_msg = "problem when load data from staging table to integration table"
        query_result = execute_udl_queries(
            conn, [sql_query],
            except_msg,
            'move_to_integration',
            'move_data_from_staging_to_integration',
            tries=-1)
    return query_result[0]
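# Hypothetical invocation sketch: all table and column names below are invented, and
# source_columns_with_tran_rule is assumed to already carry any transformation
# expressions expected by create_migration_query.
def example_staging_to_integration():
    return move_data_from_staging_to_integration_all(
        source_table_name='stg_sbac_asmt_outcome',
        target_table_name='int_sbac_asmt_outcome',
        err_list_table_name='err_list',
        guid_batch='batch-guid-0001',                  # hypothetical batch guid
        target_columns=['student_guid', 'asmt_score'],
        source_columns_with_tran_rule=['student_guid', 'asmt_score'],
        record_sid=None)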
def explode_data_to_dim_table(conf, source_table, target_table, column_mapping, column_types):
    '''
    Main function to move data from source table to target tables.
    Source table can be INT_SBAC_ASMT or INT_SBAC_ASMT_OUTCOME. Target table can be any dim table in the star schema.
    @param conf: one dictionary which has database settings, and guid_batch
    @param source_table: name of the source table where the data comes from
    @param target_table: name of the target table where the data should be moved to
    @param column_mapping: list of tuple of:
                           column_name_in_target, column_name_in_source
    @param column_types: data types of all columns in one target table
    '''
    # create database connection to target
    with get_target_connection(conf[mk.TENANT_NAME], conf[mk.GUID_BATCH]) as conn:
        # create insertion query
        # TODO: find out if the affected row count and elapsed time can be returned, so they can be put in the log
        # send only data that needs to be inserted (such as insert, update) to the dimension table
        query = create_insert_query(conf, source_table, target_table, column_mapping, column_types, True,
                                    'C' if source_table in op_table_conf else None)

        logger.info(compile_query_to_sql_text(query))

        # execute the query
        affected_rows = execute_udl_queries(conn, [query],
                                            'Exception -- exploding data from integration to target ' +
                                            '{target_table}'.format(target_table=target_table),
                                            'move_to_target', 'explode_data_to_dim_table')

    return affected_rows
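# Hypothetical argument shapes for explode_data_to_dim_table: conf only needs the two
# keys read in this function (mk.TENANT_NAME and mk.GUID_BATCH), column_mapping follows
# the (target column, source column) tuples described in the docstring, and the
# column_types structure is a guess.
def example_explode_to_dim_student():
    conf = {mk.TENANT_NAME: 'cat', mk.GUID_BATCH: 'batch-guid-0001'}
    column_mapping = [('student_rec_id', 'record_sid'),
                      ('student_guid', 'guid_student')]
    column_types = {'student_rec_id': 'bigint',
                    'student_guid': 'varchar(50)'}
    return explode_data_to_dim_table(conf, 'int_sbac_asmt_outcome', 'dim_student',
                                     column_mapping, column_types)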
def move_data_from_int_tables_to_target_table(conf, task_name, source_tables,
                                              target_table):
    """Move student registration data from source integration tables to target table.
    Source tables are INT_STU_REG and INT_STU_REG_META. Target table is student_registration.

    @param conf: Configuration for particular load type (assessment or studentregistration)
    @param task_name: Name of the celery task invoking this method
    @param source_tables: Names of the source tables from where the data comes
    @param target_table: Name of the target table to where the data should be moved

    @return: Number of inserted rows
    """
    column_and_type_mapping = get_column_and_type_mapping(
        conf, task_name, target_table, source_tables)

    with get_target_connection(conf[mk.TENANT_NAME],
                               conf[mk.GUID_BATCH]) as conn_to_target_db:
        insert_query = create_sr_table_select_insert_query(
            conf, target_table, column_and_type_mapping)
        logger.info(insert_query)
        affected_rows = execute_udl_queries(
            conn_to_target_db, [insert_query],
            'Exception -- moving data from integration {int_table} to target {target_table}'
            .format(int_table=source_tables[0], target_table=target_table),
            'move_to_target', 'move_data_from_int_tables_to_target_table')

    return affected_rows
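# Hypothetical call sketch: the source and target table names follow the docstring
# above, the task name is invented, and conf is assumed to carry at least the tenant
# and batch keys consumed by get_target_connection.
def example_int_to_student_registration(conf):
    return move_data_from_int_tables_to_target_table(
        conf,
        task_name='some_celery_task_name',             # hypothetical
        source_tables=['INT_STU_REG', 'INT_STU_REG_META'],
        target_table='student_registration')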
def load_to_table(data_dict, guid_batch, int_table, tenant_name, udl_schema):
    '''
    Load the given data into the proper integration table
    @param data_dict: the dictionary containing the data to be loaded
    @param guid_batch: the id for the batch
    @param int_table: the name of the integration table
    @param tenant_name: name of the tenant
    @param udl_schema: udl schema name
    '''
    # Create sqlalchemy connection and get table information from sqlalchemy
    ref_column_mapping_columns = {}
    with get_udl_connection() as conn:
        data_dict[mk.GUID_BATCH] = guid_batch
        data_dict = fix_empty_strings(data_dict)
        ref_table = conn.get_table('ref_column_mapping')
        s_int_table = conn.get_table(int_table)
        column_mapping_query = select([ref_table.c.target_column,
                                       ref_table.c.stored_proc_name],
                                      from_obj=ref_table).where(and_(ref_table.c.source_table == 'lz_json',
                                                                     ref_table.c.target_table == int_table))
        results = conn.get_result(column_mapping_query)
        for result in results:
            target_column = result['target_column']
            stored_proc_name = result['stored_proc_name']
            value = data_dict.get(target_column)
            if value:
                if stored_proc_name:
                    if stored_proc_name.startswith('sp_'):
                        ref_column_mapping_columns[target_column] = stored_proc_name + '(' + QuotedString(value if type(value) is str else str(value)).getquoted().decode('utf-8') + ')'
                    else:
                        format_value = dict()
                        format_value['value'] = QuotedString(value if type(value) is str
                                                             else str(value)).getquoted().decode('utf-8')
                        if s_int_table.c[target_column].type.python_type is str:
                            format_value['length'] = s_int_table.c[target_column].type.length
                        ref_column_mapping_columns[target_column] = stored_proc_name.format(**format_value)
                    continue
            ref_column_mapping_columns[target_column] = value

        record_sid = 'nextval(\'{schema_name}.{tenant_sequence_name}\')'.\
            format(schema_name=udl_schema, tenant_sequence_name=Constants.TENANT_SEQUENCE_NAME(tenant_name))
        from_select_column_names = ['record_sid']
        from_select_select_values = [record_sid]
        for column in s_int_table.c:
            value = data_dict.get(column.name)
            if value is not None:
                from_select_column_names.append(column.name)
                from_select_select_values.append(
                    ref_column_mapping_columns.get(column.name,
                                                   QuotedString(value if type(value) is str else str(value)).getquoted().decode('utf-8')))
        insert_into_int_table = s_int_table.insert().from_select(from_select_column_names,
                                                                 select(from_select_select_values))
        # create insert statement and execute
        affected_row = db_util.execute_udl_queries(conn, [insert_into_int_table],
                                                   'Exception in loading json data -- ',
                                                   'json_loader', 'load_to_table')

    return affected_row[0]
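# Hypothetical json_loader input sketch: data_dict keys must match columns of the
# integration table (and rows in ref_column_mapping for any stored-procedure
# formatting); every value below is invented for illustration.
def example_load_json_row():
    data_dict = {'student_guid': 'abcd-1234',          # hypothetical
                 'asmt_score': '2400'}
    return load_to_table(data_dict,
                         guid_batch='batch-guid-0001',
                         int_table='int_sbac_asmt',
                         tenant_name='cat',
                         udl_schema='udl2')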