Пример #1
0
    def sample_data_test(self, source_db_node, source_user, source_pwd,
                         target_db_node, target_user, target_pwd):
        print("Get necessary metadata from source and target")
        """Step 1 get source/target table list from source target table mapping"""
        db2_metadata_query = "SELECT NAME as COLUMN_NAME,TBNAME as TABLE_NAME,TBCREATOR AS TABLE_SCHEMA,COLNO AS COLUMN_NUMBER,COLTYPE AS COLUMN_TYPE,LENGTH AS COLUMN_LENGTH,KEYSEQ AS KEY_SEQ \
                            FROM SYSIBM.SYSCOLUMNS \
                            WHERE UPPER(TBNAME) IN (SELECT UPPER(NAME) FROM SYSIBM.SYSTABLES WHERE TYPE = 'T') AND \
                            UPPER(TBCREATOR) in ({}) \
                            AND UPPER (TBNAME) in ({}) order by COLNO "

        pda_metadata_query = "SELECT ATTNAME AS COLUMN_NAME,NAME AS TABLE_NAME,SCHEMA AS TABLE_SCHEMA,ATTNUM AS COLUMN_NUMBER,FORMAT_TYPE AS COLUMN_TYPE,ATTCOLLENG AS COLUMN_LENGTH,'0' AS KEY_SEQ \
                            FROM _V_RELATION_COLUMN \
                            WHERE UPPER(TYPE) = 'TABLE' AND \
                            UPPER(SCHEMA) in ({}) \
                            AND UPPER(NAME) in ({}) "

        conf = ReadConfig()
        source_target_table_mapping = conf.read_source_target_table_mapping()
        print(source_target_table_mapping)
        '''Get source and target db metadata'''
        print("Step 1: Get source table list.")
        print("Step 2: Get source tables' column list to file")
        source_schema_list = []
        target_schema_list = []
        source_table_list = []
        target_table_list = []
        for item in source_target_table_mapping:
            source_schema_list.append(item['SRC_OBJ_NM'].split('.')[0])
            source_table_list.append(item['SRC_OBJ_NM'].split('.')[1])
            target_schema_list.append(item['TRGT_TBL_NM'].split('.')[0])
            target_table_list.append(item['TRGT_TBL_NM'].split('.')[1])
        source_schema_list = list(set(source_schema_list))
        target_schema_list = list(set(target_schema_list))
        source_table_list = list(set(source_table_list))
        target_table_list = list(set(target_table_list))
        print("Step 3: Get target table list.")
        '''get source tables' metadata'''
        source_db_driver = conf.Read_db_config(source_db_node)['driver']
        #db_driver = db_node['driver']
        if source_db_driver == '{IBM DB2 ODBC DRIVER}' or source_db_driver == 'com.ibm.db2.jcc.DB2Driver':
            source_query = db2_metadata_query.format(
                str(source_schema_list).strip('[').strip(']'),
                str(source_table_list).strip('[').strip(']'))
            print(source_query)
        else:
            source_query = pda_metadata_query.format(
                str(source_schema_list).strip('[').strip(']'),
                str(source_table_list).strip('[').strip(']'))
        print(source_query)
        if source_db_driver == 'com.ibm.db2.jcc.DB2Driver':
            source_metadata = db_connect.exec_sql_with_jdbc(
                source_db_node, source_user, source_pwd, source_query)
        else:
            source_metadata = db_connect.exec_sql_common(
                source_db_node, source_user, source_pwd, source_query)
        '''table to map'''
        source_table_columns_dict = {}
        for item in source_metadata:
            source_table_columns = item['TABLE_SCHEMA'].strip(
            ) + "." + item['TABLE_NAME']
            column_dict = {}
            column_dict['COLUMN_NAME'] = item['COLUMN_NAME']
            column_dict['COLUMN_NUMBER'] = item['COLUMN_NUMBER']
            column_dict['COLUMN_TYPE'] = item['COLUMN_TYPE']
            column_dict['COLUMN_LENGTH'] = item['COLUMN_LENGTH']
            column_dict['KEY_SEQ'] = item['KEY_SEQ']
            if source_table_columns_dict.__contains__(source_table_columns):
                source_table_columns_dict[source_table_columns].append(
                    column_dict)
            else:
                column_list = []
                column_list.append(column_dict)
                source_table_columns_dict[source_table_columns] = column_list
        print(source_table_columns_dict)
        '''Store the table mapping to a temp file'''
        file_name = os.path.join(conf.read_temp_dir(), 'source_metadata.tmp')
        print(file_name)
        with open(file_name, 'w') as f:
            json.dump(source_table_columns_dict, f)
        print("Step 4: Get target tables' column list.")
        '''get target tables' metadata'''
        target_db_driver = conf.Read_db_config(target_db_node)['driver']
        print('target db driver:' + target_db_driver)
        if target_db_driver == '{IBM DB2 ODBC DRIVER}' or target_db_driver == 'com.ibm.db2.jcc.DB2Driver':
            target_query = db2_metadata_query.format(
                str(target_schema_list).strip('[').strip(']'),
                str(target_table_list).strip('[').strip(']'))
            print(target_query)
        else:
            target_query = pda_metadata_query.format(
                str(target_schema_list).strip('[').strip(']'),
                str(target_table_list).strip('[').strip(']'))
        print(target_query)

        if target_db_driver == 'com.ibm.db2.jcc.DB2Driver':
            target_metadata = db_connect.exec_sql_with_jdbc(
                target_db_node, target_user, target_pwd, target_query)
        else:
            target_metadata = db_connect.exec_sql_common(
                target_db_node, target_user, target_pwd, target_query)
        '''table to map'''
        target_table_columns_dict = {}
        for item in target_metadata:
            target_table_columns = item['TABLE_SCHEMA'].strip(
            ) + "." + item['TABLE_NAME']
            column_dict = {}
            column_dict['COLUMN_NAME'] = item['COLUMN_NAME']
            column_dict['COLUMN_NUMBER'] = item['COLUMN_NUMBER']
            column_dict['COLUMN_TYPE'] = item['COLUMN_TYPE'].split('(')[0]
            column_dict['COLUMN_LENGTH'] = item['COLUMN_LENGTH']
            column_dict['KEY_SEQ'] = item['KEY_SEQ']
            if target_table_columns_dict.__contains__(target_table_columns):
                target_table_columns_dict[target_table_columns].append(
                    column_dict)
            else:
                column_list = []
                column_list.append(column_dict)
                target_table_columns_dict[target_table_columns] = column_list
        print(target_table_columns_dict)
        '''Store the target metadata a temp file'''
        file_name = os.path.join(conf.read_temp_dir(), 'target_metadata.tmp')
        print(file_name)
        with open(file_name, 'w') as f:
            json.dump(target_table_columns_dict, f)
        '''Build source_target_column_mapping'''
        print("step 5: get source/target tables column mapping")
        source_target_column_mapping = []
        for item in source_target_table_mapping:
            source_table = item['SRC_OBJ_NM']
            target_table = item['TRGT_TBL_NM']
            source_columns = source_table_columns_dict[source_table]
            target_columns = target_table_columns_dict[target_table]
            for src_col in source_columns:
                for tar_col in target_columns:
                    if tar_col['COLUMN_NUMBER'] == src_col['COLUMN_NUMBER']:
                        source_target_column_mapping.append({"SOURCE_TABLE": source_table, "TARGET_TABLE": target_table,\
                                                             "SOURCE_COLUMN": src_col['COLUMN_NAME'],\
                                                             "TARGET_COLUMN": tar_col['COLUMN_NAME'],\
                                                             "SOURCE_COLUMN_NUMBER": src_col['COLUMN_NUMBER'],\
                                                             "TARGET_COLUMN_NUMBER": tar_col['COLUMN_NUMBER']})
        print(source_target_column_mapping)
        '''Store to temp'''
        file_name = os.path.join(conf.read_temp_dir(),
                                 'source_target_column_mapping.tmp')
        print(file_name)
        with open(file_name, 'w') as f:
            json.dump(source_target_column_mapping, f)
        '''For each source build key_value mapping of columns'''
        source_target_column_mapping_dict = {}
        one_table_src_tgt_col_mapping_dict = {}
        for items in source_target_column_mapping:
            if source_target_column_mapping_dict.__contains__(
                    items['SOURCE_TABLE']):
                one_table_src_tgt_col_mapping_dict[
                    items['SOURCE_COLUMN']] = items['TARGET_COLUMN']
                source_target_column_mapping_dict[
                    items['SOURCE_TABLE']] = one_table_src_tgt_col_mapping_dict
            else:
                one_table_src_tgt_col_mapping_dict = {}
                one_table_src_tgt_col_mapping_dict[
                    items['SOURCE_COLUMN']] = items['TARGET_COLUMN']
                source_target_column_mapping_dict[
                    items['SOURCE_TABLE']] = one_table_src_tgt_col_mapping_dict
        print("source_target_column_mapping_dict" +
              str(source_target_column_mapping_dict))

        print("For each source table get source table sample data")
        for item in source_target_table_mapping:
            source_table = item['SRC_OBJ_NM']
            target_table = item['TRGT_TBL_NM']
            print("Source table name:" + source_table)
            source_key = []
            source_column_list = []
            target_column_list = []
            source_where_condition = conf.Read_where_condition(source_table)
            for row in source_table_columns_dict[source_table]:
                source_column_list.append(row['COLUMN_NAME'])
                if row['KEY_SEQ'] != '0':
                    source_key.append(row['COLUMN_NAME'])

            print('source_column_list:' + str(source_column_list))
            print('source_key:' + str(source_key))
            for row in target_table_columns_dict[target_table]:
                target_column_list.append(row['COLUMN_NAME'])
            print("Target_column_list:" + str(target_column_list))
            source_column_str = str(source_column_list).strip('[').strip(
                ']').replace("'", '')
            target_column_str = str(target_column_list).strip('[').strip(
                ']').replace("'", '')
            print('Source Column str:' + source_column_str)
            print('Target Column str:' + target_column_str)
            source_sample_query_run_flag = False
            target_sample_query_run_flag = False
            if source_where_condition != 'NULL':
                source_sample_query = "select {} from {} {}".format(
                    source_column_str, source_table, source_where_condition)
                print("source_sample_query:" + source_sample_query)
                target_where_condition = self.source_condition_transfer(
                    source_table, source_where_condition)
                target_sample_query = "select {} from {} {}".format(
                    target_column_str, target_table, target_where_condition)
                print("target_sample_query" + target_sample_query)

            elif len(source_key) != 0:
                source_sample_query = "with a as (select RAND()*50 as RANDOM_KEY, {} from {} \
                order by RANDOM_KEY fetch first 10 rows only) select {} from a order by {} asc" \
                    .format(source_column_str, source_table, source_column_str,
                            str(source_key).strip('[').strip(']').replace("'", ''))
                print(source_sample_query)
                if source_db_driver == 'com.ibm.db2.jcc.DB2Driver':
                    source_sample_data = db_connect.exec_sql_with_jdbc(
                        'siwdb2_jdbc', 'pdaetlg', 'sep09sep',
                        source_sample_query)
                else:
                    source_sample_data = db_connect.exec_sql_common(
                        'xx', 'xx', 'xx', source_sample_query)
                source_sample_query_run_flag = True
                '''format timestamp'''

                source_sample_data_formated = eval(
                    self.Date_time_format_transfer(str(source_sample_data)))
                #print(type(source_sample_data_formated),type(source_sample_data_formated[0]),source_sample_data_formated)
                file_name = os.path.join(conf.read_temp_dir(),
                                         source_table + "_sample.tmp")
                with open(file_name, 'w') as f:
                    json.dump(source_sample_data_formated, f)

                target_condition_str = " where "
                target_key_list = []
                for item in source_key:
                    target_key = ''
                    primary_key_value_list = []
                    for row in source_target_column_mapping:
                        if row['SOURCE_COLUMN'] == item and row[
                                'SOURCE_TABLE'] == source_table:
                            target_key = row['TARGET_COLUMN']
                            target_key_list.append(target_key)
                    for row in source_sample_data:
                        primary_key_value_list.append(row[item])
                    if item == source_key[-1]:
                        target_condition_str = target_condition_str + target_key + " in ({})".format(
                            str(primary_key_value_list).strip('[').strip(']'))
                    else:
                        target_condition_str = target_condition_str + target_key + " in ({}) and ".format(
                            str(primary_key_value_list).strip('[').strip(']'))
                target_condition_str += "order by {} asc".format(
                    str(target_key).strip('[').strip(']').replace("'", ''))
                print(str(target_condition_str))
                target_sample_query = "select {} from {} {}".format(
                    target_column_str, target_table, target_condition_str)
                print(target_sample_query)
            else:
                source_sample_query = "select {} from {}".format(
                    source_column_str, source_table)
                target_sample_query = "select {} from {}".format(
                    target_column_str, target_table)

            if source_sample_query_run_flag == False:
                print("Source table name:" + source_table)
                source_db_driver = 'com.ibm.db2.jcc.DB2Driver'
                if source_db_driver == 'com.ibm.db2.jcc.DB2Driver':
                    source_sample_data = db_connect.exec_sql_with_jdbc(
                        'siwdb2_jdbc', 'pdaetlg', 'sep09sep',
                        source_sample_query)
                else:
                    source_sample_data = db_connect.exec_sql_common(
                        'xx', 'xx', 'xx', source_sample_query)
                '''format timestamp'''

                source_sample_data_formated = eval(
                    self.Date_time_format_transfer(str(source_sample_data)))
                #print(type(json.loads(source_sample_data_formated)),json.loads(source_sample_data_formated))
                file_name = os.path.join(conf.read_temp_dir(),
                                         source_table + "_sample.tmp")
                with open(file_name, 'w') as f:
                    json.dump(source_sample_data_formated, f)

            if target_sample_query_run_flag == False:
                print("Target table name:" + target_table)
                if target_db_driver == 'com.ibm.db2.jcc.DB2Driver':
                    target_sample_data = db_connect.exec_sql_with_jdbc(
                        'xx', 'xx', 'xx', target_sample_query)
                else:
                    target_sample_data = db_connect.exec_sql_common(
                        'siwodspda', 'siwgit', 'SIWJul2019JulSIW',
                        target_sample_query)
                print(target_sample_data)
                file_name = os.path.join(conf.read_temp_dir(),
                                         target_table + "_sample.tmp")
                with open(file_name, 'w') as f:
                    json.dump(target_sample_data, f)
                '''validation'''
                source_diff_list = []
                target_diff_list = []
                for source_row in source_sample_data_formated:
                    for target_row in target_sample_data:
                        compare_flag = False
                        for k, v in source_target_column_mapping_dict[
                                source_table].items():
                            if target_row[v] == source_row[k]:
                                compare_flag = True
                            else:
                                compare_flag = False
                                break
                        if compare_flag == True:
                            break
                    if compare_flag == False:
                        source_diff_list.append(source_row)
                    else:
                        pass

                for target_row in target_sample_data:
                    for source_row in source_sample_data_formated:
                        compare_flag = False
                        for k, v in source_target_column_mapping_dict[
                                source_table].items():
                            if source_row[k] == target_row[v]:
                                compare_flag = True
                            else:
                                compare_flag = False
                                break
                        if compare_flag == True:
                            break
                    if compare_flag == False:
                        target_diff_list.append(target_row)
                    else:
                        pass
                print("source_diff_list:" + str(source_diff_list))
                print("target_diff_list:" + str(target_diff_list))
Пример #2
0
    def sample_data_test(self, id, pwd):
        '''read conf'''
        conf = ReadConfig()
        table_list = conf.Read_table_list()
        source_db_node = conf.Read_source_db_node()
        target_db_node = conf.Read_target_db_node()
        sample_data_report = []
        failed_sample_report = []
        fail_cell_count = 0
        for table in table_list:
            '''get the source target column mapping'''
            source_target_column_mapping = self.generate_source_target_column_map(id, pwd,table['SOURCE_SCHEMA'],\
                table['SOURCE_TABLE'], table['TARGET_SCHEMA'], table['TARGET_TABLE'])
            source_table_nm = table['SOURCE_SCHEMA']+'.'+table['SOURCE_TABLE']
            target_table_nm = table['TARGET_SCHEMA'] + '.' + table['TARGET_TABLE']
            sample_source_condition = conf.Read_where_condition(table['SOURCE_TABLE'])
            sample_target_condition = conf.Read_where_condition(table['TARGET_TABLE'])
            source_sql = "select * from {} where {}".format(source_table_nm,sample_source_condition)
            target_sql = "select * from {} where {}".format(target_table_nm,sample_target_condition)
            rs_source = db_connect.exec_sql_common(source_db_node, id, pwd, source_sql)
            rs_target = db_connect.exec_sql_common(target_db_node, id, pwd, target_sql)
            source_row_count = len(rs_source)
            target_row_count = len(rs_target)
            source_column_count = len(rs_source[0])
            target_column_count = len(rs_target[0])
            print("source table: %s" % source_table_nm)
            print("target table: %s" % target_table_nm)

            '''step 1 compare the row number between source and target'''
            if source_row_count == target_row_count:
                print("The sample sql returns the same row count")
            else:
                print("The sample sql returns the different row count,the test validate failed")
                raise TestException.SampleDataError
                '''step 2 compare the column number between source and target'''
            if source_column_count == target_column_count + 3:
                print("The sample sql return the same column count")
            else:
                print("The sample sql returns the different row count,the test validate failed")
                raise TestException.SampleDataError

                '''step 3 loop to compare the result from source and target'''

                for i in range(source_row_count):
                    for k, v in source_target_column_mapping.items():
                        sample_compare_dict = dict()
                        source_value = rs_source[i][k]
                        target_value = rs_target[i][v]
                        sample_compare_dict['SOURCE_TABLE_NM'] = source_table_nm
                        sample_compare_dict['SOURCE_COLUMN_NM'] = k
                        sample_compare_dict['SOURCE_COLUMN_VALUE'] = source_value
                        sample_compare_dict['TARGET_TABLE_NM'] = target_table_nm
                        sample_compare_dict['TARGET_COLUMN_NM'] = v
                        sample_compare_dict['TARGET_COLUMN_VALUE'] = target_value
                        sample_compare_dict['STATUS'] = ('PASS' if(source_value == target_value) else 'FAIL')
                        sample_data_report.append(sample_compare_dict)
                        if sample_compare_dict['STATUS'] == 'FAIL':
                            failed_sample_report.append[sample_compare_dict]
        '''write failed record to failed file '''
        with open("tmp/failed_sample_data_report.json",'w',encoding ='UTF-8') as f:
            json.dump(failed_sample_report, f)
        '''write all the record to report file'''
        with open("tmp/sample_data_report.json",'w',encoding ='UTF-8') as f:
            json.dump(sample_data_report, f)

        if len(failed_sample_report) > 0:
            print("There are some cell values not equal, the test failed,check the failed report file for detail")
            raise TestException.SampleDataError()
        else:
            print("The sample data test run passed")
        return 'PASS'