def check_translation(cursor, table_nm):
    """Report varchar columns of ``table_nm`` that still contain translation markers.

    Looks up the ``dbo`` varchar columns of the table whose maximum length is
    greater than 14 and probes each one for a value matching
    ``%<<translatable%``.  Every hit is recorded through ``add_msg`` under the
    '3 translation' category, together with a ``SELECT TOP 100`` query that can
    be run to inspect the offending rows.

    Args:
        cursor: Database cursor handed to ``query`` / ``has_data``.
        table_nm: Name of the table to inspect (concatenated into the SQL).
    """
    columns_sql = (
        "SELECT table_name,column_name FROM information_schema.columns"
        " WHERE table_schema = 'dbo' AND table_name ='" + table_nm + "'"
        " AND DATA_TYPE = 'varchar' AND CHARACTER_MAXIMUM_LENGTH > 14"
        " ORDER BY table_name, ordinal_position;"
    )

    for row in query(cursor, columns_sql):
        column = str(row[1])

        # Tail shared by the TOP 1 probe and the TOP 100 query quoted in the
        # message, so the two can never drift apart.
        from_where = (" " + column + " FROM " + table_nm +
                      " WITH(NOLOCK) WHERE " + column +
                      " LIKE '%<<translatable%'")

        if not has_data(cursor, "SELECT TOP 1" + from_where):
            continue

        msg = ("\n\033[32m" + table_nm + "." + column +
               "\033[0m has un-translate string, please verify."
               " SELECT TOP 100" + from_where + "\n")
        add_msg('3 translation', table_nm, column, msg)
def check_columns(cursor, table_nm, business_key_conf):
    """Run the per-column sanity checks for one table, then its duplicate check.

    Columns are read from ``information_schema.columns`` in ordinal order and
    the column at ordinal position 1 is used as the table's key column.  All
    probes that filter on the key column only look at rows where it is > 0.
    For every column, findings are recorded through ``add_msg`` under the
    '5 column_check' category:

    * the column has no non-NULL value;
    * otherwise, the column has no value other than the empty string;
    * otherwise, for ``*_KEY`` columns (except ``LABEL_KEY`` and anything in
      ``D_TRANSLATION``): no value greater than -1, any NULL value, and, for
      date/time/item/order keys, any -1 value.

    Columns named ``MART_SOURCE_ID``, ``AWO_ID``, ``CUR_REC_IND`` and
    ``CURRENT_RECORD_IND`` are only noted (when they pass the emptiness
    checks) and passed on to ``check_duplicate`` once the loop is finished.

    Args:
        cursor: Database cursor handed to ``query`` / ``has_data``.
        table_nm: Name of the table to inspect (concatenated into the SQL).
        business_key_conf: Business-key configuration forwarded unchanged to
            ``check_duplicate``.
    """
    columns_sql = (
        "SELECT table_name,column_name,ordinal_position, data_type"
        " FROM information_schema.columns"
        " WHERE table_schema = 'dbo' AND table_name = '" + table_nm + "'"
        " ORDER BY ordinal_position"
    )
    rows = query(cursor, columns_sql)

    has_mart_source_id = has_awo_id = False
    has_cur_rec_ind = has_current_record_ind = False

    for row in rows:
        column_name = str(row[1])
        if row[2] == 1:
            pk_column = column_name

        # Building blocks reused by every probe and message for this column.
        probe = ("SELECT TOP 1 " + column_name + " FROM " + table_nm +
                 " WITH(NOLOCK) WHERE ")
        pk_positive = pk_column + " > 0"
        label = "\033[32m" + table_nm + "." + column_name + "\033[0m"

        # Check 1: every value is NULL (ignoring rows whose key is <= 0).
        if not has_data(cursor, probe + pk_positive + " AND " + column_name +
                        " IS NOT NULL"):
            add_msg('5 column_check', table_nm, column_name,
                    label + " is empty.")
            continue

        # Check 2: values exist but all of them are the empty string.
        if not has_data(cursor, probe + pk_positive + " AND convert(varchar," +
                        column_name + ") <> ''"):
            add_msg('5 column_check', table_nm, column_name,
                    label + " are all empty string.")
            continue

        # The column holds real data: classify it by name.
        if column_name == "MART_SOURCE_ID":
            has_mart_source_id = True
        elif column_name == "AWO_ID":
            has_awo_id = True
        elif column_name == "CUR_REC_IND":
            has_cur_rec_ind = True
        elif column_name == "CURRENT_RECORD_IND":
            has_current_record_ind = True
        elif column_name.endswith("_KEY") and not (
                column_name == "LABEL_KEY" or table_nm == "D_TRANSLATION"):
            if not has_data(cursor, probe + column_name + " > -1"):
                add_msg('5 column_check', table_nm, column_name,
                        label + " \033[33mis all -1, please verify.\033[0m")

            # Key columns are not expected to contain NULL at all.
            if has_data(cursor, probe + column_name + " IS NULL"):
                add_msg('5 column_check', table_nm, column_name,
                        label + " has \033[22mNULL\033[0m value, please verify.")

            if column_name.endswith(
                    ("_DATE_KEY", "_TIME_KEY", "ITEM_KEY", "ORDER_KEY")):
                if has_data(cursor, probe + column_name + " = -1 AND " +
                            pk_positive + ";"):
                    add_msg('5 column_check', table_nm, column_name,
                            label + " has \033[22m-1\033[0m value, please verify.")

    check_duplicate(cursor, has_mart_source_id, has_awo_id, has_cur_rec_ind,
                    has_current_record_ind, table_nm, business_key_conf)
def _report_configured_duplicates(cursor, table_nm, business_key_conf):
    """Run every configured business-key duplicate check for ``table_nm``.

    Returns True when at least one entry of ``business_key_conf`` targets the
    table (whether or not duplicates were found), False otherwise.
    """
    found_conf = False
    for entity in business_key_conf:
        if entity['TABLE'] != table_nm:
            continue
        found_conf = True
        columns = entity['COLUMNS']
        # entity['WHERE'] is appended verbatim, so it must carry its own
        # leading space / WHERE keyword (or be empty).
        duplicate_check_sql = ("SELECT " + columns + " FROM " +
                               entity['TABLE'] + entity['WHERE'] +
                               " GROUP BY " + columns +
                               " HAVING COUNT(*) > 1")
        if has_data(cursor, duplicate_check_sql):
            msg = ("\n\033[31m" + entity['TABLE'] +
                   " has duplicate data on " + columns +
                   "\033[0m, please check by \n << \033[33m" +
                   duplicate_check_sql + "\033[0m >>\n")
            add_msg('4 duplicates', table_nm, columns, msg)
    return found_conf


def _report_id_duplicates(cursor, table_nm, id_column, where_clause=""):
    """Report one duplicated ``id_column`` value of ``table_nm``, if any exists.

    ``where_clause`` is inserted verbatim between the table name and the
    GROUP BY, so it must start with a space when non-empty.
    """
    duplicate_check_sql = ("SELECT " + id_column + " FROM " + table_nm +
                           where_clause + " GROUP BY " + id_column +
                           " HAVING COUNT(*) > 1")
    duplicate_value = query_first_value(cursor, duplicate_check_sql)
    # NOTE(review): truthiness test kept from the original; a duplicated value
    # that is itself falsy (0, '') would go unreported. Confirm what
    # query_first_value returns for an empty result before tightening this.
    if duplicate_value:
        msg = ("\n\033[31m" + table_nm + " has duplicate data on " +
               id_column + " \033[0m, please check by \n << \033[33m"
               "SELECT * FROM " + table_nm + " WHERE " + id_column + " = " +
               str(duplicate_value) + "\033[0m >>\n")
        add_msg('4 duplicates', table_nm, id_column, msg)


def check_duplicate(cursor, has_mart_source_id, has_awo_id, has_cur_rec_ind,
                    has_current_record_ind, table_nm, business_key_conf):
    """Look for duplicate rows in ``table_nm`` and record them via ``add_msg``.

    Every entry of ``business_key_conf`` whose ``'TABLE'`` equals ``table_nm``
    is checked by grouping on its ``'COLUMNS'`` (filtered by its ``'WHERE'``).
    When no entry matches:

    * tables whose name starts with ``B_`` get a "No conf for table" message;
    * other tables fall back to ID-based checks driven by the flags:
      ``MART_SOURCE_ID`` on its own, and ``AWO_ID`` restricted to
      ``CUR_REC_IND = 1`` and/or ``CURRENT_RECORD_IND = 1`` when those
      columns exist, or unrestricted when neither exists.

    All findings are recorded under the '4 duplicates' category.

    Args:
        cursor: Database cursor handed to ``has_data`` / ``query_first_value``.
        has_mart_source_id: Whether the table has a usable MART_SOURCE_ID column.
        has_awo_id: Whether the table has a usable AWO_ID column.
        has_cur_rec_ind: Whether the table has a CUR_REC_IND column.
        has_current_record_ind: Whether the table has a CURRENT_RECORD_IND column.
        table_nm: Name of the table to check (concatenated into the SQL).
        business_key_conf: Iterable of mappings with 'TABLE', 'COLUMNS' and
            'WHERE' keys.
    """
    requires_conf = table_nm.startswith("B_")

    if _report_configured_duplicates(cursor, table_nm, business_key_conf):
        return

    if requires_conf:
        add_msg('4 duplicates', table_nm, '0', "No conf for table: " + table_nm)
        return

    # No configuration for this table: fall back to the generic ID columns.
    if has_mart_source_id:
        _report_id_duplicates(cursor, table_nm, "MART_SOURCE_ID")

    if not has_awo_id:
        return

    if has_cur_rec_ind:
        _report_id_duplicates(cursor, table_nm, "AWO_ID",
                              " WHERE CUR_REC_IND = 1")
    if has_current_record_ind:
        _report_id_duplicates(cursor, table_nm, "AWO_ID",
                              " WHERE CURRENT_RECORD_IND = 1")
    elif not has_cur_rec_ind:
        # Neither current-record flag exists, so every row counts.
        _report_id_duplicates(cursor, table_nm, "AWO_ID")
def check_column(cursor, tb_list, business_key_conf):
    """Run the per-column sanity checks for several tables and print findings.

    Columns of all tables in ``tb_list`` are read from
    ``information_schema.columns`` ordered by table name and ordinal position.
    The first column listed for each table is used as that table's key column,
    and probes that filter on it only look at rows where it is > 0.  For every
    column the following is printed when it applies:

    * the column has no non-NULL value;
    * otherwise, the column has no value other than the empty string;
    * otherwise, for ``*_KEY`` columns (except ``LABEL_KEY`` and anything in
      ``D_TRANSLATION``): no value greater than -1, any NULL value, and, for
      date/time/item/order keys, any -1 value.

    Columns named ``MART_SOURCE_ID``, ``AWO_ID``, ``CUR_REC_IND`` and
    ``CURRENT_RECORD_IND`` are only noted (when they pass the emptiness
    checks); once all columns of a table have been seen, the notes are passed
    to ``check_duplicate`` for that table.

    Args:
        cursor: Database cursor handed to ``query`` / ``has_data``.
        tb_list: Text placed verbatim inside ``table_name IN (...)``, i.e. a
            comma-separated list of quoted table names.
        business_key_conf: Business-key configuration forwarded unchanged to
            ``check_duplicate``.

    Returns:
        The number of distinct tables encountered in the metadata result.
    """
    columns_sql = ("SELECT table_name,column_name FROM information_schema.columns"
                   " WHERE table_schema = 'dbo' AND table_name IN (" + tb_list +
                   ")ORDER BY table_name, ordinal_position")
    rows = query(cursor, columns_sql)

    has_mart_source_id = has_awo_id = False
    has_cur_rec_ind = has_current_record_ind = False
    old_table_name = ""
    table_count = 0
    pk_column = ""

    for item in rows:
        table_name = item[0]
        column_name = item[1]

        if old_table_name != table_name:
            # Rows arrive grouped by table, so a change of name means the
            # previous table is complete and this row is the new table's
            # first column.
            pk_column = column_name
            if table_count != 0:
                check_duplicate(cursor, has_mart_source_id, has_awo_id,
                                has_cur_rec_ind, has_current_record_ind,
                                old_table_name, business_key_conf)
            table_count += 1
            has_mart_source_id = has_awo_id = False
            has_cur_rec_ind = has_current_record_ind = False
            old_table_name = table_name

        # Building blocks reused by every probe and message for this column.
        probe = ("SELECT TOP 1 " + column_name + " FROM " + table_name +
                 " WITH(NOLOCK) WHERE ")
        pk_positive = pk_column + " > 0"
        label = "\033[32m" + table_name + "." + column_name + "\033[0m"

        # Check 1: every value is NULL (ignoring rows whose key is <= 0).
        if not has_data(cursor, probe + pk_positive + " AND " + column_name +
                        " IS NOT NULL"):
            print(label + " is empty.")
            continue

        # Check 2: values exist but all of them are the empty string.
        if not has_data(cursor, probe + pk_positive + " AND convert(varchar," +
                        column_name + ") <> ''"):
            print(label + " are all empty string.")
            continue

        # The column holds real data: classify it by name.
        column_key = str(column_name)
        if column_key == "MART_SOURCE_ID":
            has_mart_source_id = True
        elif column_key == "AWO_ID":
            has_awo_id = True
        elif column_key == "CUR_REC_IND":
            has_cur_rec_ind = True
        elif column_key == "CURRENT_RECORD_IND":
            has_current_record_ind = True
        elif (column_key.endswith("_KEY") and column_name != "LABEL_KEY"
              and table_name != "D_TRANSLATION"):
            if not has_data(cursor, probe + column_name + " > -1"):
                print(label + " \033[33mis all -1, please verify.\033[0m")

            # Key columns are not expected to contain NULL at all.
            if has_data(cursor, probe + column_name + " IS NULL"):
                print(label + " has \033[22mNULL\033[0m value, please verify.")

            if column_key.endswith(
                    ("_DATE_KEY", "_TIME_KEY", "ITEM_KEY", "ORDER_KEY")):
                if has_data(cursor, probe + column_name + " = -1 AND " +
                            pk_positive + ";"):
                    print(label +
                          " has \033[22m-1\033[0m value, please verify.")

    # Flush the last table; nothing to flush when the query returned no rows.
    if table_count != 0:
        check_duplicate(cursor, has_mart_source_id, has_awo_id,
                        has_cur_rec_ind, has_current_record_ind,
                        old_table_name, business_key_conf)
    return table_count