def _get_foreign_keys_metadata(conversion, table_name):
    """
    Retrieves foreign keys metadata.
    :param conversion: Conversion
    :param table_name: str
    :return: None
    """
    log_title = 'ForeignKeyProcessor::_get_foreign_keys_metadata'
    FsOps.log(
        conversion,
        '\t--[%s] Search foreign keys for table "%s"."%s"...'
        % (log_title, conversion.schema, table_name))

    original_table_name = ExtraConfigProcessor.get_table_name(
        conversion, table_name, should_get_original=True)

    # "refs"/"cRefs" resolve constraints where this table is the referencing
    # side; "links"/"cLinks" join the side where this table is referenced.
    sql = """
    SELECT cols.COLUMN_NAME, refs.REFERENCED_TABLE_NAME,
           refs.REFERENCED_COLUMN_NAME, cRefs.UPDATE_RULE,
           cRefs.DELETE_RULE, cRefs.CONSTRAINT_NAME
    FROM INFORMATION_SCHEMA.`COLUMNS` AS cols
    INNER JOIN INFORMATION_SCHEMA.`KEY_COLUMN_USAGE` AS refs
        ON refs.TABLE_SCHEMA = cols.TABLE_SCHEMA
        AND refs.REFERENCED_TABLE_SCHEMA = cols.TABLE_SCHEMA
        AND refs.TABLE_NAME = cols.TABLE_NAME
        AND refs.COLUMN_NAME = cols.COLUMN_NAME
    LEFT JOIN INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS AS cRefs
        ON cRefs.CONSTRAINT_SCHEMA = cols.TABLE_SCHEMA
        AND cRefs.CONSTRAINT_NAME = refs.CONSTRAINT_NAME
    LEFT JOIN INFORMATION_SCHEMA.`KEY_COLUMN_USAGE` AS links
        ON links.TABLE_SCHEMA = cols.TABLE_SCHEMA
        AND links.REFERENCED_TABLE_SCHEMA = cols.TABLE_SCHEMA
        AND links.REFERENCED_TABLE_NAME = cols.TABLE_NAME
        AND links.REFERENCED_COLUMN_NAME = cols.COLUMN_NAME
    LEFT JOIN INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS AS cLinks
        ON cLinks.CONSTRAINT_SCHEMA = cols.TABLE_SCHEMA
        AND cLinks.CONSTRAINT_NAME = links.CONSTRAINT_NAME
    WHERE cols.TABLE_SCHEMA = '%s' AND cols.TABLE_NAME = '%s';
    """ % (conversion.mysql_db_name, original_table_name)

    result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql,
        vendor=DBVendors.MYSQL,
        process_exit_on_error=False,
        should_return_client=False)

    if result.error:
        return

    # Merge keys found in the database with keys declared via extra config.
    extra_rows = ExtraConfigProcessor.parse_foreign_keys(conversion, table_name)
    full_rows = (result.data or []) + extra_rows
    ForeignKeyProcessor._set_foreign_keys_for_given_table(
        conversion, table_name, full_rows)

    FsOps.log(
        conversion,
        '\t--[%s] Foreign keys for table "%s"."%s" are set...'
        % (log_title, conversion.schema, table_name))
def create_indexes(conversion, table_name):
    """
    Creates indexes, including PK, on given table.
    :param conversion: Conversion
    :param table_name: str
    :return: None
    """
    log_title = 'IndexesProcessor::create_indexes'
    original_table_name = ExtraConfigProcessor.get_table_name(
        conversion, table_name, should_get_original=True)

    show_index_result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql='SHOW INDEX FROM `%s`;' % original_table_name,
        vendor=DBVendors.MYSQL,
        process_exit_on_error=False,
        should_return_client=False)

    if show_index_result.error:
        return

    pg_indexes = {}

    for index in show_index_result.data:
        pg_column_name = ExtraConfigProcessor.get_column_name(
            conversion=conversion,
            original_table_name=original_table_name,
            current_column_name=index['Column_name'],
            should_get_original=False)

        key_name = index['Key_name']

        # A repeated key name means a multi-column index:
        # append this column to the already-registered index.
        if key_name in pg_indexes:
            pg_indexes[key_name]['column_name'].append('"%s"' % pg_column_name)
            continue

        # PostgreSQL has no SPATIAL index type; GIST is the equivalent.
        index_type = index['Index_type']
        pg_index_type = 'GIST' if index_type == 'SPATIAL' else index_type

        pg_indexes[key_name] = {
            'is_unique': index['Non_unique'] == 0,
            'column_name': ['"%s"' % pg_column_name],
            'index_type': ' USING %s' % pg_index_type,
        }

    params = [
        [conversion, index_name, table_name, pg_indexes, position]
        for position, index_name in enumerate(pg_indexes.keys())
    ]

    ConcurrencyManager.run_in_parallel(conversion, IndexesProcessor._set_index, params)

    msg = '\t--[%s] "%s"."%s": PK/indices are successfully set...' \
        % (log_title, conversion.schema, table_name)
    FsOps.log(conversion, msg, conversion.dic_tables[table_name].table_log_path)
def _set_column_comment(conversion, table_name, original_table_name, column):
    """
    Creates comment on specified column.
    :param conversion: Conversion
    :param table_name: str
    :param original_table_name: str
    :param column: dict
    :return: None
    """
    log_title = 'CommentsProcessor::_set_column_comment'
    column_name = ExtraConfigProcessor.get_column_name(
        conversion=conversion,
        original_table_name=original_table_name,
        current_column_name=column['Field'],
        should_get_original=False)

    # Single quotes in the MySQL comment must be escaped for the PG literal.
    escaped_comment = CommentsProcessor._escape_quotes(column['Comment'])
    sql = 'COMMENT ON COLUMN "%s"."%s"."%s" IS \'%s\';' \
        % (conversion.schema, table_name, column_name, escaped_comment)

    result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql,
        vendor=DBVendors.PG,
        process_exit_on_error=False,
        should_return_client=False)

    if result.error:
        return

    msg = '\t--[%s] Set comment for "%s"."%s" column: "%s"...' \
        % (log_title, conversion.schema, table_name, column_name)
    FsOps.log(conversion, msg, conversion.dic_tables[table_name].table_log_path)
def set_sequence_value(conversion, table_name):
    """
    Sets sequence value.
    :param conversion: Conversion
    :param table_name: str
    :return: None
    """
    original_table_name = ExtraConfigProcessor.get_table_name(
        conversion, table_name, True)

    # Only the first auto_increment column matters (MySQL allows one per table
    # anyway - TODO confirm against caller expectations).
    auto_increment_column = next(
        (column['Field']
         for column in conversion.dic_tables[table_name].table_columns
         if column['Extra'] == 'auto_increment'),
        None)

    if auto_increment_column is None:
        return  # No auto-incremented column found.

    log_title = 'SequenceProcessor::set_sequence_value'
    column_name = ExtraConfigProcessor.get_column_name(
        conversion=conversion,
        original_table_name=original_table_name,
        current_column_name=auto_increment_column,
        should_get_original=False)

    seq_name = '%s_%s_seq' % (table_name, column_name)

    # Advance the sequence to the current MAX of the target column.
    sql = 'SELECT SETVAL(\'"%s"."%s"\',' % (conversion.schema, seq_name) \
        + '(SELECT MAX("%s") FROM "%s"."%s"));' \
        % (column_name, conversion.schema, table_name)

    result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql,
        vendor=DBVendors.PG,
        process_exit_on_error=False,
        should_return_client=False)

    if not result.error:
        msg = '\t--[%s] Sequence "%s"."%s" is created...' \
            % (log_title, conversion.schema, seq_name)
        FsOps.log(conversion, msg,
                  conversion.dic_tables[table_name].table_log_path)
def prepare_data_chunks(conversion, table_name, have_data_chunks_processed):
    """
    Prepares a list of tables metadata.
    :param conversion: Conversion
    :param table_name: str
    :param have_data_chunks_processed: bool
    :return: None
    """
    if have_data_chunks_processed:
        return

    log_title = 'DataChunksProcessor::prepare_data_chunks'
    log_path = conversion.dic_tables[table_name].table_log_path
    original_table_name = ExtraConfigProcessor.get_table_name(
        conversion, table_name, True)

    select_field_list = ColumnsDataArranger.arrange_columns_data(
        conversion.dic_tables[table_name].table_columns,
        conversion.mysql_version)

    count_result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql='SELECT COUNT(1) AS rows_count FROM `%s`;' % original_table_name,
        vendor=DBVendors.MYSQL,
        process_exit_on_error=True,
        should_return_client=False)

    rows_cnt = int(count_result.data[0]['rows_count'])
    FsOps.log(
        conversion,
        '\t--[%s] Total rows to insert into "%s"."%s": %d'
        % (log_title, conversion.schema, table_name, rows_cnt),
        log_path)

    # One data-pool record per table; workers later consume this metadata.
    meta = {
        '_tableName': table_name,
        '_selectFieldList': select_field_list,
        '_rowsCnt': rows_cnt,
    }

    sql = 'INSERT INTO "{0}"."data_pool_{0}{1}"("metadata") VALUES (%(meta)s);' \
        .format(conversion.schema, conversion.mysql_db_name)

    DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql,
        vendor=DBVendors.PG,
        process_exit_on_error=True,
        should_return_client=False,
        client=None,
        bindings={'meta': json.dumps(meta)})
def _process_columns_comments(conversion, table_name):
    """
    Creates columns comments.
    :param conversion: Conversion
    :param table_name: str
    :return: None
    """
    original_table_name = ExtraConfigProcessor.get_table_name(
        conversion, table_name, should_get_original=True)

    # Only columns that actually carry a MySQL comment are processed.
    params = []
    for column in conversion.dic_tables[table_name].table_columns:
        if column['Comment'] != '':
            params.append([conversion, table_name, original_table_name, column])

    ConcurrencyManager.run_in_parallel(
        conversion, CommentsProcessor._set_column_comment, params)
def _process_table_comments(conversion, table_name):
    """
    Creates table comments.
    :param conversion: Conversion
    :param table_name: str
    :return: None
    """
    log_title = 'CommentsProcessor::_process_table_comments'
    original_table_name = ExtraConfigProcessor.get_table_name(
        conversion, table_name, should_get_original=True)

    sql_select_comment = ''' SELECT table_comment AS table_comment FROM information_schema.tables WHERE table_schema = '%s' AND table_name = '%s'; ''' \
        % (conversion.mysql_db_name, original_table_name)

    select_comments_result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql_select_comment,
        vendor=DBVendors.MYSQL,
        process_exit_on_error=False,
        should_return_client=False)

    if select_comments_result.error:
        return

    # Escape single quotes before embedding the comment into the PG literal.
    comment = CommentsProcessor._escape_quotes(
        select_comments_result.data[0]['table_comment'])
    sql_create_comment = 'COMMENT ON TABLE "%s"."%s" IS \'%s\';' \
        % (conversion.schema, table_name, comment)

    create_comment_result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql_create_comment,
        vendor=DBVendors.PG,
        process_exit_on_error=False,
        should_return_client=False)

    if create_comment_result.error:
        return

    msg = '\t--[%s] Successfully set comment for table "%s"."%s"' \
        % (log_title, conversion.schema, table_name)
    FsOps.log(conversion, msg, conversion.dic_tables[table_name].table_log_path)
def process_enum(conversion, table_name):
    """
    Defines which columns of the given table are of type "enum".
    Sets an appropriate constraint, if appropriate.
    :param conversion: Conversion
    :param table_name: str
    :return: None
    """
    log_title = 'EnumProcessor::process_enum'
    FsOps.log(
        conversion,
        '\t--[%s] Defines "ENUMs" for table "%s"."%s"'
        % (log_title, conversion.schema, table_name),
        conversion.dic_tables[table_name].table_log_path)

    original_table_name = ExtraConfigProcessor.get_table_name(
        conversion, table_name, True)

    # Only enum-typed columns get a CHECK constraint.
    params = []
    for column in conversion.dic_tables[table_name].table_columns:
        if EnumProcessor._is_enum(column):
            params.append([conversion, table_name, original_table_name, column])

    ConcurrencyManager.run_in_parallel(conversion, EnumProcessor._set_enum, params)
def _set_enum(conversion, table_name, original_table_name, column):
    """
    Checks if given column is an enum.
    Sets the enum, if appropriate.
    :param conversion: Conversion
    :param table_name: str
    :param original_table_name: str
    :param column: dict
    :return: None
    """
    log_title = 'EnumProcessor::_set_enum'
    column_name = ExtraConfigProcessor.get_column_name(
        conversion=conversion,
        original_table_name=original_table_name,
        current_column_name=column['Field'],
        should_get_original=False)

    # column['Type'] looks like "enum('a','b')" - guaranteed by the preceding
    # EnumProcessor._is_enum check. Take the part after the first "(" and drop
    # the trailing ")" so the values list is clean.
    # (Previously the trailing ")" was kept and compensated by an unbalanced
    # format string - same resulting SQL, but fragile and misleading.)
    # NOTE(review): a "(" inside an enum value would still break the split -
    # pre-existing limitation, confirm whether such values can occur.
    enum_values = column['Type'].split('(')[1][:-1]

    sql = 'ALTER TABLE "%s"."%s" ADD CHECK ("%s" IN (%s));' \
        % (conversion.schema, table_name, column_name, enum_values)

    result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql,
        vendor=DBVendors.PG,
        process_exit_on_error=False,
        should_return_client=False)

    if not result.error:
        msg = '\t--[%s] Set "ENUM" for "%s"."%s"."%s"...' \
            % (log_title, conversion.schema, table_name, column_name)
        FsOps.log(conversion, msg,
                  conversion.dic_tables[table_name].table_log_path)
def _set_foreign_keys_for_given_table(conversion, table_name, rows):
    """
    Sets foreign keys for given table.
    :param conversion: Conversion
    :param table_name: str
    :param rows: list
    :return: None
    """
    constraints = {}
    original_table_name = ExtraConfigProcessor.get_table_name(
        conversion, table_name, should_get_original=True)

    for row in rows:
        current_column_name = ExtraConfigProcessor.get_column_name(
            conversion=conversion,
            original_table_name=original_table_name,
            current_column_name=row['COLUMN_NAME'],
            should_get_original=False)

        current_referenced_table_name = ExtraConfigProcessor.get_table_name(
            conversion=conversion,
            current_table_name=row['REFERENCED_TABLE_NAME'],
            should_get_original=False)

        original_referenced_table_name = ExtraConfigProcessor.get_table_name(
            conversion=conversion,
            current_table_name=row['REFERENCED_TABLE_NAME'],
            should_get_original=True)

        current_referenced_column_name = ExtraConfigProcessor.get_column_name(
            conversion=conversion,
            original_table_name=original_referenced_table_name,
            current_column_name=row['REFERENCED_COLUMN_NAME'],
            should_get_original=False)

        constraint_name = row['CONSTRAINT_NAME']

        if constraint_name in constraints:
            # Composite foreign key: append this column pair to the constraint
            # registered by an earlier row, then keep processing the rest.
            # BUG FIX: this branch previously ended with "return", which
            # aborted the whole function on the first composite FK - skipping
            # every remaining row and never scheduling the FK creation below.
            # (Same accumulate-or-continue pattern as in create_indexes.)
            constraints[constraint_name]['column_name'].append(
                '"%s"' % current_column_name)
            constraints[constraint_name]['referenced_column_name'].append(
                '"%s"' % current_referenced_column_name)
            continue

        constraints[constraint_name] = {
            'column_name': ['"%s"' % current_column_name],
            'referenced_column_name': ['"%s"' % current_referenced_column_name],
            'referenced_table_name': current_referenced_table_name,
            'update_rule': row['UPDATE_RULE'],
            'delete_rule': row['DELETE_RULE'],
        }

    params = [[conversion, constraints, foreign_key, table_name]
              for foreign_key in constraints.keys()]

    ConcurrencyManager.run_in_parallel(
        conversion, ForeignKeyProcessor._set_single_foreign_key, params)
def populate_table_worker(
    conversion,
    table_name,
    str_select_field_list,
    rows_cnt,
    data_pool_id,
    connection_to_master
):
    """
    Loads a chunk of data using "PostgreSQL COPY".
    :param conversion: Conversion
    :param table_name: str
    :param str_select_field_list: str
    :param rows_cnt: int
    :param data_pool_id: int
    :param connection_to_master: multiprocessing.connection.Connection
    :return: None
    """
    log_title = 'DataLoader::populate_table_worker'
    original_table_name = ExtraConfigProcessor.get_table_name(conversion, table_name, True)
    sql = 'SELECT %s FROM `%s`;' % (str_select_field_list, original_table_name)
    original_session_replication_role = None
    text_stream = None
    pg_cursor = None
    pg_client = None
    mysql_cursor = None
    mysql_client = None

    try:
        mysql_client = DBAccess.get_mysql_unbuffered_client(conversion)
        mysql_cursor = mysql_client.cursor()
        mysql_cursor.execute(sql)
        number_of_inserted_rows = 0

        # BUG FIX: "max_workers" must be a positive int. The previous "/" gave
        # a float (ProcessPoolExecutor breaks on it), and pool sizes < 2 would
        # request 0 workers - hence the floor of 1.
        number_of_workers = max(
            1, min(conversion.max_each_db_connection_pool_size // 2, cpu_count()))

        with ProcessPoolExecutor(max_workers=number_of_workers) as executor:
            while True:
                batch_size = 25000  # TODO: think about batch size calculation.
                batch = mysql_cursor.fetchmany(batch_size)
                rows_to_insert = len(batch)

                if rows_to_insert == 0:
                    break  # The unbuffered cursor is exhausted.

                executor.submit(
                    DataLoader._arrange_and_load_batch,
                    conversion.config,
                    table_name,
                    batch,
                    rows_cnt,
                    rows_to_insert,
                    number_of_inserted_rows
                )
    except Exception as e:
        msg = 'Data retrieved by following MySQL query has been rejected by the target PostgreSQL server.'
        FsOps.generate_error(conversion, '\t--[{0}] {1}\n\t--[{0}] {2}'.format(log_title, e, msg), sql)
    finally:
        try:
            # Tell the master process this table's populating is done.
            connection_to_master.send(table_name)
        except Exception as ex:
            msg = 'Failed to notify master that %s table\'s populating is finished.' % table_name
            FsOps.generate_error(conversion, '\t--[{0}] {1}\n\t--[{0}] {2}'.format(log_title, ex, msg))
        finally:
            # Close whatever resources were actually opened.
            for resource in (connection_to_master, text_stream, pg_cursor, mysql_cursor, mysql_client):
                if resource:
                    resource.close()

            DataLoader.delete_data_pool_item(conversion, data_pool_id, pg_client, original_session_replication_role)
def create_sequence(conversion, table_name):
    """
    Defines which column in given table has the "auto_increment" attribute.
    Creates an appropriate sequence.
    :param conversion: Conversion
    :param table_name: str
    :return: None
    """
    original_table_name = ExtraConfigProcessor.get_table_name(
        conversion, table_name, True)

    auto_increment_column = next(
        (column['Field']
         for column in conversion.dic_tables[table_name].table_columns
         if column['Extra'] == 'auto_increment'),
        None)

    if auto_increment_column is None:
        return  # No auto-incremented column found.

    log_title = 'SequenceProcessor::create_sequence'
    column_name = ExtraConfigProcessor.get_column_name(
        conversion=conversion,
        original_table_name=original_table_name,
        current_column_name=auto_increment_column,
        should_get_original=False)

    seq_name = '%s_%s_seq' % (table_name, column_name)

    # Step 1: create the sequence; keep the client for the follow-up queries.
    create_sequence_result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql='CREATE SEQUENCE "%s"."%s";' % (conversion.schema, seq_name),
        vendor=DBVendors.PG,
        process_exit_on_error=False,
        should_return_client=True)

    if create_sequence_result.error:
        DBAccess.release_db_client(conversion, create_sequence_result.client)
        return

    # Step 2: make the column default to the sequence's next value.
    sql_set_next_val = (
        'ALTER TABLE "%s"."%s" ALTER COLUMN "%s"'
        " SET DEFAULT NEXTVAL('%s.%s');"
        % (conversion.schema, table_name, column_name,
           conversion.schema, seq_name))

    set_next_val_result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql_set_next_val,
        vendor=DBVendors.PG,
        process_exit_on_error=False,
        should_return_client=True,
        client=create_sequence_result.client)

    if set_next_val_result.error:
        DBAccess.release_db_client(conversion, set_next_val_result.client)
        return

    # Step 3: tie the sequence's lifetime to the column.
    sql_set_sequence_owner = (
        'ALTER SEQUENCE "%s"."%s" OWNED BY "%s"."%s"."%s";'
        % (conversion.schema, seq_name,
           conversion.schema, table_name, column_name))

    set_sequence_owner_result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql_set_sequence_owner,
        vendor=DBVendors.PG,
        process_exit_on_error=False,
        should_return_client=True,
        client=set_next_val_result.client)

    if set_sequence_owner_result.error:
        DBAccess.release_db_client(conversion, set_sequence_owner_result.client)
        return

    # Step 4: advance the sequence to the column's current MAX.
    sql_set_sequence_value = (
        'SELECT SETVAL(\'"%s"."%s"\', (SELECT MAX("%s") FROM "%s"."%s"));'
        % (conversion.schema, seq_name, column_name,
           conversion.schema, table_name))

    set_sequence_value_result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql_set_sequence_value,
        vendor=DBVendors.PG,
        process_exit_on_error=False,
        should_return_client=False,
        client=set_sequence_owner_result.client)

    if not set_sequence_value_result.error:
        msg = '\t--[%s] Sequence "%s"."%s" is created...' \
            % (log_title, conversion.schema, seq_name)
        FsOps.log(conversion, msg,
                  conversion.dic_tables[table_name].table_log_path)
def load_structure(conversion):
    """
    Loads source tables and views, that need to be migrated.
    :param conversion: Conversion, the configuration object.
    :return: None
    """
    log_title = 'StructureLoader::load_structure'
    StructureLoader._get_mysql_version(conversion)
    have_tables_loaded = MigrationStateManager.get(conversion, 'tables_loaded')

    # Build the relation listing query, narrowed by include/exclude lists.
    sql = 'SHOW FULL TABLES IN `%s` WHERE 1 = 1' % conversion.mysql_db_name

    if len(conversion.include_tables) != 0:
        included = ','.join('"%s"' % name for name in conversion.include_tables)
        sql += ' AND Tables_in_%s IN(%s)' % (conversion.mysql_db_name, included)

    if len(conversion.exclude_tables) != 0:
        excluded = ','.join('"%s"' % name for name in conversion.exclude_tables)
        sql += ' AND Tables_in_%s NOT IN(%s)' % (conversion.mysql_db_name, excluded)

    sql += ';'
    result = DBAccess.query(
        conversion=conversion,
        caller=log_title,
        sql=sql,
        vendor=DBVendors.MYSQL,
        process_exit_on_error=True,
        should_return_client=False)

    tables_cnt, views_cnt = 0, 0
    thread_pool_params = []

    for row in result.data:
        relation_name = row['Tables_in_' + conversion.mysql_db_name]
        relation_type = row['Table_type']

        if relation_type == 'BASE TABLE' and Utils.get_index_of(
                relation_name, conversion.exclude_tables) == -1:
            relation_name = ExtraConfigProcessor.get_table_name(
                conversion, relation_name, False)
            conversion.tables_to_migrate.append(relation_name)
            conversion.dic_tables[relation_name] = Table(
                '%s/%s.log' % (conversion.logs_dir_path, relation_name))
            thread_pool_params.append(
                [conversion, relation_name, have_tables_loaded])
            tables_cnt += 1
        elif relation_type == 'VIEW':
            conversion.views_to_migrate.append(relation_name)
            views_cnt += 1

    ConcurrencyManager.run_in_parallel(
        conversion,
        StructureLoader.process_table_before_data_loading,
        thread_pool_params)

    msg = '''\t--[{0}] Source DB structure is loaded...\n\t--[{0}] Tables to migrate: {1}\n --[{0}] Views to migrate: {2}'''.format(log_title, tables_cnt, views_cnt)
    FsOps.log(conversion, msg)
    MigrationStateManager.set(conversion, 'tables_loaded')