async def _prepare_content_type_tables(self):
    """
    Prepares the mapping between content_type_id and the corresponding
    table name in the database
    """
    logger.info("prepare content type tables")

    content_type_table_list = await self._dst_database.fetch_raw_sql(
        SQLRepository.get_content_type_table_sql()
    )
    content_type_table_dict = {
        (app_label, model): table_name
        for table_name, app_label, model in content_type_table_list
    }

    content_type_list = await self._src_database.fetch_raw_sql(
        SQLRepository.get_content_type_sql()
    )
    content_type_dict = {
        (app_label, model): content_type_id
        for content_type_id, app_label, model in content_type_list
    }

    for key, table_name in content_type_table_dict.items():
        self.content_type_table[table_name] = content_type_dict[key]

    del content_type_table_list[:]
    del content_type_table_dict
    del content_type_list[:]
    del content_type_dict
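# Illustrative note (not part of the original code; values below are hypothetical):
# the two queries are expected to return Django-style content type rows keyed by
# (app_label, model), which are then joined into a mapping from table name to the
# source database's content_type_id, e.g.:
#
#   content_type_table_dict = {('auth', 'user'): 'auth_user'}
#   content_type_dict       = {('auth', 'user'): 4}
#   self.content_type_table = {'auth_user': 4}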
async def _prepare_revert_table(
    self,
    table: DBTable,
    revert_table: DBTable,
    revert_columns: Set[DBColumn],
):
    """
    Preparing revert table
    """
    logger.info(f'prepare revert table {revert_table.name}')

    if (
        revert_table.fk_columns_with_key_column
        and not table.with_key_column
    ):
        return

    if revert_table.need_transfer_pks:
        coroutines = [
            asyncio.create_task(
                self._get_revert_table_column_values(
                    table=table,
                    revert_table=revert_table,
                    revert_column=revert_column,
                )
            )
            for revert_column in revert_columns
        ]

        if coroutines:
            await asyncio.wait(coroutines)
async def _build_key_column_values_hierarchical_structure(self):
    """
    Build a hierarchy tree of key table records using the parent_id column
    """
    logger.info("build tree of enterprises for transfer process")

    key_table: DBTable = self._dst_database.tables.get(settings.KEY_TABLE_NAME)

    hierarchy_column = await key_table.get_column_by_name(
        column_name=settings.KEY_TABLE_HIERARCHY_COLUMN_NAME,
    )

    if hierarchy_column:
        coroutines = [
            asyncio.create_task(
                self._get_key_table_parents_values(
                    key_table_primary_key_name=key_table.primary_key.name,
                    key_table_primary_key_value=key_column_value,
                )
            )
            for key_column_value in copy(self._key_column_values)
        ]

        if coroutines:
            await asyncio.wait(coroutines)

    logger.info(
        f"transferring enterprises - "
        f"{make_str_from_iterable(self._key_column_values, with_quotes=True)}"  # noqa
    )
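# Illustrative sketch (an assumption, not the project's actual query):
# _get_key_table_parents_values is expected to walk up the parent_id chain of a
# single key record and add every ancestor id to self._key_column_values. One
# possible way to express such a walk is a recursive CTE of roughly this shape:
#
#   WITH RECURSIVE parents AS (
#       SELECT id, parent_id FROM <key_table> WHERE id = <key_column_value>
#       UNION
#       SELECT kt.id, kt.parent_id
#       FROM <key_table> kt
#       JOIN parents p ON kt.id = p.parent_id
#   )
#   SELECT id FROM parents;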
def __init__(
    self,
    db_connection_parameters: DBConnectionParameters,
):
    super().__init__(
        db_connection_parameters=db_connection_parameters,
    )

    logger.info('init dst database')
async def prepare_tables(self):
    """
    Prepare tables structure for the transferring process
    """
    logger.info('prepare tables structure for transferring process')

    self.tables = {
        table_name: DBTable(
            name=table_name,
        )
        for table_name in self.table_names
    }

    chunks_table_names = make_chunks(
        iterable=self.table_names,
        size=settings.TABLES_LIMIT_PER_TRANSACTION,
        is_list=True,
    )

    coroutines = [
        self._prepare_chunk_tables(
            chunk_table_names=chunk_table_names,
        )
        for chunk_table_names in chunks_table_names
    ]

    if coroutines:
        await asyncio.gather(*coroutines)

    logger.info(
        f'prepare tables progress - {len(self.tables.keys())}/'
        f'{len(self.table_names)}'
    )
async def truncate_tables(self):
    """
    Truncating tables
    """
    if settings.IS_TRUNCATE_TABLES:
        logger.info('start truncating tables..')

        if settings.TABLES_TRUNCATE_INCLUDED:
            table_names = settings.TABLES_TRUNCATE_INCLUDED
        else:
            table_names = tuple(
                filter(
                    lambda table_name: (
                        table_name not in settings.TABLES_WITH_GENERIC_FOREIGN_KEY
                    ),
                    self.table_names,
                )
            )

        if settings.TABLES_TRUNCATE_EXCLUDED:
            table_names = tuple(
                filter(
                    lambda table_name: (
                        table_name not in settings.TABLES_TRUNCATE_EXCLUDED
                    ),
                    table_names,
                )
            )

        truncate_table_queries = SQLRepository.get_truncate_table_queries(
            table_names=table_names,
        )

        for query in truncate_table_queries:
            await self.execute_raw_sql(query)

        logger.info('truncating tables finished.')
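# Illustrative note (an assumption about SQLRepository.get_truncate_table_queries,
# whose implementation is not shown here): the repository is expected to return
# one statement per table name, roughly of the form
#
#   TRUNCATE TABLE "some_table" CASCADE;
#
# whether CASCADE or RESTART IDENTITY is used depends on the actual SQL builder.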
async def _collect_generic_tables_records_ids(self):
    """
    Collects record ids of tables that contain a generic foreign key

    Such tables are expected to have object_id and content_type_id columns
    """
    logger.info("collect generic tables records ids")

    await asyncio.wait([
        asyncio.create_task(self._prepare_content_type_tables()),
    ])

    generic_table_names = set(
        settings.TABLES_WITH_GENERIC_FOREIGN_KEY
    ).difference(
        settings.EXCLUDED_TABLES
    )

    coroutines = [
        asyncio.create_task(
            self._prepare_generic_table_data(
                self._dst_database.tables.get(table_name)
            )
        )
        for table_name in filter(None, generic_table_names)
    ]

    if coroutines:
        await asyncio.wait(coroutines)

    logger.info("finish collecting")
async def _update_sequences(self):
    """
    Updates sequence counters to their maximum values
    """
    logger.info("start updating sequences...")

    await self._dst_database.set_max_tables_sequences()

    logger.info("finished updating sequences!")
async def _transfer_chunk_table_data(
    self,
    table: DBTable,
    need_import_ids_chunk: List[Union[int, str]],
):
    """
    Transfers a chunk of table data to the target database
    """
    transfer_sql = SQLRepository.get_transfer_records_sql(
        table=table,
        connection_params_str=self._src_database.connection_str,
        primary_key_ids=need_import_ids_chunk,
    )

    logger.info(f'transfer chunk table data - {table.name}')

    transferred_ids = None

    async with self._dst_database.connection_pool.acquire() as connection:
        try:
            transferred_ids = await connection.fetch(transfer_sql)
        except (
            UndefinedColumnError,
            NotNullViolationError,
            PostgresSyntaxError,
        ) as e:
            raise PostgresError(
                f'{str(e)}, table - {table.name}, '
                f'sql - {transfer_sql} --- _transfer_chunk_table_data'
            )

    if transferred_ids:
        transferred_ids = [tr[0] for tr in transferred_ids]

        table.transferred_pks.update(transferred_ids)

    del transfer_sql
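# Illustrative note (a guess, not the statement SQLRepository.get_transfer_records_sql
# actually builds): since the source connection string and the chunk of primary keys
# are passed in, the generated SQL is presumably an INSERT ... SELECT that pulls the
# rows from the source database (for example via dblink or a postgres_fdw foreign
# table) and returns the copied primary keys, roughly:
#
#   INSERT INTO some_table
#   SELECT * FROM dblink(
#       '<src connection string>',
#       'SELECT * FROM some_table WHERE id IN (1, 2, 3)'
#   ) AS t(<column definitions>)
#   ON CONFLICT DO NOTHING
#   RETURNING id;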
def update_network_configuration_hostname(self, id_network, hostname, token):
    req_data = {"hostName": hostname}
    req_data = json.dumps(req_data)

    headers = {
        'Authorization': 'Bearer %s' % token,
        'Content-Type': 'application/json',
    }

    logger.info("Request data: " + req_data)

    self._response = self.client.put(
        self._basePath + "/" + id_network,
        headers=headers,
        data=req_data,
    )

    logger.info("Response json updated: " + json.dumps(self._response.json()))

    return self._response
async def prepare_structure(self):
    """
    Prepare destination database structure
    """
    await self.prepare_table_names()
    await self.prepare_tables()

    logger.info(f'dst database tables count - {len(self.table_names)}')
async def disable_triggers(self):
    """
    Disable database triggers
    """
    disable_triggers_sql = SQLRepository.get_disable_triggers_sql()

    await self.execute_raw_sql(disable_triggers_sql)

    logger.info('triggers disabled.')
async def enable_triggers(self):
    """
    Enable database triggers
    """
    enable_triggers_sql = SQLRepository.get_enable_triggers_sql()

    await self.execute_raw_sql(enable_triggers_sql)

    logger.info('triggers enabled.')
def is_full_prepared(self):
    logger.debug(
        f'table - {self.name} -- count table records {self.full_count} and '
        f'need transfer pks {len(self.need_transfer_pks)}'
    )

    if len(self.need_transfer_pks) >= self.full_count - self.inaccuracy_count:
        logger.info(f'table {self.name} full transferred')

        return True
async def collect(self):
    logger.info('start preparing generic tables..')

    with StatisticIndexer(
        self._statistic_manager,
        TransferringStagesEnum.COLLECT_GENERIC_TABLES_RECORDS_IDS,
    ):
        await asyncio.wait(
            [
                asyncio.create_task(
                    self._collect_generic_tables_records_ids()
                ),
            ]
        )

    logger.info('preparing generic tables finished.')
async def collect(self):
    logger.info('start preparing tables sorted by dependency..')

    not_transferred_tables = list(
        filter(
            lambda t: (
                not t.is_ready_for_transferring
                and t.name not in settings.TABLES_WITH_GENERIC_FOREIGN_KEY
            ),
            self._dst_database.tables.values(),
        )
    )

    logger.debug(
        f'tables not transferred yet - {len(not_transferred_tables)}'
    )

    dependencies_between_models = []

    for table in self._dst_database.tables_without_generics:
        for fk_column in table.not_self_fk_columns:
            dependencies_between_models.append(
                (table.name, fk_column.constraint_table.name)
            )

    sorted_dependencies_result = topological_sort(
        dependency_pairs=dependencies_between_models,
    )
    sorted_dependencies_result.cyclic.reverse()
    sorted_dependencies_result.sorted.reverse()

    sorted_tables_by_dependency = (
        sorted_dependencies_result.cyclic + sorted_dependencies_result.sorted
    )

    without_relatives = list(
        {
            table.name
            for table in self._dst_database.tables_without_generics
        }.difference(
            sorted_tables_by_dependency
        )
    )

    sorted_tables_by_dependency = (
        without_relatives + sorted_tables_by_dependency
    )

    # deliberately breaking asynchrony here, since the order in which
    # tables are imported matters
    for table_name in sorted_tables_by_dependency:
        table = self._dst_database.tables[table_name]

        if not table.is_ready_for_transferring:
            await self._prepare_unready_table(
                table=table,
            )

    logger.info('preparing tables sorted by dependency finished.')
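# Illustrative note (values are hypothetical): dependency pairs have the form
# (dependent_table, referenced_table), and the helper is assumed to return
# dependents before the tables they reference, which is why both result lists
# are reversed above to obtain a safe import order.
#
#   pairs = [('order_item', 'order'), ('order', 'customer')]
#   result = topological_sort(dependency_pairs=pairs)
#   # result.sorted might be ['order_item', 'order', 'customer'];
#   # after reverse(): ['customer', 'order', 'order_item'].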
async def _prepare_key_table_values(self):
    logger.info('prepare key table values...')

    key_table = self._dst_database.tables[settings.KEY_TABLE_NAME]

    key_table.update_need_transfer_pks(
        need_transfer_pks=self._key_column_values,
    )

    key_table.is_ready_for_transferring = True

    logger.info('preparing key table values finished!')
async def _set_tables_counters(self):
    logger.info('start filling tables max pk and count of records..')

    coroutines = [
        asyncio.create_task(
            self._set_table_counters(table_name)
        )
        for table_name in sorted(self._dst_database.tables.keys())
    ]

    if coroutines:
        await asyncio.wait(coroutines)

    logger.info('finished filling tables max pk and count of records.')
def manage(self):
    start = datetime.now()

    logger.info(f'start date - {start}')

    asyncio.run(
        self._main(),
        debug=settings.TEST_MODE,
    )

    finish = datetime.now()

    logger.info(
        f'start - {start}, finish - {finish}, '
        f'elapsed time - {finish - start}'
    )
async def _prepare_tables_with_key_column(
    self,
    table: DBTable,
):
    """
    Preparing tables with key column and siblings
    """
    logger.info(
        f'start preparing table with key column "{table.name}"'
    )

    if table.is_ready_for_transferring:
        return

    need_transfer_pks = await self._get_table_column_values(
        table=table,
        column=table.primary_key,
    )

    table.is_checked = True

    if need_transfer_pks:
        table.update_need_transfer_pks(
            need_transfer_pks=need_transfer_pks,
        )

        await asyncio.wait(
            [
                asyncio.create_task(
                    self._direct_recursively_preparing_table(
                        table=table,
                        need_transfer_pks=need_transfer_pks,
                    )
                ),
            ]
        )

        await asyncio.wait(
            [
                asyncio.create_task(
                    self._revert_recursively_preparing_table(
                        table=table,
                    )
                ),
            ]
        )

    del need_transfer_pks

    logger.info(
        f'finished preparing table with key column "{table.name}"'
    )
def print_transferring_indications(self):
    """
    Output transferring indications to log
    """
    for stage in TransferringStagesEnum.values.keys():
        if stage in self._time_indications:
            logger.info(
                f"{TransferringStagesEnum.values.get(stage)} --- "
                f"{dates_list_to_str(self._time_indications[stage])}"
            )

        if stage in self._memory_usage_indications:
            logger.info(
                f"{TransferringStagesEnum.values.get(stage)} --- "
                f"{self._memory_usage_indications[stage]}"
            )
async def _prepare_generic_table_data(self, target_table: DBTable):
    """
    Transfers data from a table that contains a generic foreign key
    """
    logger.info(f"prepare generic table data {target_table.name}")

    coroutines = [
        asyncio.create_task(
            self._prepare_content_type_generic_data(
                target_table=target_table,
                rel_table_name=rel_table_name,
            )
        )
        for rel_table_name in self.content_type_table.keys()
    ]

    if coroutines:
        await asyncio.wait(coroutines)
async def collect(self):
    logger.info(
        'start preparing tables with key column and their siblings..'
    )

    coroutines = [
        asyncio.create_task(self._prepare_tables_with_key_column(table))
        for table in self._dst_database.tables_with_key_column
    ]

    if coroutines:
        await asyncio.wait(coroutines)

    for dst_table in self._dst_database.tables.values():
        if dst_table.is_checked:
            dst_table.is_ready_for_transferring = True

    logger.info(
        'finished preparing tables with key column and their siblings.'
    )
def print_records_transfer_statistic(self):
    """
    Output transferred tables rows count
    """
    tables: Iterable[DBTable] = self._database.tables.values()

    tables_counts = {
        table.name: (
            table.transferred_pks_count,
            len(table.need_transfer_pks),
        )
        for table in tables
    }

    sorted_tables_counts = sorted(
        tables_counts,
        key=lambda t_n: tables_counts[t_n][0],
    )

    for table_name in sorted_tables_counts:
        logger.info(
            f"{table_name} --- {tables_counts[table_name][0]} / "
            f"{tables_counts[table_name][1]}"
        )
async def _prepare_content_type_generic_data(
    self,
    target_table: DBTable,
    rel_table_name: str,
):
    if not rel_table_name:
        logger.debug('rel_table_name is not provided')
        return

    rel_table = self._dst_database.tables.get(rel_table_name)

    if not rel_table:
        logger.debug(f'table {rel_table_name} not found')
        return

    object_id_column = await target_table.get_column_by_name('object_id')

    if rel_table.primary_key.data_type != object_id_column.data_type:
        logger.debug(
            f'pk of table {rel_table_name} has an incompatible data type'
        )
        return

    logger.info('prepare content type generic data')

    where_conditions = {
        'object_id': rel_table.need_transfer_pks,
        'content_type_id': [self.content_type_table[rel_table.name]],
    }

    need_transfer_pks = await self._get_table_column_values(
        table=target_table,
        column=target_table.primary_key,
        where_conditions_columns=where_conditions,
    )

    logger.info(
        f'{target_table.name} need transfer pks {len(need_transfer_pks)}'
    )

    target_table.update_need_transfer_pks(
        need_transfer_pks=need_transfer_pks,
    )

    del where_conditions
    del need_transfer_pks
def _print_result(self):
    """
    Print validation result
    """
    result_table = PrettyTable()
    result_table.field_names = ['Validator', 'Is valid', 'Message']

    for validator_class_name, (is_valid, message) in self._validation_result.items():  # noqa
        result_table.add_row(
            (
                validator_class_name,
                is_valid,
                message,
            )
        )

    logger.info(result_table)
async def _transfer_collecting_data(self):
    """
    Physically imports data into the target database from the donor (source) database
    """
    logger.info("start transferring data to target db...")

    need_imported_tables = filter(
        lambda table: table.need_transfer_pks,
        self._dst_database.tables.values(),
    )

    coroutines = [
        self._transfer_table_data(table)
        for table in need_imported_tables
    ]

    if coroutines:
        await asyncio.gather(*coroutines)

    logger.info("finished transferring data to target db!")
async def _transfer_table_data(self, table):
    """
    Transfers the table's data in chunks
    """
    logger.info(
        f"start transferring table \"{table.name}\", "
        f"need to import - {len(table.need_transfer_pks)}"
    )

    need_import_ids_chunks = make_chunks(
        iterable=table.need_transfer_pks,
        size=self.CHUNK_SIZE,
    )

    for need_import_ids_chunk in need_import_ids_chunks:
        await self._transfer_chunk_table_data(
            table=table,
            need_import_ids_chunk=need_import_ids_chunk,
        )

    logger.info(f"finished transferring table \"{table.name}\"")
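# Illustrative sketch (an assumption, not the project's actual helper): make_chunks
# is used here as a generator yielding pieces of an iterable no larger than `size`;
# with is_list=True each chunk would be materialized as a list. A minimal equivalent
# could look like this:
#
#   from itertools import islice
#
#   def make_chunks(iterable, size, is_list=False):
#       iterator = iter(iterable)
#       while chunk := tuple(islice(iterator, size)):
#           yield list(chunk) if is_list else chunk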
async def _main(self):
    """
    Run async databaser
    """
    async with asyncpg.create_pool(
        self._dst_database.connection_str,
        min_size=30,
        max_size=40,
    ) as dst_pool:
        async with asyncpg.create_pool(
            self._src_database.connection_str,
            min_size=30,
            max_size=40,
        ) as src_pool:
            self._src_database.connection_pool = src_pool
            self._dst_database.connection_pool = dst_pool

            await self._src_database.prepare_table_names()

            logger.info(
                f'src_database tables count - '
                f'{len(self._src_database.table_names)}'
            )

            fdw_wrapper = PostgresFDWExtensionWrapper(
                src_database=self._src_database,
                dst_database=self._dst_database,
                dst_pool=dst_pool,
            )

            await asyncio.wait(
                [
                    asyncio.create_task(
                        fdw_wrapper.disable()
                    ),
                ]
            )

            async with statistic_indexer(
                self._statistic_manager,
                TransferringStagesEnum.PREPARE_DST_DB_STRUCTURE,
            ):
                await self._dst_database.prepare_structure()

            await self._dst_database.disable_triggers()

            await asyncio.wait(
                [
                    asyncio.create_task(
                        self._build_key_column_values_hierarchical_structure()  # noqa
                    ),
                ]
            )

            async with statistic_indexer(
                self._statistic_manager,
                TransferringStagesEnum.TRUNCATE_DST_DB_TABLES,
            ):
                await self._dst_database.truncate_tables()

            await asyncio.wait(
                [
                    asyncio.create_task(
                        fdw_wrapper.enable()
                    ),
                ]
            )

            async with statistic_indexer(
                self._statistic_manager,
                TransferringStagesEnum.FILLING_TABLES_ROWS_COUNTS,
            ):
                await self._set_tables_counters()

            collector_manager = CollectorManager(
                src_database=self._src_database,
                dst_database=self._dst_database,
                statistic_manager=self._statistic_manager,
                key_column_values=self._key_column_values,
            )

            await asyncio.wait(
                [
                    asyncio.create_task(
                        collector_manager.manage()
                    ),
                ]
            )

            transporter = Transporter(
                dst_database=self._dst_database,
                src_database=self._src_database,
                statistic_manager=self._statistic_manager,
                key_column_values=self._key_column_values,
            )

            async with statistic_indexer(
                self._statistic_manager,
                TransferringStagesEnum.PREPARING_AND_TRANSFERRING_DATA,
            ):
                await asyncio.wait(
                    [
                        asyncio.create_task(
                            transporter.transfer()
                        ),
                    ]
                )

            await self._dst_database.enable_triggers()

            await asyncio.wait(
                [
                    asyncio.create_task(
                        fdw_wrapper.disable()
                    ),
                ]
            )

            self._statistic_manager.print_transferring_indications()
            self._statistic_manager.print_records_transfer_statistic()

            if settings.TEST_MODE:
                validator_manager = ValidatorManager(
                    dst_database=self._dst_database,
                    src_database=self._src_database,
                    statistic_manager=self._statistic_manager,
                    key_column_values=self._key_column_values,
                )

                await validator_manager.validate()
async def _prepare_unready_table(
    self,
    table: DBTable,
):
    """
    Preparing table records for transferring
    """
    logger.info(
        f'start preparing table "{table.name}"'
    )

    # walk through tables related via foreign keys
    where_conditions_columns = {}
    fk_columns = table.highest_priority_fk_columns

    with_full_transferred_table = False

    for fk_column in fk_columns:
        logger.debug(f'prepare column {fk_column.name}')

        fk_table = self._dst_database.tables[
            fk_column.constraint_table.name
        ]

        if fk_table.need_transfer_pks:
            if not fk_table.is_full_prepared:
                where_conditions_columns[fk_column.name] = (
                    fk_table.need_transfer_pks
                )
            else:
                with_full_transferred_table = True

    if (
        fk_columns
        and not where_conditions_columns
        and not with_full_transferred_table
    ):
        return

    table_pks = await self._get_table_column_values(
        table=table,
        column=table.primary_key,
        where_conditions_columns=where_conditions_columns,
    )

    if (
        fk_columns
        and where_conditions_columns
        and not table_pks
    ):
        return

    table.update_need_transfer_pks(
        need_transfer_pks=table_pks,
    )

    logger.debug(
        f'table "{table.name}" need transfer pks - '
        f'{len(table.need_transfer_pks)}'
    )

    del table_pks

    # walk through tables that reference the current table
    logger.debug('prepare revert tables')

    coroutines = [
        asyncio.create_task(
            self._prepare_revert_table(
                table=table,
                revert_table=revert_table,
                revert_columns=revert_columns,
            )
        )
        for revert_table, revert_columns in table.revert_foreign_tables.items()  # noqa
    ]

    if coroutines:
        await asyncio.wait(coroutines)

    if not table.need_transfer_pks:
        all_records = await self._get_table_column_values(
            table=table,
            column=table.primary_key,
        )

        table.update_need_transfer_pks(
            need_transfer_pks=all_records,
        )

        del all_records

    table.is_ready_for_transferring = True

    logger.info(
        f'finished collecting records ids of table "{table.name}"'
    )