def test_move(self):
    """Run a full ``MoveMongoDBs.move()`` and compare source and destination.

    Writes a migration config next to this test file, restores every database
    listed in ``mover.dbs_to_migrate`` into the source instance from the
    resources folder, runs the move, and then checks that each source
    collection has the same document count in the destination instance.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    config_file_path = f"{dir_path}/migration_config.yml"
    # One "key: value" pair per line so the file is a valid YAML mapping.
    config_file_content = (
        f"migration-folder: {dir_path}/../resources\n"
        "python3-path: python3\n"
        "nextflow-binary-path: nextflow\n"
        f"nextflow-config-path: {dir_path}/workflow.config\n"
        f"script-path: {dir_path}/../../\n"
        "mongo-source-uri: mongodb://localhost:27017/admin\n"
        f"mongo-source-secrets-file: {dir_path}/empty_secret_file\n"
        "mongo-dest-uri: mongodb://localhost:27018/admin\n"
        f"mongo-dest-secrets-file: {dir_path}/empty_secret_file\n"
    )
    # Close (and therefore flush) the file before MoveMongoDBs reads it.
    with open(config_file_path, "w") as config_file:
        config_file.write(config_file_content)

    mover = MoveMongoDBs(migration_config_file=config_file_path,
                         dbs_to_migrate_list=f"{dir_path}/dbs_to_migrate.txt",
                         batch_number="1", resume_flag=False)
    source_uri = mover.migration_config["mongo-source-uri"]
    dest_uri = mover.migration_config["mongo-dest-uri"]

    # Load data to source
    for db_name in mover.dbs_to_migrate:
        source_db = MongoDatabase(source_uri, db_name=db_name)
        source_db.drop()
        source_db.restore_data(dump_dir=f"{dir_path}/../resources/{db_name}")

    mover.move()

    # Check if source data made it to the destination
    for db_name in mover.dbs_to_migrate:
        source_db = MongoDatabase(source_uri, db_name=db_name)
        dest_db = MongoDatabase(dest_uri, db_name=db_name)
        for collection_name in source_db.get_collection_names():
            source_count = source_db.mongo_handle[db_name][collection_name].count_documents(filter={})
            dest_count = dest_db.mongo_handle[db_name][collection_name].count_documents(filter={})
            self.assertEqual(
                source_count, dest_count,
                msg=f"Document count mismatch for {db_name}.{collection_name}")
def _restore_data_to_another_db(self):
    """Dump the test database and restore it under a ``_restore`` suffix.

    The dump goes to a temporary directory. A second ``MongoDatabase`` named
    ``<db_name>_restore`` is dropped and then restored from that dump, with
    ``nsFrom``/``nsTo`` arguments mapping the original namespace onto the
    new one.

    Returns:
        The ``MongoDatabase`` object for the restored copy.
    """
    with tempfile.TemporaryDirectory() as dump_root:
        self.test_mongo_db.dump_data(dump_root)

        restored_db = MongoDatabase(
            uri=self.uri,
            db_name=self.test_mongo_db.db_name + "_restore",
        )
        restored_db.drop()

        # Namespace patterns are wrapped in literal double quotes, exactly as
        # they are handed on to the restore call.
        namespace_mapping = {
            "nsFrom": f'"{self.test_mongo_db.db_name}.*"',
            "nsTo": f'"{restored_db.db_name}.*"',
        }
        restored_db.restore_data(dump_dir=dump_root,
                                 mongorestore_args=namespace_mapping)
        return restored_db
def prepare_dest_db(mongo_source_db: MongoDatabase, mongo_dest_db: MongoDatabase) -> None:
    """Reset the destination database and shard it for the incoming data.

    Drops the destination database, enables sharding on it, and shards the
    collections whose names are read from the source database, using the
    ``collections_shard_key_map`` defined outside this function.

    Args:
        mongo_source_db: Database whose collection names decide what is sharded.
        mongo_dest_db: Database that is dropped, then sharded.

    Raises:
        SystemExit: With status 1 if any preparation step raises.
    """
    try:
        logger.info("Dropping target database if it already exists...")
        mongo_dest_db.drop()
        logger.info("Enabling sharding in the target database...")
        mongo_dest_db.enable_sharding()
        logger.info("Sharding collections in the target database...")
        mongo_dest_db.shard_collections(
            collections_shard_key_map,
            collections_to_shard=mongo_source_db.get_collection_names())
    except Exception as ex:
        # The process stops here, so record the traceback along with the
        # message; otherwise the origin of the failure is lost.
        logger.exception(
            f"Error while preparing destination database!\n{ex}")
        sys.exit(1)
class TestMongoDatabase(TestCommon):
    """Tests for ``MongoDatabase`` against a locally running Mongo instance.

    Every test starts from a fresh copy of the ``test_mongo_db`` dump, which
    ``setUp`` restores from the resources folder.
    """

    dump_db_name = "test_mongo_db"
    uri = "mongodb://localhost:27017/admin"
    local_mongo_handle = pymongo.MongoClient()

    # Tests expect a local sharded Mongo instance
    def setUp(self) -> None:
        """Drop any existing test database and re-import it from the dump."""
        self.test_mongo_db = MongoDatabase(uri=self.uri, db_name=self.dump_db_name)
        # resources_folder is not defined in this class; presumably it is
        # provided by TestCommon.
        self.dump_dir = os.path.join(self.resources_folder, self.dump_db_name)
        run_command_with_output(
            "Drop target test database if it already exists...",
            f"mongo {self.dump_db_name} "
            f"--eval 'db.dropDatabase()'")
        run_command_with_output("Import test database...",
                                f"mongorestore --dir {self.dump_dir}")

    def tearDown(self) -> None:
        """Do nothing; kept as an explicit override of the parent hook."""
        pass

    def _restore_data_to_another_db(self):
        """Dump the test database and restore it as ``<db_name>_restore``.

        Returns:
            The ``MongoDatabase`` object for the restored copy.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.dump_data(tempdir)
            test_restore_db = MongoDatabase(
                uri=self.uri,
                db_name=self.test_mongo_db.db_name + "_restore")
            test_restore_db.drop()
            test_restore_db.restore_data(
                dump_dir=tempdir,
                mongorestore_args={
                    "nsFrom": f'"{self.test_mongo_db.db_name}.*"',
                    "nsTo": f'"{test_restore_db.db_name}.*"'
                })
            return test_restore_db

    def test_drop_database(self):
        """After ``drop()``, the database is no longer listed by the server."""
        self.test_mongo_db.drop()
        self.assertNotIn(self.dump_db_name,
                         self.local_mongo_handle.list_database_names())

    def test_get_indexes(self):
        """``get_indexes()`` returns the index map expected for the test dump."""
        expected_index_map = {
            'annotationMetadata_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.annotationMetadata_2_0',
                    'v': 2
                }
            },
            'annotations_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                },
                'ct.so_1': {
                    'background': True,
                    'key': [('ct.so', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                },
                'xrefs.id_1': {
                    'background': True,
                    'key': [('xrefs.id', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                }
            },
            'files_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'v': 2
                },
                'unique_file': {
                    'background': True,
                    'key': [('sid', 1), ('fid', 1), ('fname', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'unique': True,
                    'v': 2
                }
            },
            'variants_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'annot.so_1': {
                    'background': True,
                    'key': [('annot.so', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'annot.xrefs_1': {
                    'background': True,
                    'key': [('annot.xrefs', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'chr_1_start_1_end_1': {
                    'background': True,
                    'key': [('chr', 1), ('start', 1), ('end', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'files.sid_1_files.fid_1': {
                    'background': True,
                    'key': [('files.sid', 1), ('files.fid', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'ids_1': {
                    'background': True,
                    'key': [('ids', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                }
            }
        }
        self.assertDictEqual(expected_index_map,
                             self.test_mongo_db.get_indexes())

    def test_create_index_on_collections(self):
        """An index from the supplied map is created on the restored copy."""
        collection_index_map = {
            'files_2_0': {
                'unique_file': {
                    'background': True,
                    'key': [('sid', 1), ('fid', 1), ('fname', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'unique': True,
                    'v': 2
                }
            }
        }
        test_restore_db = self._restore_data_to_another_db()
        test_restore_db.create_index_on_collections(
            collection_index_map=collection_index_map)
        test_restore_db_index_info = test_restore_db.get_indexes()
        # Check if index with the name "unique_file" is created on the collection
        self.assertIn('files_2_0', test_restore_db_index_info)
        self.assertIn('unique_file', test_restore_db_index_info['files_2_0'])
        self.assertEqual(
            [('sid', 1), ('fid', 1), ('fname', 1)],
            test_restore_db_index_info['files_2_0']['unique_file']['key'])

    def test_enable_sharding(self):
        """``enable_sharding()`` marks the database as partitioned."""
        self.test_mongo_db.enable_sharding()
        # Query meta-collection in the config database to check sharding status
        sharded_db_entries = list(
            self.local_mongo_handle["config"]["databases"].find({
                "_id": self.test_mongo_db.db_name,
                "partitioned": True
            }))
        self.assertGreater(len(sharded_db_entries), 0)

    def test_shard_collections(self):
        """``shard_collections()`` shards the requested collection by its key."""
        test_restore_db = self._restore_data_to_another_db()
        collection_to_shard = "files_2_0"
        test_restore_db.enable_sharding()
        test_restore_db.shard_collections(
            collections_shard_key_map={
                "files_2_0": (["sid", "fid", "fname"], True)
            },
            collections_to_shard=[collection_to_shard])
        # Query meta-collection in the config database to check sharding status
        sharded_collection_entries = list(
            self.local_mongo_handle["config"]["collections"].find({
                "_id": f"{test_restore_db.db_name}.{collection_to_shard}",
                "key": {
                    "sid": 1,
                    "fid": 1,
                    "fname": 1
                }
            }))
        self.assertGreater(len(sharded_collection_entries), 0)

    def test_dump_data(self):
        """``dump_data()`` creates a directory named after the database."""
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.dump_data(tempdir)
            self.assertTrue(
                os.path.isdir(os.path.join(tempdir, self.dump_db_name)))

    def test_archive_data(self):
        """``archive_data()`` creates a file with the requested name."""
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.archive_data(tempdir, self.dump_db_name)
            self.assertTrue(
                os.path.isfile(os.path.join(tempdir, self.dump_db_name)))

    def test_restore_data(self):
        """The restored copy shows up in the server's database list."""
        test_restore_db = self._restore_data_to_another_db()
        self.assertIn(test_restore_db.db_name,
                      self.local_mongo_handle.list_database_names())

    def test_export_import_data(self):
        """Exporting a collection and importing it back preserves the count."""
        org_collection_name = "variants_2_0"
        mongo_export_args = {"collection": org_collection_name}
        test_db_handle = self.test_mongo_db.mongo_handle[self.dump_db_name]
        with tempfile.TemporaryDirectory() as tempdir:
            export_file_path = os.path.join(tempdir, self.dump_db_name)
            coll_doc_count = test_db_handle[org_collection_name].count_documents({})

            self.test_mongo_db.export_data(export_file_path, mongo_export_args)
            # Count the lines of the export file and compare with the
            # collection's document count.
            with open(export_file_path, "r") as exported_file:
                export_doc_count = sum(1 for _ in exported_file)
            self.assertEqual(coll_doc_count, export_doc_count)

            # import whatever we have exported into a new collection in the same database
            new_collection_name = "temp_variants_2_0"
            mongo_import_args = {
                "mode": "upsert",
                "collection": new_collection_name
            }
            temp_collection = test_db_handle[new_collection_name]
            try:
                self.test_mongo_db.import_data(export_file_path,
                                               mongo_import_args)
                imported_doc_count = temp_collection.count_documents({})
                self.assertEqual(coll_doc_count, imported_doc_count)
            finally:
                # delete the newly created temp collection, even when the
                # import or the assertion above fails
                temp_collection.drop()