示例#1
0
def archive_data_from_source(mongo_source: MongoDatabase,
                             top_level_archive_dir):
    """Archive the source database, terminating the process on failure.

    Delegates to ``mongo_source.archive_data``; the archive is named after
    ``mongo_source.db_name`` and ``top_level_archive_dir`` is passed through
    as the archive directory.

    :param mongo_source: Database wrapper whose data is to be archived.
    :param top_level_archive_dir: Value handed to ``archive_data`` as
        ``archive_dir``.

    Any exception raised during the dump is logged as an error and the
    process then exits with status 1.
    """
    # Forcing a table scan is performant for many workloads and avoids cursor timeout issues
    # See https://jira.mongodb.org/browse/TOOLS-845?focusedCommentId=988298&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-988298
    # NOTE(review): the empty-string values presumably mark argument-less
    # mongodump flags - confirm against MongoDatabase.archive_data.
    dump_options = {
        "gzip": "",
        "forceTableScan": "",
        "numParallelCollections": "1",
    }

    try:
        logger.info("Running mongodump from source...")
        mongo_source.archive_data(
            archive_dir=top_level_archive_dir,
            archive_name=mongo_source.db_name,
            mongodump_args=dump_options,
        )
    except Exception as dump_error:
        logger.error(
            f"Error while dumping data from source!\n{str(dump_error)}")
        sys.exit(1)
示例#2
0
class TestMongoDatabase(TestCommon):
    """Integration tests for ``MongoDatabase`` against a local MongoDB.

    ``setUp`` drops and re-imports the ``test_mongo_db`` database before
    every test, so each test starts from the same restored data set.
    """

    dump_db_name = "test_mongo_db"
    uri = "mongodb://localhost:27017/admin"
    # Plain pymongo client (default connection settings) used to verify
    # results independently of the MongoDatabase wrapper under test.
    local_mongo_handle = pymongo.MongoClient()

    # Tests expect a local sharded Mongo instance
    def setUp(self) -> None:
        """Recreate the test database from the dump in the resources folder."""
        self.test_mongo_db = MongoDatabase(uri=self.uri,
                                           db_name=self.dump_db_name)
        # NOTE(review): resources_folder is presumably provided by TestCommon.
        self.dump_dir = os.path.join(self.resources_folder, self.dump_db_name)
        run_command_with_output(
            "Drop target test database if it already exists...",
            f"mongo {self.dump_db_name} "
            f"--eval 'db.dropDatabase()'")
        run_command_with_output("Import test database...",
                                f"mongorestore --dir {self.dump_dir}")

    def tearDown(self) -> None:
        """Nothing to clean up: setUp rebuilds the database for each test."""
        pass

    def _restore_data_to_another_db(self):
        """Dump the test database and restore it under ``<db_name>_restore``.

        The target database is dropped first, then the dump is restored with
        the namespace remapped from the test database to the target.

        :return: ``MongoDatabase`` handle for the restored copy.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.dump_data(tempdir)
            test_restore_db = MongoDatabase(
                uri=self.uri, db_name=self.test_mongo_db.db_name + "_restore")
            test_restore_db.drop()
            test_restore_db.restore_data(
                dump_dir=tempdir,
                mongorestore_args={
                    "nsFrom": f'"{self.test_mongo_db.db_name}.*"',
                    "nsTo": f'"{test_restore_db.db_name}.*"'
                })
            return test_restore_db

    def test_drop_database(self) -> None:
        """Dropping the database removes it from the server's database list."""
        self.test_mongo_db.drop()
        self.assertNotIn(self.dump_db_name,
                         self.local_mongo_handle.list_database_names())

    def test_get_indexes(self) -> None:
        """get_indexes returns the full per-collection index map of the dump."""
        expected_index_map = {
            'annotationMetadata_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.annotationMetadata_2_0',
                    'v': 2
                }
            },
            'annotations_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                },
                'ct.so_1': {
                    'background': True,
                    'key': [('ct.so', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                },
                'xrefs.id_1': {
                    'background': True,
                    'key': [('xrefs.id', 1)],
                    'ns': 'test_mongo_db.annotations_2_0',
                    'v': 2
                }
            },
            'files_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'v': 2
                },
                'unique_file': {
                    'background': True,
                    'key': [('sid', 1), ('fid', 1), ('fname', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'unique': True,
                    'v': 2
                }
            },
            'variants_2_0': {
                '_id_': {
                    'key': [('_id', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'annot.so_1': {
                    'background': True,
                    'key': [('annot.so', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'annot.xrefs_1': {
                    'background': True,
                    'key': [('annot.xrefs', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'chr_1_start_1_end_1': {
                    'background': True,
                    'key': [('chr', 1), ('start', 1), ('end', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'files.sid_1_files.fid_1': {
                    'background': True,
                    'key': [('files.sid', 1), ('files.fid', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                },
                'ids_1': {
                    'background': True,
                    'key': [('ids', 1)],
                    'ns': 'test_mongo_db.variants_2_0',
                    'v': 2
                }
            }
        }
        self.assertDictEqual(expected_index_map,
                             self.test_mongo_db.get_indexes())

    def test_create_index_on_collections(self) -> None:
        """create_index_on_collections builds the requested named index."""
        collection_index_map = {
            'files_2_0': {
                'unique_file': {
                    'background': True,
                    'key': [('sid', 1), ('fid', 1), ('fname', 1)],
                    'ns': 'test_mongo_db.files_2_0',
                    'unique': True,
                    'v': 2
                }
            }
        }
        test_restore_db = self._restore_data_to_another_db()
        test_restore_db.create_index_on_collections(
            collection_index_map=collection_index_map)
        test_restore_db_index_info = test_restore_db.get_indexes()
        # Check if index with the name "unique_file" is created on the collection
        self.assertIn('files_2_0', test_restore_db_index_info)
        self.assertIn('unique_file', test_restore_db_index_info['files_2_0'])
        self.assertEqual(
            [('sid', 1), ('fid', 1), ('fname', 1)],
            test_restore_db_index_info['files_2_0']['unique_file']['key'])

    def test_enable_sharding(self) -> None:
        """enable_sharding marks the database as partitioned in the config db."""
        self.test_mongo_db.enable_sharding()
        # Query meta-collection in the config database to check sharding status
        partitioned_db_entries = list(
            self.local_mongo_handle["config"]["databases"].find({
                "_id": self.test_mongo_db.db_name,
                "partitioned": True
            }))
        self.assertGreater(len(partitioned_db_entries), 0)

    def test_shard_collections(self) -> None:
        """shard_collections registers the collection with the given shard key."""
        test_restore_db = self._restore_data_to_another_db()
        collection_to_shard = "files_2_0"
        test_restore_db.enable_sharding()
        test_restore_db.shard_collections(
            collections_shard_key_map={
                collection_to_shard: (["sid", "fid", "fname"], True)
            },
            collections_to_shard=[collection_to_shard])
        # Query meta-collection in the config database to check sharding status
        sharded_collection_entries = list(
            self.local_mongo_handle["config"]["collections"].find({
                "_id": f"{test_restore_db.db_name}.{collection_to_shard}",
                "key": {
                    "sid": 1,
                    "fid": 1,
                    "fname": 1
                }
            }))
        self.assertGreater(len(sharded_collection_entries), 0)

    def test_dump_data(self) -> None:
        """dump_data creates a directory named after the database."""
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.dump_data(tempdir)
            self.assertTrue(
                os.path.isdir(os.path.join(tempdir, self.dump_db_name)))

    def test_archive_data(self) -> None:
        """archive_data creates a single file with the requested archive name."""
        with tempfile.TemporaryDirectory() as tempdir:
            self.test_mongo_db.archive_data(tempdir, self.dump_db_name)
            self.assertTrue(
                os.path.isfile(os.path.join(tempdir, self.dump_db_name)))

    def test_restore_data(self) -> None:
        """Restoring a dump under a new name makes that database visible."""
        test_restore_db = self._restore_data_to_another_db()
        self.assertIn(test_restore_db.db_name,
                      self.local_mongo_handle.list_database_names())

    def test_export_import_data(self) -> None:
        """Export a collection, then import it into a new one; counts must match."""
        org_collection_name = "variants_2_0"
        mongo_export_args = {"collection": org_collection_name}
        with tempfile.TemporaryDirectory() as tempdir:
            export_file_path = os.path.join(tempdir, self.dump_db_name)
            test_db_handle = self.test_mongo_db.mongo_handle[self.dump_db_name]
            coll_doc_count = test_db_handle[
                org_collection_name].count_documents({})
            self.test_mongo_db.export_data(export_file_path, mongo_export_args)
            # The test compares the number of exported lines with the
            # number of documents; count lazily instead of loading the file.
            with open(export_file_path, "r") as exported_file:
                export_doc_count = sum(1 for _ in exported_file)
            self.assertEqual(coll_doc_count, export_doc_count)

            # import whatever we have exported into a new collection in the same database
            new_collection_name = "temp_variants_2_0"
            mongo_import_args = {
                "mode": "upsert",
                "collection": new_collection_name
            }
            try:
                self.test_mongo_db.import_data(export_file_path,
                                               mongo_import_args)
                imported_doc_count = test_db_handle[
                    new_collection_name].count_documents({})
                self.assertEqual(coll_doc_count, imported_doc_count)
            finally:
                # delete the newly created temp collection, even when the
                # import or the assertion above fails
                test_db_handle[new_collection_name].drop()