def setUp(self):
    """Build a migrating blob db holding, per type code, one real blob
    and one metadata record whose blob is deliberately missing.

    Missing records are collected in ``self.not_founds`` so tests can
    verify the migration reports them.
    """
    source_db = TemporaryFilesystemBlobDB()
    assert get_blob_db() is source_db, (get_blob_db(), source_db)
    payload = b'binary data not valid utf-8 \xe4\x94'
    self.not_founds = set()
    self.blob_metas = []
    for code in [CODES.form_xml, CODES.multimedia, CODES.data_export]:
        saved = source_db.put(BytesIO(payload), meta=new_meta(type_code=code))
        self.blob_metas.append(saved)
        # Saved metadata with no backing blob: simulates a lost blob.
        lost = new_meta(type_code=code, content_length=42)
        self.blob_metas.append(lost)
        lost.save()
        self.not_founds.add((
            lost.id,
            lost.domain,
            lost.type_code,
            lost.parent_id,
            lost.key,
        ))
    self.test_size = len(self.blob_metas)
    target_db = TemporaryFilesystemBlobDB()
    self.db = TemporaryMigratingBlobDB(target_db, source_db)
    assert get_blob_db() is self.db, (get_blob_db(), self.db)
    discard_migration_state(self.slug)
def setUp(self):
    """Set up couch and sql documents for migration tests.

    For each doc type one healthy blob and one deliberately "lost" blob
    are created; the lost ones are recorded in ``self.not_founds`` so
    tests can verify the migration reports them as missing.
    """
    lost_db = TemporaryFilesystemBlobDB()  # must be created before other dbs
    db1 = TemporaryFilesystemBlobDB()
    assert get_blob_db() is db1, (get_blob_db(), db1)
    missing = "found.not"
    name = "blob.bin"
    data = b'binary data not valid utf-8 \xe4\x94'
    self.not_founds = set()
    self.couch_docs = []
    with lost_db:
        for doc_type, model_class in self.couch_doc_types.items():
            item = model_class()
            item.doc_type = doc_type
            item.save()
            item.put_attachment(data, name)
            # Write one attachment into a db that is discarded afterward,
            # making it unresolvable ("not found") from the main db.
            with install_blob_db(lost_db):
                item.put_attachment(data, missing)
                self.not_founds.add((
                    doc_type,
                    item._id,
                    item.external_blobs[missing].id,
                    item._blobdb_bucket(),
                ))
            item.save()
            self.couch_docs.append(item)

    def create_obj(rex):
        # Build and save a model instance with a random blob identifier,
        # honoring whichever length field the model declares.
        ident = random_url_id(8)
        args = {rex.blob_helper.id_attr: ident}
        fields = {getattr(f, "attname", "")
                  for f in rex.model_class._meta.get_fields()}
        if "content_length" in fields:
            args["content_length"] = len(data)
        elif "length" in fields:
            args["length"] = len(data)
        item = rex.model_class(**args)
        save_attr = rex.model_class.__name__ + "_save"
        if hasattr(self, save_attr):
            # Allow subclasses to customize saving per model class.
            getattr(self, save_attr)(item, rex)
        else:
            item.save()
        return item, ident

    self.sql_docs = []
    for rex in (x() for x in self.sql_reindex_accessors):
        item, ident = create_obj(rex)
        helper = rex.blob_helper({"_obj_not_json": item})
        # BUG FIX: ``data`` is bytes; StringIO(bytes) raises TypeError on
        # Python 3. Use BytesIO, matching the couch-doc setup above.
        db1.put(BytesIO(data), ident, helper._blobdb_bucket())
        self.sql_docs.append(item)
        # Second object gets no blob written -> a "not found" record.
        lost, lost_blob_id = create_obj(rex)
        self.sql_docs.append(lost)
        self.not_founds.add((
            rex.model_class.__name__,
            lost.id,
            lost_blob_id,
            rex.blob_helper({"_obj_not_json": lost})._blobdb_bucket(),
        ))

    self.test_size = len(self.couch_docs) + len(self.sql_docs)
    db2 = TemporaryFilesystemBlobDB()
    self.db = TemporaryMigratingBlobDB(db2, db1)
    assert get_blob_db() is self.db, (get_blob_db(), self.db)
    BaseMigrationTest.discard_migration_state(self.slug)
class TestBigBlobExport(TestCase):
    """Exercise blob export with payloads larger than available memory.

    Blobs are generated from a stream of 1 MB zero blocks so the tests
    never materialize a whole blob in memory at once.
    """

    domain_name = 'big-blob-test-domain'
    # One reusable 1 MB zero block. Class-level constant instead of an
    # overridden __init__: overriding TestCase.__init__ is fragile across
    # test runners and was only used to build this value.
    mb_block = b'\x00' * 1024**2

    def setUp(self):
        # psutil is in dev-requirements only. Don't bother trying to
        # import for the module if the test is skipped.
        from psutil import virtual_memory
        self.memory = virtual_memory().total
        self.db = TemporaryFilesystemBlobDB()
        assert get_blob_db() is self.db, (get_blob_db(), self.db)
        self.blob_metas = []

    def tearDown(self):
        for meta in self.blob_metas:
            meta.delete()
        self.db.close()

    def mb_blocks(self):
        """Yield 1 MB blocks forever; the consumer limits the count."""
        while True:
            yield self.mb_block

    def _put_blob(self, number_of_1mb_blocks):
        """Store one blob built from the given number of 1 MB blocks."""
        meta = self.db.put(
            MockBigBlobIO(self.mb_blocks(), number_of_1mb_blocks),
            meta=new_meta(domain=self.domain_name,
                          type_code=CODES.multimedia),
        )
        self.blob_metas.append(meta)

    def _assert_export_has_all_blobs(self):
        """Export the domain's blobs; check the archive has every key."""
        with NamedTemporaryFile() as out:
            exporter = EXPORTERS['all_blobs'](self.domain_name)
            exporter.migrate(out.name, force=True)
            with tarfile.open(out.name, 'r:gz') as tgzfile:
                self.assertEqual(
                    set(tgzfile.getnames()),
                    {m.key for m in self.blob_metas},
                )

    def test_many_big_blobs(self):
        # Enough 1 GB blobs that they cannot all fit in memory at once.
        number_of_1gb_blobs = ceil(self.memory / 1024**3) + 1
        for __ in range(number_of_1gb_blobs):
            self._put_blob(1024)
        self._assert_export_has_all_blobs()

    def test_1_very_big_blob(self):
        # A single blob slightly larger than total memory.
        number_of_1mb_blocks = ceil(self.memory / 1024**2) + 1
        self._put_blob(number_of_1mb_blocks)
        self._assert_export_has_all_blobs()
class TestExtendingExport(TestCase):
    """Verify that successive exports extend earlier ones: keys already
    exported are skipped and only new blobs land in the next archive."""

    domain_name = 'extending-export-test-domain'

    def setUp(self):
        self.db = TemporaryFilesystemBlobDB()
        assert get_blob_db() is self.db, (get_blob_db(), self.db)
        self.blob_metas = []

    def tearDown(self):
        for meta in self.blob_metas:
            meta.delete()
        self.db.close()

    def _put_blobs(self, blobs):
        """Store each payload in *blobs* and record its metadata."""
        for blob in blobs:
            meta = self.db.put(
                BytesIO(blob),
                meta=new_meta(domain=self.domain_name,
                              type_code=CODES.multimedia),
            )
            self.blob_metas.append(meta)

    def _export(self, already_exported=None):
        """Export the domain's blobs, assert the archive holds exactly the
        keys of the three most recently stored blobs, and return those keys.
        """
        with NamedTemporaryFile() as export_file:
            exporter = EXPORTERS['all_blobs'](self.domain_name)
            if already_exported is None:
                exporter.migrate(export_file.name, force=True)
            else:
                exporter.migrate(
                    export_file.name,
                    already_exported=already_exported,
                    force=True,
                )
            with tarfile.open(export_file.name, 'r:gz') as tgzfile:
                expected_keys = {m.key for m in self.blob_metas[-3:]}
                self.assertEqual(set(tgzfile.getnames()), expected_keys)
        return expected_keys

    def test_extends(self):
        # First export file ...
        self._put_blobs((b'ham', b'spam', b'eggs'))
        keys_in_file_one = self._export()

        # Second export file extends first ...
        self._put_blobs((b'foo', b'bar', b'baz'))
        keys_in_file_two = self._export(already_exported=keys_in_file_one)

        # Third export file extends first and second ...
        self._put_blobs((b'wibble', b'wobble', b'wubble'))
        self._export(already_exported=keys_in_file_one | keys_in_file_two)