class FilesystemBackupTool(object):
    """Create and inspect filesystem snapshots kept in a repository.

    A snapshot is a nested tree of per-file info dictionaries (built by
    ``_get_file_info``) that is saved as a table through ``self.storage``
    together with a small metadata dictionary describing the backup run.
    """

    # ``os.lstat`` result attributes copied verbatim into each file record.
    _STAT_FIELDS = (
        'st_mode', 'st_ino', 'st_dev', 'st_nlink', 'st_uid', 'st_gid',
        'st_size', 'st_atime', 'st_ctime', 'st_mtime',
    )

    def __init__(self, client_id, conf):
        """Bind the tool to a client and open its repository.

        :param client_id: identifier used to name and to list snapshots.
        :param conf: mapping of settings; it must hold a non-empty
            ``'repository'`` entry, which is handed to ``StorageManager``.
        :raises ValueError: if no repository location is configured.
        """
        self.client_id = client_id
        self.conf = conf
        if not self.conf.get('repository'):
            raise ValueError("You must specify a repository location")
        self.storage = StorageManager(self.conf['repository'])

    def create_snapshot(self, source):
        """Back up ``source`` and return the name of the stored snapshot.

        The whole tree below ``source`` is walked; the resulting tree and
        the run metadata (client id, start/end date, hostname, absolute
        root directory) are saved with ``self.storage.store_table``.

        :param source: path of the file or directory to back up.
        :returns: the snapshot name, ``'<client_id>_<formatted start date>'``.
        """
        source = os.path.abspath(source)
        start_date = datetime.datetime.now()
        # NOTE(review): '%s' is not one of the strftime directives listed in
        # the Python documentation; it is presumably meant as the glibc
        # "seconds since the epoch" extension -- confirm the target platforms.
        snapshot_name = '{0}_{1:%s}'.format(self.client_id, start_date)
        snapshot_meta = {
            'client_id': self.client_id,
            'date': start_date.strftime(DATE_FORMAT),
            'hostname': socket.gethostname(),
            'root_dir': source,
        }
        snapshot_files = self._get_file_info(source)
        # The end date is taken only after the (possibly long) tree walk.
        end_date = datetime.datetime.now()
        snapshot_meta['end_date'] = end_date.strftime(DATE_FORMAT)
        self.storage.store_table(snapshot_name, snapshot_meta, snapshot_files)
        return snapshot_name

    def _get_file_info(self, filepath):
        """Build the info record for ``filepath``, recursing into directories.

        The record holds the base name, the file-type bits (``'st_ifmt'``),
        their description looked up in ``IFMT_DESC`` (``'type'``) and the
        ``os.lstat`` fields named in ``_STAT_FIELDS``.  For a regular file
        the content is stored with ``self.storage.store_blob`` and the value
        it returns is recorded under ``'blob_hash'``; for a directory the
        records of its entries are listed under ``'children'``.

        :param filepath: path of the file to describe.
        :returns: the info dictionary for that file.
        """
        # Parenthesised single argument: prints the same text on Python 2
        # (print statement) and on Python 3 (print function).
        print("Processing file: {0}".format(filepath))
        # lstat, not stat: a symbolic link is described itself, not followed.
        file_stat = os.lstat(filepath)
        st_ifmt = stat.S_IFMT(file_stat.st_mode)
        file_info = {
            'type': IFMT_DESC[st_ifmt],
            'name': os.path.basename(filepath),
            'st_ifmt': st_ifmt,
        }
        for field in self._STAT_FIELDS:
            file_info[field] = getattr(file_stat, field)

        if st_ifmt == stat.S_IFREG:
            with open(filepath, 'rb') as f:
                file_info['blob_hash'] = self.storage.store_blob(f)
        elif st_ifmt == stat.S_IFDIR:
            file_info['children'] = list(self._get_dir_children(filepath))
        return file_info

    def _get_dir_children(self, dirname):
        """Yield the info record of every entry in directory ``dirname``.

        ``os.listdir`` returns names in arbitrary order, so they are sorted
        to make the stored tree deterministic for an unchanged directory.
        """
        for filename in sorted(os.listdir(dirname)):
            yield self._get_file_info(os.path.join(dirname, filename))

    def list_snapshots(self):
        """Return the storage's table listing filtered by this client id."""
        return self.storage.list_tables(client_id=self.client_id)

    def get_file_info(self, snapshot_id, path=None):
        """Return the stored record for ``path`` inside a snapshot.

        :param snapshot_id: name of the snapshot table to read.
        :param path: ``/``-separated path relative to the snapshot root;
            empty components are ignored.  With ``None`` (or a path without
            any component) the whole snapshot tree is returned.
        :returns: the info dictionary found at ``path``.
        :raises ValueError: if a component is missing or would have to be
            looked up below an entry whose ``'type'`` is not ``'d'``.
        """
        # TODO: read the table, list stuff, ...
        table_data = self.storage.get_table_data(snapshot_id)
        if path is None:
            return table_data

        # A list comprehension instead of filter(): on Python 3 filter()
        # returns a lazy iterator, which cannot be measured or sliced.
        trail = [part for part in path.split("/") if part]

        # Walk down one component at a time; iterating avoids one level of
        # recursion per path component.
        current = table_data
        for name in trail:
            if current['type'] != 'd':
                raise ValueError("No such file")
            for child in current['children']:
                if child['name'] == name:
                    current = child
                    break
            else:
                # No child carries the requested name.
                raise ValueError("No such file")
        return current

    def restore_file(self, snapshot, filename, destination):
        """Placeholder: restoring a single file is not implemented yet."""
        pass

    def restore_full(self, destination):
        """Placeholder: restoring a full snapshot is not implemented yet."""
        pass
class StorageTest(unittest.TestCase):
    """Store / retrieve / delete round-trip tests for ``StorageManager``.

    Each test runs against a fresh manager rooted in a temporary directory
    and uses the module-level ``DUMMY_CHUNKS``, ``DUMMY_BLOBS`` and
    ``DUMMY_FS`` fixtures, which map an expected id to its payload.
    """

    def setUp(self):
        # A throw-away repository per test; the small chunk size is passed
        # straight to the manager.
        self.tmpdir = tempfile.mkdtemp()
        self.storage_manager = StorageManager(self.tmpdir, chunk_size=128)

    def tearDown(self):
        # Remove everything the test wrote into the temporary repository.
        shutil.rmtree(self.tmpdir)

    def test_chunks_storage(self):
        # TODO: check deduplication

        # dict.items() and plain dict iteration are used throughout instead
        # of iteritems()/iterkeys(), which only exist on Python 2.

        # Storage: the returned id must equal the expected chunk hash.
        for chunk_hash, chunk_data in DUMMY_CHUNKS.items():
            chunk_id = self.storage_manager.store_chunk(chunk_data)
            self.assertEqual(chunk_id, chunk_hash)

        # Retrieval: every stored chunk is present and reads back unchanged.
        for chunk_hash, chunk_data in DUMMY_CHUNKS.items():
            self.assertTrue(self.storage_manager.has_chunk(chunk_hash))
            new_chunk = self.storage_manager.get_chunk(chunk_hash)
            self.assertEqual(chunk_data, new_chunk)

        # Deletion: a deleted chunk is gone and can no longer be fetched.
        for chunk_hash in DUMMY_CHUNKS:
            self.storage_manager.delete_chunk(chunk_hash)
            self.assertFalse(self.storage_manager.has_chunk(chunk_hash))
            with self.assertRaises(Exception):
                self.storage_manager.get_chunk(chunk_hash)

    def test_blobs_storage(self):
        # TODO: test refcount of chunks
        from io import BytesIO

        # Storage: the returned id must equal the expected blob hash.
        for blob_hash, blob_data in DUMMY_BLOBS.items():
            source = BytesIO(blob_data)
            blob_id = self.storage_manager.store_blob(source)
            self.assertEqual(blob_id, blob_hash)

        # Retrieval: the blob is written into a stream and compared.
        for blob_hash, blob_data in DUMMY_BLOBS.items():
            self.assertTrue(self.storage_manager.has_blob(blob_hash))
            target = BytesIO()
            self.storage_manager.retrieve_blob(blob_hash, target)
            target.seek(0)
            self.assertEqual(blob_data, target.read())

        # Deletion, followed by retrieval of the now non-existent blob.
        for blob_hash in DUMMY_BLOBS:
            self.storage_manager.delete_blob_info(blob_hash)
            self.assertFalse(self.storage_manager.has_blob(blob_hash))
            target = BytesIO()
            with self.assertRaises(Exception):
                self.storage_manager.retrieve_blob(blob_hash, target)

    def test_table_storage(self):
        # TODO: test refcount of blobs + chunks
        # TODO: check refcounts / deletion / cleanup / etc.
        # TODO: test table metadata storage + indexing etc.

        # Storage: one table per dummy filesystem tree.
        for fs_name, fs_tree in DUMMY_FS.items():
            self.storage_manager.store_table(fs_name, {
                'hostname': fs_name,
            }, fs_tree)

        # Listing: exactly the stored table names, in any order.
        self.assertListEqual(
            sorted(self.storage_manager.list_tables()),
            sorted(DUMMY_FS))

        # Retrieval: every table is present and its tree is unchanged.
        for fs_name, fs_tree in DUMMY_FS.items():
            self.assertTrue(self.storage_manager.has_table(fs_name))
            retr_tree = self.storage_manager.get_table_data(fs_name)
            self.assertDictEqual(retr_tree, fs_tree)

        # Deletion: a deleted table is gone and can no longer be read.
        for fs_name in DUMMY_FS:
            self.storage_manager.delete_table(fs_name)
            self.assertFalse(self.storage_manager.has_table(fs_name))
            with self.assertRaises(Exception):
                self.storage_manager.get_table_data(fs_name)