Example No. 1
    def test_backup_restore(self):
        """ Test if backup and restore works correctly """

        backup_id = common.backup(self.backend, self.backup_dir)
        old_file = os.path.join(self.storage_dir, backup_id)

        # Create a second backup; unchanged files should reuse the previous
        # metadata and checksums, so the new metadata set is byte-identical
        with mock.patch('logging.info') as mock_log:
            backup_id = common.backup(self.backend, self.backup_dir)
            mock_log.assert_any_call('Skipped unchanged sub/o\xcc\x88')
        new_file = os.path.join(self.storage_dir, backup_id)
        self.assertEqual(utils.sha256_file(old_file), utils.sha256_file(new_file))

        # Check if data deduplication works
        chunks = utils.find_modified_files(self.storage_dir)
        storage_size = 0
        for filename, stat in chunks.items():
            if filename.startswith('c-'):
                storage_size += stat['s']
        self.assertLess(storage_size, self.original_size)

        common.restore(self.backend, self.restore_dir, backup_id)

        # Compare original file content to restored file content
        for fn in ['x', 'sub/y']:
            old_filename = os.path.join(self.backup_dir, fn)
            old_hash = utils.sha256_file(old_filename)
            new_filename = os.path.join(self.restore_dir, fn)
            new_hash = utils.sha256_file(new_filename)
            self.assertEqual(old_hash, new_hash)
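
The test compares files through utils.sha256_file, whose implementation is not shown in these examples. A minimal sketch consistent with how the tests use it (returning the hex SHA-256 digest of a file's bytes) could look like this:

import hashlib

def sha256_file(path, blocksize=65536):
    """Return the hex SHA-256 digest of a file's contents."""
    digest = hashlib.sha256()
    with open(path, 'rb') as infile:
        block = infile.read(blocksize)
        while block:
            digest.update(block)
            block = infile.read(blocksize)
    return digest.hexdigest()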
Example No. 2
import hashlib
import json
import logging
import os
import random
from datetime import datetime
from stat import S_ISREG
from string import ascii_letters, digits

# rabin (the content-defined chunker) and utils are imported from
# elsewhere in the project


def backup(backend, src, tag="default"):
    # Try to load old metadata from latest backup
    old_backups = backend.list(prefix="b-*")
    old_meta_data = {}
    if old_backups:
        backup_id = utils.newest_backup_id(old_backups)
        om = backend.get(backup_id)
        try:
            old_meta_data = json.loads(om)
        except ValueError:
            # corrupt metadata is ignored; every file is backed up again
            pass

    start_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    path = os.path.expanduser(src)
    files = utils.find_modified_files(path)
    chunk_size = chunk_count = changed_bytes = 0
    for filename, meta in files.items():
        # Assume the file is unchanged if neither mtime nor size has changed.
        # JSON keys are unicode; filesystem names are bytes (Python 2).
        old = old_meta_data.get(unicode(filename, 'utf-8'))
        if old and old['m'] == meta['m'] and old['s'] == meta['s']:
            old_checksum = old.get('c')
            if old_checksum:
                meta['c'] = old_checksum
            logging.info("Skipped unchanged %s" % filename)
            continue

        fullname = os.path.join(path, filename)
        if not S_ISREG(meta['p']):  # not a file
            continue

        my_sha256 = hashlib.sha256()

        chunk_checksums = []
        try:
            chunks = rabin(fullname)
        except IOError:
            logging.warning("%s not found, skipping" % fullname)
            continue
        with open(fullname, 'rb') as infile:
            for chunksize in chunks:
                data = infile.read(chunksize)
                my_sha256.update(data)
                chunk_checksum = utils.sha256_string(data)
                name = "c-%s" % chunk_checksum
                chunk_checksums.append(chunk_checksum)
                stored = backend.put(name, data)
                changed_bytes += len(data)
                if stored:
                    chunk_size += len(data)
                    chunk_count += 1
        if len(chunk_checksums) > 1:
            # multi-chunk file: store the ordered chunk list under the
            # whole-file checksum so restore can reassemble the file
            checksum = my_sha256.hexdigest()
            name = "o-%s" % checksum
            backend.put(name, ';'.join(chunk_checksums))
        else:
            # single-chunk file: point directly at the stored chunk
            name = "c-%s" % chunk_checksums[0]
        meta['c'] = name
        logging.info(fullname)

    # write backup summary
    meta_data = json.dumps(files)
    suffix = ''.join(random.choice(ascii_letters + digits) for _ in range(8))
    backup_id = "b-%s-%s-%s" % (tag, start_time, suffix)
    backend.put(backup_id, meta_data)
    logging.info("Finished backup %s. %s bytes changed" % (
                 backup_id, changed_bytes))
    logging.info("Stored %s new objects with a total size of %s bytes" % (
                 chunk_count, chunk_size))
    return backup_id
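
The function touches storage through just three backend calls: backend.list(prefix=...) to enumerate keys (the "b-*" argument suggests glob matching), backend.get(key) to fetch an object, and backend.put(name, data), whose return value appears to indicate whether the object was newly stored, which is what drives the deduplication counters above. A minimal in-memory backend matching that interface, offered as a sketch for experimentation rather than the project's real storage backend:

import fnmatch

class MemoryBackend(object):
    """Toy in-memory backend matching the interface backup() relies on."""

    def __init__(self):
        self.objects = {}

    def list(self, prefix="*"):
        # treat prefix as a glob pattern, matching the "b-*" usage above
        return [key for key in self.objects if fnmatch.fnmatch(key, prefix)]

    def get(self, name):
        return self.objects[name]

    def put(self, name, data):
        # content-addressed keys make deduplication a dictionary lookup:
        # report True only when the object was not already stored
        if name in self.objects:
            return False
        self.objects[name] = data
        return True

Because chunk names are derived from their SHA-256 checksums, a second backup of unchanged data puts the same keys again and put() reports that nothing new was stored.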
Example No. 3
    def test_find_files(self):
        """ Test that all files in the directory tree are found """
        files = utils.find_modified_files(self.tempdir)
        self.assertIn('sample', files)
        self.assertIn('sub/', files)
        self.assertIn('sub/file', files)
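
utils.find_modified_files is only seen from the outside in these examples: it maps paths relative to the scanned root (directories carrying a trailing slash, as 'sub/' shows) to metadata dicts whose keys backup() reads as 'm' (mtime), 's' (size) and 'p' (mode). The real helper may well differ, but an interface-compatible sketch based on that usage:

import os

def find_modified_files(path):
    """Map relative paths under path to {'m': mtime, 's': size, 'p': mode}."""
    files = {}
    for dirpath, dirnames, filenames in os.walk(path):
        for name in dirnames + filenames:
            fullname = os.path.join(dirpath, name)
            relname = os.path.relpath(fullname, path)
            if name in dirnames:
                relname += '/'  # directories carry a trailing slash
            st = os.stat(fullname)
            files[relname] = {'m': int(st.st_mtime),
                              's': st.st_size,
                              'p': st.st_mode}
    return files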