def _iter_chunk_data(infile, chunk_sizes):
    """Yield the data of each chunk read from *infile*.

    If *chunk_sizes* is empty (presumably the case for an empty file --
    depends on the chunker), a single empty chunk is yielded so the caller
    always sees at least one chunk.
    """
    produced = False
    for size in chunk_sizes:
        produced = True
        yield infile.read(size)
    if not produced:
        yield b""


def _store_file(backend, fullname, chunk_sizes):
    """Upload the content of *fullname* to *backend*, chunk by chunk.

    Every chunk is put under ``c-<checksum of the chunk>``.  A file made of
    more than one chunk additionally gets an ``o-<sha256 of the whole file>``
    object holding the ';'-joined chunk checksums.

    Returns a tuple ``(name, bytes_read, bytes_stored, objects_stored)``
    where *name* is the object name that represents the whole file.
    """
    whole_file_hash = hashlib.sha256()
    chunk_checksums = []
    bytes_read = bytes_stored = objects_stored = 0

    # Binary mode: the data is hashed and uploaded verbatim, so no newline
    # translation or text decoding may happen.
    with open(fullname, "rb") as infile:
        for data in _iter_chunk_data(infile, chunk_sizes):
            whole_file_hash.update(data)
            chunk_checksum = utils.sha256_string(data)
            chunk_checksums.append(chunk_checksum)
            bytes_read += len(data)
            # A truthy result from put() is counted as "new object stored".
            if backend.put("c-%s" % chunk_checksum, data):
                bytes_stored += len(data)
                objects_stored += 1

    if len(chunk_checksums) > 1:
        name = "o-%s" % whole_file_hash.hexdigest()
        backend.put(name, ';'.join(chunk_checksums))
    else:
        # Single chunk: the chunk object itself represents the file.
        name = "c-%s" % chunk_checksums[0]
    return name, bytes_read, bytes_stored, objects_stored


def backup(backend, src, tag="default"):
    """Back up the tree at *src* to *backend* and return the new backup id.

    The metadata of the newest existing backup (if any) is used to skip
    files whose 'm' and 's' entries (mtime and size) are unchanged; for
    those the previous 'c' entry is carried forward.  All other regular
    files are chunked, uploaded, and their 'c' entry is set to the name of
    the stored object.  Finally the metadata of all files is stored as JSON
    under ``b-<tag>-<start time>-<random suffix>``.

    :param backend: object providing ``list(prefix=...)``, ``get(name)``
        and ``put(name, data)``.
    :param src: directory to back up; ``~`` is expanded.
    :param tag: label embedded in the backup id.
    :returns: the id under which the backup summary was stored.
    """
    # Try to load old metadata from latest backup
    old_meta_data = {}
    old_backups = backend.list(prefix="b-*")
    if old_backups:
        previous_id = utils.newest_backup_id(old_backups)
        try:
            loaded = json.loads(backend.get(previous_id))
        except ValueError:
            logging.warning(
                "Metadata of %s is not valid JSON, ignoring it", previous_id)
        else:
            if isinstance(loaded, dict):
                old_meta_data = loaded
            else:
                logging.warning(
                    "Metadata of %s has an unexpected format, ignoring it",
                    previous_id)

    start_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    path = os.path.expanduser(src)
    files = utils.find_modified_files(path)
    chunk_size = chunk_count = changed_bytes = 0

    for filename, meta in files.items():
        # Keys of the JSON metadata are text; decode byte names to match.
        if isinstance(filename, bytes):
            key = filename.decode('utf-8')
        else:
            key = filename

        # Assume file is unchanged if neither mtime nor size is changed
        old = old_meta_data.get(key)
        if old and old['m'] == meta['m'] and old['s'] == meta['s']:
            old_checksum = old.get('c')
            if old_checksum:
                # Only skip when the previous checksum can be carried
                # forward; otherwise fall through and re-read the file so
                # it is never recorded without content.
                meta['c'] = old_checksum
                logging.info("Skipped unchanged %s", filename)
                continue

        if not S_ISREG(meta['p']):  # not a file
            continue

        fullname = os.path.join(path, filename)
        try:
            chunks = rabin(fullname)
        except IOError:
            logging.warning("%s not found, skipping", fullname)
            continue

        name, read_bytes, stored_bytes, stored_objects = _store_file(
            backend, fullname, chunks)
        changed_bytes += read_bytes
        chunk_size += stored_bytes
        chunk_count += stored_objects
        meta['c'] = name
        logging.info(fullname)

    # write backup summary
    meta_data = json.dumps(files)
    suffix = ''.join(random.choice(ascii_letters + digits) for _ in range(8))
    backup_id = "b-%s-%s-%s" % (tag, start_time, suffix)
    backend.put(backup_id, meta_data)
    logging.info(
        "Finished backup %s. %s bytes changed", backup_id, changed_bytes)
    logging.info(
        "Stored %s new objects with a total size of %s bytes",
        chunk_count, chunk_size)
    return backup_id
def test_newest_backup_id(self):
    # Each case pairs the candidate ids with the id expected to be picked;
    # the last case has an id behind a path-like prefix.
    cases = (
        (['b-1', ], 'b-1'),
        (['b-1', 'b-2'], 'b-2'),
        (['b-1', 'a/b/c/b-2'], 'b-2'),
    )
    for candidates, expected in cases:
        self.assertEqual(expected, utils.newest_backup_id(candidates))