def compute_diff(
    orig_file: IOIter,
    new_file: IOIter,
    diff_file: IOIter,
    discard_diff_percentage: Optional[float] = None,
) -> IOIter:
    """ Compute a block-by-block binary diff between two open files

    Blocks from the two readers are paired up in order; when one reader runs out
    first, the missing blocks are treated as empty bytes.  Each pair is diffed
    with bsdiff4, and the result is written to diff_file as one record: the diff
    length in decimal, then SEPARATOR, then the diff bytes.

    :param orig_file: an IOIter object whose contents are the "original" data
    :param new_file: an IOIter object whose contents are the "new" data
    :param diff_file: an IOIter object where the diff data will be written
    :param discard_diff_percentage: optional multiplier applied to orig_file.size;
        as soon as the running total of record bytes exceeds that product the
        computation is aborted.  A falsy value (None or 0) disables the check
    :returns: diff_file, after all records have been sent to its writer
    :raises DiffTooLargeException: if the size limit described above is exceeded
    """
    diff_writer = diff_file.writer()
    next(diff_writer)  # advance the writer once so that it can accept send()
    bytes_produced = 0

    logger.debug2('beginning diff computation')
    block_pairs = zip_longest(orig_file.reader(), new_file.reader(), fillvalue=b'')
    for old_block, new_block in block_pairs:
        patch = bsdiff4.diff(old_block, new_block)
        length_prefix = str(len(patch)).encode()
        record = length_prefix + SEPARATOR + patch
        bytes_produced += len(record)

        # The record that crosses the limit is counted but never written
        if discard_diff_percentage:
            if bytes_produced > orig_file.size * discard_diff_percentage:
                raise DiffTooLargeException
        diff_writer.send(record)

    return diff_file
def _load(self, path: str, output_file: IOIter) -> IOIter:
    """ Fetch an object from this store's bucket and stream it into output_file

    :param path: key of the object to fetch; backslashes are converted to
        forward slashes before the lookup
    :param output_file: an IOIter object that receives the object's contents
    :returns: output_file, after every chunk has been sent to its writer
    """
    path = path.replace('\\', '/')
    logger.info(f'Reading s3://{self._bucket}/{path} into {output_file.filename}')

    fetched = self._client.get_object(Bucket=self._bucket, Key=path)
    sink = output_file.writer()
    next(sink)  # advance the writer once so that it can accept send()

    # Copy the body across in BLOCK_SIZE pieces
    body = fetched['Body']
    for chunk in body.iter_chunks(BLOCK_SIZE):
        sink.send(chunk)
    return output_file
def decrypt_and_unpack(
    input_file: IOIter,
    output_file: IOIter,
    key_pair: Optional[bytes],
    options: OptionsDict,
) -> None:
    """ Decrypt and decompress a stream, then check its signature

    Each chunk read from input_file is (optionally) decrypted with AES in CTR
    mode, (optionally) zlib-decompressed, and sent to output_file's writer.  When
    encryption is enabled an HMAC-SHA256 is accumulated over the ciphertext and
    compared against the signature stored in key_pair.  The comparison happens
    only after all data has been written out.

    :param input_file: an IOIter object to read compressed ciphertext from
    :param output_file: an IOIter object to write plaintext data to
    :param key_pair: the AES key (first AES_KEY_SIZE bytes), followed by the
        nonce (next AES_BLOCK_SIZE bytes), followed by the expected signature
        (everything after that); a falsy value means all three are empty
    :param options: the 'use_encryption' and 'use_compression' entries choose
        which of the two transformations are applied
    :raises BackupCorruptedError: if encryption is enabled and the computed HMAC
        does not match the stored signature
    """
    if key_pair:
        nonce_end = AES_KEY_SIZE + AES_BLOCK_SIZE
        key = key_pair[:AES_KEY_SIZE]
        nonce = key_pair[AES_KEY_SIZE:nonce_end]
        signature = key_pair[nonce_end:]
    else:
        key = nonce = signature = b''

    decrypted_data = b''

    decrypt_fn: Callable[[bytes], bytes]
    if options['use_encryption']:
        cipher = Cipher(AES(key), CTR(nonce), backend=default_backend())
        decrypt_fn = cipher.decryptor().update
    else:
        decrypt_fn = identity

    # The decompressor object is always created: its unused_data attribute is
    # read after every chunk, even when compression is switched off
    inflater = zlib.decompressobj()
    unzip_fn: Callable[[bytes], bytes]
    if options['use_compression']:
        unzip_fn = inflater.decompress  # type: ignore
    else:
        unzip_fn = identity

    mac = HMAC(key, SHA256(), default_backend())
    sink = output_file.writer()
    next(sink)  # advance the writer once so that it can accept send()

    def write_unzipped(data: bytes) -> None:
        # Decompress (or pass through) one buffer and hand it to the writer
        block = unzip_fn(data)
        logger.debug2(f'unzip_fn returned {len(block)} bytes')
        sink.send(block)

    for chunk in input_file.reader():
        if options['use_encryption']:
            mac.update(chunk)  # the signature covers the ciphertext
        decrypted_data += decrypt_fn(chunk)
        logger.debug2(f'decrypt_fn returned {len(decrypted_data)} bytes')

        write_unzipped(decrypted_data)
        # Carry over whatever the decompressor reports as unused
        decrypted_data = inflater.unused_data

    # Decompress and write out the last block
    if decrypted_data:
        write_unzipped(decrypted_data)

    if options['use_encryption']:
        try:
            mac.verify(signature)
        except InvalidSignature as e:
            raise BackupCorruptedError("The file's signature did not match the data") from e
def compress_and_encrypt(
    input_file: IOIter,
    output_file: IOIter,
    key_pair: Optional[bytes],
    options: OptionsDict,
) -> bytes:
    """ Compress and encrypt a stream, returning a signature of the result

    Each block read from input_file is (optionally) zlib-compressed, then
    (optionally) encrypted with AES in CTR mode, and sent to output_file's
    writer.  After the input is exhausted, one more block is written holding
    whatever the compressor flushes (or empty bytes if compression is off).
    When encryption is enabled an HMAC-SHA256 is accumulated over every block
    that is written.

    :param input_file: an IOIter object to read plaintext data from
    :param output_file: an IOIter object to write compressed ciphertext to
    :param key_pair: the AES key (first AES_KEY_SIZE bytes) followed by the nonce
        (everything after that); a falsy value means both are empty
    :param options: the 'use_encryption' and 'use_compression' entries choose
        which of the two transformations are applied
    :returns: the finalized HMAC if encryption is enabled, otherwise b''
    """
    if key_pair:
        key, nonce = key_pair[:AES_KEY_SIZE], key_pair[AES_KEY_SIZE:]
    else:
        key, nonce = b'', b''

    compressobj = zlib.compressobj()
    zip_fn: Callable[[bytes], bytes]
    if options['use_compression']:
        zip_fn = compressobj.compress  # type: ignore
    else:
        zip_fn = identity

    encrypt_fn: Callable[[bytes], bytes]
    if options['use_encryption']:
        cipher = Cipher(AES(key), CTR(nonce), backend=default_backend())
        encrypt_fn = cipher.encryptor().update
    else:
        encrypt_fn = identity

    hmac = HMAC(key, SHA256(), default_backend())

    writer = output_file.writer()
    next(writer)  # advance the writer once so that it can accept send()

    def seal_and_write(block: bytes) -> None:
        # Encrypt (or pass through) one block, sign it if needed, and write it
        block = encrypt_fn(block)
        logger.debug2(f'encrypt_fn returned {len(block)} bytes')
        if options['use_encryption']:
            hmac.update(block)
        writer.send(block)

    logger.debug2('starting to compress')
    for block in input_file.reader():
        block = zip_fn(block)
        logger.debug2(f'zip_fn returned {len(block)} bytes')
        seal_and_write(block)

    # The trailing block is already-compressed output from the compressor, so it
    # goes straight to the encryption step without another pass through zip_fn
    if options['use_compression']:
        trailer = compressobj.flush()
    else:
        trailer = b''
    seal_and_write(trailer)

    if options['use_encryption']:
        return hmac.finalize()
    return b''
def apply_diff(orig_file: IOIter, diff_file: IOIter, new_file: IOIter) -> None:
    """ Rebuild a new file by applying a diff to an original file

    The diff is a sequence of records, each made of a decimal length, SEPARATOR,
    and that many bytes of bsdiff4 patch data.  Records are applied in order, each
    against the next block read from orig_file; once orig_file has no blocks left,
    the remaining patches are applied against empty bytes.

    :param orig_file: an IOIter object whose contents are the "original" data
    :param diff_file: an IOIter object whose contents are the diff to be applied
    :param new_file: an IOIter object where the new file data will be written
    :raises DiffParseError: if data is left over after the diff has been fully
        read and no further complete record can be parsed from it
    :raises ValueError: propagated from int() if a length field is not a number
    """
    # Bytes read from the diff that have not been consumed as a full record yet
    diff = b''
    new_writer = new_file.writer()
    next(new_writer)  # advance the writer once so that it can accept send()
    orig_reader = orig_file.reader()

    logger.debug2('applying diff')
    for incoming in diff_file.reader():
        diff += incoming

        # Consume as many complete records as the buffer currently holds
        while diff:
            try:
                length_field, remainder = diff.split(SEPARATOR, 1)
            except ValueError:
                # No separator in the buffer yet; go and read more data
                break

            patch_len = int(length_field)
            if patch_len > len(remainder):
                # The patch body is still incomplete; go and read more data
                break

            orig_block = next(orig_reader, b'')
            patch = remainder[:patch_len]
            new_writer.send(bsdiff4.patch(orig_block, patch))
            diff = remainder[patch_len:]

    if diff:
        raise DiffParseError(f'Un-parseable diff: {diff}')  # type: ignore