def files_are_different(src, cached_item, multi_part_upload_threshold, storage_provider):
    """Return True if the local file *src* does not match a cached backup entry.

    A file is considered different when its size differs from the cached size,
    or when neither hash representation matches the cached 'MD5' value. Hash
    mismatches are ignored for the LOCAL provider, which doesn't provide
    reliable hashes.

    :param src: path-like object for the local file (must support .stat())
    :param cached_item: dict with at least 'size' and 'MD5' keys
    :param multi_part_upload_threshold: size (bytes) above which S3 multipart
        hashing applies; may be None to disable the multipart path
    :param storage_provider: provider identifier compared against Provider.LOCAL
    :return: bool — True when the file should be treated as changed
    """
    multi_part_threshold = int(multi_part_upload_threshold) if multi_part_upload_threshold is not None else -1
    # Stat once: avoids a redundant syscall and a potential race where the file
    # changes between the hash decision and the size comparison below.
    src_size = src.stat().st_size
    if src_size >= multi_part_threshold and multi_part_threshold > 0 and is_aws_s3(storage_provider):
        # Large S3 objects are hashed the way S3 computes multipart ETags.
        md5_hash = md5_multipart(src)
        hex_hash = ""
    else:
        md5_hash = generate_md5_hash(src)
        # Hex form of the decoded digest — some providers (GCS) store it this way.
        hex_hash = base64.b64decode(md5_hash).hex()
    return (src_size != cached_item['size']
            or (md5_hash != cached_item['MD5']  # single or multi part md5 hash. Used by S3 uploads.
                and hex_hash != cached_item['MD5']  # b64 encoded md5 hash. Used by GCS.
                and storage_provider != Provider.LOCAL))  # the local provider doesn't provide reliable hashes.
def replace_or_remove_if_cached(self, *, keyspace, columnfamily, srcs):
    """Partition *srcs* into files to upload and files already backed up.

    For each source file, consult the object cache (populated only for GCS
    or differential mode). Files absent from the cache, or whose cached
    entry doesn't match, are retained for upload. Matching files are either
    copied from the cache path (full backup over a full cache), re-uploaded
    (differential backup over a full cache), or skipped entirely with a
    manifest entry pointing at the previous backup.

    :param keyspace: keyspace name, half of the cache lookup key
    :param columnfamily: table name, other half of the cache lookup key
    :param srcs: iterable of path-like source files
    :return: tuple (retained, skipped) — paths to upload and manifest
        objects for files reused from the previous backup
    """
    retained = []
    skipped = []
    path_prefix = self._storage_driver.get_path_prefix(self._data_path)
    # Loop invariants hoisted: the cache key and the multipart threshold do
    # not depend on the individual source file.
    fqtn = (keyspace, columnfamily)
    threshold = self._storage_config.multi_part_upload_threshold \
        if is_aws_s3(self._storage_provider) else None
    for src in srcs:
        if src.name in self.NEVER_BACKED_UP:
            # Files like the system auth snapshots are never uploaded.
            continue
        cached_item = None
        if self._storage_provider == Provider.GOOGLE_STORAGE or self._differential_mode is True:
            cached_item = self._cached_objects.get(fqtn, {}).get(src.name)
        if cached_item is None or not self._storage_driver.file_matches_cache(
                src, cached_item, threshold):
            # We have no matching object in the cache matching the file
            retained.append(src)
        else:
            # File was already present in the previous backup
            # In case the backup isn't differential or the cache backup isn't differential, copy from cache
            if self._differential_mode is False and self._node_backup_cache_is_differential is False:
                prefixed_path = '{}{}'.format(path_prefix, cached_item['path'])
                cached_item_path = self._storage_driver.get_cache_path(prefixed_path)
                retained.append(cached_item_path)
            # This backup is differential, but the cached one wasn't
            # We must re-upload the files according to the differential format
            elif self._differential_mode is True and self._node_backup_cache_is_differential is False:
                retained.append(src)
            else:
                # in case the backup is differential, we want to rule out files, not copy them from cache
                manifest_object = self._make_manifest_object(path_prefix, cached_item)
                logging.debug(
                    "Skipping upload of {} which was already part of the previous backup"
                    .format(cached_item['path']))
                skipped.append(manifest_object)
                self._replaced += 1
    return retained, skipped