Example #1
import base64


def files_are_different(src, cached_item, multi_part_upload_threshold, storage_provider):
    """Return True when the local file differs from its cached manifest entry,
    comparing sizes first and MD5 hashes where the provider exposes reliable ones."""
    multi_part_threshold = int(multi_part_upload_threshold) if multi_part_upload_threshold is not None else -1
    if src.stat().st_size >= multi_part_threshold and multi_part_threshold > 0 and is_aws_s3(storage_provider):
        md5_hash = md5_multipart(src)
        b64_encoded_hash = ""
    else:
        md5_hash = generate_md5_hash(src)
        b64_encoded_hash = base64.b64decode(md5_hash).hex()

    return (src.stat().st_size != cached_item['size']
            or (md5_hash != cached_item['MD5']  # single or multi part md5 hash. Used by S3 uploads.
                and b64_encoded_hash != cached_item['MD5']  # hex md5 digest decoded from the b64 hash. Used by GCS.
                and storage_provider != Provider.LOCAL))  # the local provider doesn't provide reliable hashes.
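A minimal sketch of how files_are_different could be exercised. Provider, is_aws_s3, generate_md5_hash and md5_multipart below are hypothetical stand-ins written for this sketch, not the real module-level helpers.

import base64
import hashlib
from pathlib import Path


class Provider:
    # Hypothetical stand-in for the storage provider constants used above.
    S3 = "s3"
    GOOGLE_STORAGE = "google_storage"
    LOCAL = "local"


def is_aws_s3(storage_provider):
    # Hypothetical helper: only the S3 provider counts as AWS S3.
    return storage_provider == Provider.S3


def generate_md5_hash(src):
    # Hypothetical helper: base64-encoded MD5 digest of the file contents.
    return base64.b64encode(hashlib.md5(Path(src).read_bytes()).digest()).decode("ascii")


def md5_multipart(src):
    # Hypothetical helper: the real multi-part ETag computation is chunked;
    # a plain hex digest is enough for this sketch.
    return hashlib.md5(Path(src).read_bytes()).hexdigest()


if __name__ == "__main__":
    src = Path("example.db")
    src.write_bytes(b"some sstable bytes")
    cached_item = {"size": src.stat().st_size, "MD5": generate_md5_hash(src)}
    # Same size and matching MD5, so the file should not be re-uploaded.
    print(files_are_different(src, cached_item, None, Provider.GOOGLE_STORAGE))  # False
    # Changing the cached size makes the comparison fail immediately.
    cached_item["size"] += 1
    print(files_are_different(src, cached_item, None, Provider.GOOGLE_STORAGE))  # True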
Example #2
    def replace_or_remove_if_cached(self, *, keyspace, columnfamily, srcs):
        retained = list()
        skipped = list()
        path_prefix = self._storage_driver.get_path_prefix(self._data_path)
        for src in srcs:
            if src.name in self.NEVER_BACKED_UP:
                pass
            else:
                fqtn = (keyspace, columnfamily)
                cached_item = None
                if self._storage_provider == Provider.GOOGLE_STORAGE or self._differential_mode is True:
                    cached_item = self._cached_objects.get(fqtn,
                                                           {}).get(src.name)

                threshold = (self._storage_config.multi_part_upload_threshold
                             if is_aws_s3(self._storage_provider) else None)
                if cached_item is None or not self._storage_driver.file_matches_cache(
                        src, cached_item, threshold):
                    # No object in the cache matches this file
                    retained.append(src)
                else:
                    # File was already present in the previous backup
                    # Neither this backup nor the cached one is differential, so copy the file from the cache
                    if self._differential_mode is False and self._node_backup_cache_is_differential is False:
                        prefixed_path = '{}{}'.format(path_prefix,
                                                      cached_item['path'])
                        cached_item_path = self._storage_driver.get_cache_path(
                            prefixed_path)
                        retained.append(cached_item_path)
                    # This backup is differential, but the cached one wasn't
                    # We must re-upload the files according to the differential format
                    elif self._differential_mode is True and self._node_backup_cache_is_differential is False:
                        retained.append(src)
                    else:
                        # The cached backup is differential, so rule the file out instead of copying it from the cache
                        manifest_object = self._make_manifest_object(
                            path_prefix, cached_item)
                        logging.debug(
                            "Skipping upload of {} which was already part of the previous backup"
                            .format(cached_item['path']))
                        skipped.append(manifest_object)
                    self._replaced += 1

        return retained, skipped
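The three branches above reduce to a small decision table over the two differential flags. A minimal sketch of that table; cached_file_action is a hypothetical helper written for illustration, not part of the class.

def cached_file_action(differential_mode, cache_is_differential):
    # Mirrors what replace_or_remove_if_cached does with a file whose cached
    # entry matches: copy it from the cache, re-upload it, or skip it.
    if not differential_mode and not cache_is_differential:
        return "retain the cached path (copy from the previous full backup)"
    if differential_mode and not cache_is_differential:
        return "retain src (re-upload it in the differential layout)"
    return "skip it (reference the existing differential object in the manifest)"


if __name__ == "__main__":
    for differential_mode in (False, True):
        for cache_is_differential in (False, True):
            print(differential_mode, cache_is_differential, "->",
                  cached_file_action(differential_mode, cache_is_differential))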