Exemplo n.º 1
0
 def delete_file(self, content_hash):
     """Remove every S3 object stored under the given content hash."""
     if content_hash is None:
         return
     prefix = path_prefix(content_hash)
     if prefix is None:
         return
     # Enumerate all objects beneath the hash prefix and delete each one.
     listing = self.client.list_objects(Bucket=self.bucket, Prefix=prefix)
     for entry in listing.get("Contents", []):
         key = entry.get("Key")
         self.client.delete_object(Bucket=self.bucket, Key=key)
Exemplo n.º 2
0
 def delete_file(self, content_hash):
     """Delete all files stored under the given content hash.

     The previous version returned after unlinking the first directory
     entry, which left stray sibling files behind when more than one
     name had been archived for the same hash (the S3 backend deletes
     every matching object).
     """
     prefix = path_prefix(content_hash)
     if prefix is None:
         return
     path = self.path.joinpath(prefix)
     try:
         for file_name in path.iterdir():
             # Remove every entry, not just the first one found.
             file_name.unlink()
     except FileNotFoundError:
         # Nothing stored for this hash; treat as already deleted.
         return
Exemplo n.º 3
0
 def _locate_key(self, content_hash):
     """Return the resolved path of the first file stored under the
     hash prefix, or None when nothing is archived there."""
     prefix = path_prefix(content_hash)
     if prefix is None:
         return
     directory = self.path.joinpath(prefix)
     try:
         # iterdir() is lazy; next() triggers the actual directory read,
         # so a missing directory raises here and is swallowed below.
         entry = next(directory.iterdir(), None)
     except FileNotFoundError:
         return
     if entry is not None:
         return entry.resolve()
Exemplo n.º 4
0
    def delete_file(self, content_hash):
        """Delete every blob stored under the given content hash."""
        # Reject missing or truncated hashes before deriving a prefix.
        prefix = None
        if content_hash is not None and len(content_hash) >= HASH_LENGTH:
            prefix = path_prefix(content_hash)
        if prefix is None:
            return

        # Walk all blobs beneath the prefix and remove them one by one.
        for item in self.client.list_blobs(self.bucket, prefix=prefix):
            self._delete_blob(item)
Exemplo n.º 5
0
 def list_files(self, prefix=None):
     """Yield the content hash of every file stored below *prefix*
     (the whole archive when no prefix is given)."""
     base = self.path.joinpath(path_prefix(prefix) or "")
     # An existing directory gets a plain recursive glob; a partial
     # prefix must first complete the directory name with a wildcard.
     if base.is_dir():
         pattern = f"{base}/**/*"
     else:
         pattern = f"{base}*/**/*"
     for candidate in glob.iglob(pattern, recursive=True):
         if os.path.isfile(candidate):
             yield path_content_hash(candidate)
Exemplo n.º 6
0
 def _locate_key(self, content_hash=None, prefix=None):
     """Return the key of one S3 object stored for the given hash (or
     under the explicit *prefix*), or None when none exists."""
     if prefix is None:
         # No explicit prefix: derive one from the content hash.
         if content_hash is None:
             return
         prefix = path_prefix(content_hash)
         if prefix is None:
             return
     response = self.client.list_objects(
         MaxKeys=1, Bucket=self.bucket, Prefix=prefix
     )
     contents = response.get("Contents", [])
     if contents:
         return contents[0].get("Key")
Exemplo n.º 7
0
 def list_files(self, prefix=None):
     """Try to list out all the hashes in the archive."""
     params = {"Bucket": self.bucket}
     search = path_prefix(prefix)
     if search is not None:
         params["Prefix"] = search
     # Page through list_objects_v2 results via the continuation token.
     while True:
         page = self.client.list_objects_v2(**params)
         for item in page.get("Contents", []):
             yield path_content_hash(item.get("Key"))
         if not page.get("IsTruncated"):
             return
         params["ContinuationToken"] = page.get("NextContinuationToken")
Exemplo n.º 8
0
    def _locate_contenthash(self, content_hash):
        """Return a blob stored for the given content hash, or None.

        Tries the canonical "data" blob name first, then falls back to
        the first blob found under the hash prefix.
        """
        if content_hash is None:
            return
        prefix = path_prefix(content_hash)
        if prefix is None:
            return

        # Fast path: the standard blob name used when archiving.
        candidate = Blob(os.path.join(prefix, "data"), self.bucket)
        if candidate.exists():
            return candidate

        # Slow path: any blob stored under the hash prefix.
        listing = self.client.list_blobs(self.bucket,
                                         max_results=1,
                                         prefix=prefix)
        for found in listing:
            return found
Exemplo n.º 9
0
    def archive_file(self, file_path, content_hash=None, mime_type=None):
        """Import the given file into the archive."""
        if content_hash is None:
            content_hash = checksum(file_path)
        if content_hash is None:
            return
        # Already archived under this hash? Then there is nothing to copy.
        if self._locate_key(content_hash):
            return content_hash

        target_dir = self.path.joinpath(path_prefix(content_hash))
        target_dir.mkdir(parents=True, exist_ok=True)
        target = target_dir.joinpath(safe_filename(file_path, default="data"))
        # Stream the source into the archive in BUF_SIZE chunks.
        with open(file_path, "rb") as source, open(target, "wb") as sink:
            shutil.copyfileobj(source, sink, BUF_SIZE)
        return content_hash
Exemplo n.º 10
0
    def archive_file(self, file_path, content_hash=None, mime_type=None):
        """Store the file located at the given path on Google, based on a path
        made up from its SHA1 content hash."""
        file_path = ensure_path(file_path)
        if content_hash is None:
            content_hash = checksum(file_path)
        if content_hash is None:
            return

        file_path = ensure_posix_path(file_path)
        # NOTE(review): the de-duplication probe (_locate_contenthash) is
        # disabled here, so every call re-uploads the blob — confirm this
        # is intentional before re-enabling it.
        for attempt in service_retries():
            try:
                blob_name = os.path.join(path_prefix(content_hash), "data")
                blob = Blob(blob_name, self.bucket)
                blob.upload_from_filename(file_path, content_type=mime_type)
                return content_hash
            except FAILURES:
                log.exception("Store error in GS")
                backoff(failures=attempt)
Exemplo n.º 11
0
    def archive_file(self, file_path, content_hash=None, mime_type=None):
        """Store the file located at the given path on S3, based on a path
        made up from its SHA1 content hash.

        Returns the content hash, or None when no hash can be computed.
        """
        file_path = ensure_path(file_path)
        if content_hash is None:
            content_hash = checksum(file_path)

        # Guard restored: without a hash, path_prefix(None) flows into
        # the format call below and produces a bogus "None/data" key.
        # This matches the None handling of the other archive backends.
        if content_hash is None:
            return

        # Skip the upload when an object for this hash already exists.
        obj = self._locate_key(content_hash)
        if obj is not None:
            return content_hash

        path = "{}/data".format(path_prefix(content_hash))
        extra = {}
        if mime_type is not None:
            extra["ContentType"] = mime_type
        with open(file_path, "rb") as fh:
            self.client.upload_fileobj(fh,
                                       self.bucket,
                                       str(path),
                                       ExtraArgs=extra)
        return content_hash
Exemplo n.º 12
0
 def list_files(self, prefix=None):
     """Yield the content hash of every blob in the archive, optionally
     restricted to the given path prefix."""
     prefix = path_prefix(prefix)
     # Previously this returned early when no prefix was given, making a
     # bare list_files() yield nothing — inconsistent with the S3 backend,
     # which lists the whole archive. list_blobs accepts prefix=None and
     # then iterates every blob in the bucket.
     for blob in self.client.list_blobs(self.bucket, prefix=prefix):
         yield path_content_hash(blob.name)