Example #1
    def download_file(self,
                      blob,
                      local_path,
                      container_name=None,
                      use_basename=True):
        """
        Downloads a file from Azure Blob service.

        Args:
            blob: `str`. blob to download.
            local_path: `str`. the path to download to.
            container_name: `str`. the name of the container.
            use_basename: `bool`. whether or not to use the basename of the blob.
        """
        if not container_name:
            container_name, _, blob = self.parse_wasbs_url(blob)

        local_path = os.path.abspath(local_path)

        if use_basename:
            local_path = append_basename(local_path, blob)

        try:
            check_dirname_exists(local_path)
        except PolyaxonPathException as e:
            raise PolyaxonStoresException("Connection error: %s" % e) from e

        client = self.connection.get_container_client(container_name)
        try:
            with open(local_path, "wb") as file:
                client.download_blob(blob).readinto(file)
        except HttpResponseError as e:
            raise PolyaxonStoresException("Connection error: %s" % e) from e
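For context, the same download can be sketched directly against the azure-storage-blob SDK; the connection string, container, blob, and local path below are placeholders:

from azure.storage.blob import BlobServiceClient

# Placeholder credentials and names; adjust for your storage account.
service = BlobServiceClient.from_connection_string("<connection-string>")
container_client = service.get_container_client("my-container")

with open("/tmp/file.txt", "wb") as f:
    container_client.download_blob("path/to/file.txt").readinto(f)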
Example #2
    def download_file(self, key, local_path, bucket_name=None, use_basename=True):
        """
        Download a file from S3.

        Args:
            key: `str`. S3 key that will point to the file.
            local_path: `str`. the path to download to.
            bucket_name: `str`. the name of the bucket containing the key.
            use_basename: `bool`. whether or not to use the basename of the key.
        """
        if not bucket_name:
            bucket_name, key = self.parse_s3_url(key)

        local_path = os.path.abspath(local_path)

        if use_basename:
            local_path = append_basename(local_path, key)

        try:
            check_dirname_exists(local_path)
        except PolyaxonPathException as e:
            raise PolyaxonStoresException("Connection error: %s" % e) from e

        try:
            self.connection.download_file(bucket_name, key, local_path)
        except ClientError as e:
            raise PolyaxonStoresException("Connection error: %s" % e) from e
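This variant wraps boto3's download_file; a minimal standalone sketch with placeholder names:

import boto3

s3 = boto3.client("s3")
# download_file streams the object straight to disk.
s3.download_file("my-bucket", "path/to/key.txt", "/tmp/key.txt")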
Example #3
    def download_dir(
        self, blob, local_path, container_name=None, use_basename=True, workers=0
    ):
        """
        Download a directory from Azure Blob service.

        Args:
            blob: `str`. blob to download.
            local_path: `str`. the path to download to.
            container_name: `str`. the name of the container.
            use_basename: `bool`. whether or not to use the basename of the blob.
            workers: `int`. number of worker threads to use for parallel execution.
        """
        if not container_name:
            container_name, _, blob = self.parse_wasbs_url(blob)

        local_path = os.path.abspath(local_path)

        if use_basename:
            local_path = append_basename(local_path, blob)

        try:
            check_dirname_exists(local_path, is_dir=True)
        except PolyaxonPathException:
            os.makedirs(local_path)

        results = self.list(container_name=container_name, key=blob, delimiter="/")

        # Create directories
        for prefix in sorted(results["prefixes"]):
            dir_name = os.path.join(local_path, prefix)
            prefix = os.path.join(blob, prefix)
            # Recursively download the files under this prefix
            self.download_dir(
                blob=prefix,
                local_path=dir_name,
                container_name=container_name,
                use_basename=False,
            )

        pool, future_results = self.init_pool(workers)

        # Download files
        for file_key in results["blobs"]:
            file_key = file_key[0]
            filename = os.path.join(local_path, file_key)
            file_key = os.path.join(blob, file_key)
            future_results = self.submit_pool(
                workers=workers,
                pool=pool,
                future_results=future_results,
                fn=self.download_file,
                blob=file_key,
                local_path=filename,
                container_name=container_name,
                use_basename=False,
            )
        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
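The init_pool, submit_pool, and close_pool helpers are not shown in these examples. A plausible reconstruction, assuming a thread pool that is bypassed entirely when workers is 0:

from concurrent import futures

def init_pool(workers):
    # No pool when workers == 0; callers then run tasks inline.
    if workers:
        return futures.ThreadPoolExecutor(workers), []
    return None, None

def submit_pool(workers, pool, future_results, fn, **kwargs):
    # Submit to the pool when workers are requested, otherwise call directly.
    if workers:
        future_results.append(pool.submit(fn, **kwargs))
        return future_results
    fn(**kwargs)
    return future_results

def close_pool(pool):
    pool.shutdown(wait=True)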
Example #4
    def download_file(self,
                      blob,
                      local_path,
                      container_name=None,
                      use_basename=True):
        """
        Downloads a file from Azure Blob service.

        Args:
            blob: `str`. blob to download.
            local_path: `str`. the path to download to.
            container_name: `str`. the name of the container.
            use_basename: `bool`. whether or not to use the basename of the blob.
        """
        if not container_name:
            container_name, _, blob = self.parse_wasbs_url(blob)

        local_path = os.path.abspath(local_path)

        if use_basename:
            local_path = append_basename(local_path, blob)

        try:
            check_dirname_exists(local_path)
        except PolyaxonPathException as e:
            raise PolyaxonStoresException("Connection error: %s" % e) from e

        try:
            self.connection.get_blob_to_path(container_name, blob, local_path)
        except AzureHttpError as e:
            raise PolyaxonStoresException("Connection error: %s" % e) from e
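Unlike Example #1, this variant's connection is the legacy azure-storage SDK (pre-12.x), whose BlockBlobService exposes get_blob_to_path. A minimal standalone sketch with placeholder credentials:

from azure.storage.blob import BlockBlobService  # legacy azure-storage package

service = BlockBlobService(account_name="<account>", account_key="<key>")
service.get_blob_to_path("my-container", "path/to/blob.txt", "/tmp/blob.txt")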
Example #5
    def download_file(self,
                      blob,
                      local_path,
                      bucket_name=None,
                      use_basename=True):
        """
        Downloads a file from Google Cloud Storage.

        Args:
            blob: `str`. blob to download.
            local_path: `str`. the path to download to.
            bucket_name: `str`. the name of the bucket.
            use_basename: `bool`. whether or not to use the basename of the blob.
        """
        if not bucket_name:
            bucket_name, blob = self.parse_gcs_url(blob)

        local_path = os.path.abspath(local_path)

        if use_basename:
            local_path = append_basename(local_path, blob)

        try:
            check_dirname_exists(local_path)
        except PolyaxonPathException as e:
            raise PolyaxonStoresException("Connection error: %s" % e) from e

        try:
            blob = self.get_blob(blob=blob, bucket_name=bucket_name)
            blob.download_to_filename(local_path)
        except (NotFound, GoogleAPIError) as e:
            raise PolyaxonStoresException("Connection error: %s" % e) from e
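A minimal standalone sketch of the same download against the google-cloud-storage SDK directly, with placeholder names:

from google.cloud import storage

client = storage.Client()
blob = client.bucket("my-bucket").blob("path/to/blob.txt")
blob.download_to_filename("/tmp/blob.txt")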
Example #6
    def upload_dir(
        self,
        dirname,
        blob,
        container_name=None,
        use_basename=True,
        workers=0,
        last_time=None,
        exclude: List[str] = None,
    ):
        """
        Uploads a local directory to Azure Blob service.

        Args:
            dirname: `str`. name of the directory to upload.
            blob: `str`. blob to upload to.
            container_name: `str`. the name of the container.
            use_basename: `bool`. whether or not to use the basename of the directory.
            workers: `int`. number of worker threads to use for parallel execution.
            last_time: `datetime`. if provided, only files modified after last_time
                        are uploaded.
            exclude: `list`. list of paths to exclude.
        """
        if not container_name:
            container_name, _, blob = self.parse_wasbs_url(blob)

        if use_basename:
            blob = append_basename(blob, dirname)

        pool, future_results = self.init_pool(workers)

        # Turn the path into an absolute path
        dirname = os.path.abspath(dirname)
        with get_files_in_path_context(dirname, exclude=exclude) as files:
            for f in files:

                # If last time is provided we check if we should re-upload the file
                if last_time and not file_modified_since(
                    filepath=f, last_time=last_time
                ):
                    continue

                file_blob = os.path.join(blob, os.path.relpath(f, dirname))
                future_results = self.submit_pool(
                    workers=workers,
                    pool=pool,
                    future_results=future_results,
                    fn=self.upload_file,
                    filename=f,
                    blob=file_blob,
                    container_name=container_name,
                    use_basename=False,
                )

        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
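The file_modified_since helper is not shown. A hypothetical reconstruction, assuming last_time is a timezone-aware datetime compared against the file's mtime:

import os
from datetime import datetime, timezone

def file_modified_since(filepath, last_time):
    # Hypothetical reconstruction: True if the file changed after last_time.
    mtime = datetime.fromtimestamp(os.path.getmtime(filepath), tz=timezone.utc)
    return mtime > last_time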
Example #7
    def download_file(self, path_from, local_path, use_basename=True, **kwargs):
        """
        Copies a file from a local source path to a local destination.

        Args:
            path_from: `str`. the local path to copy from.
            local_path: `str`. the path to copy to.
            use_basename: `bool`. whether or not to use the basename of path_from.
        """
        local_path = os.path.abspath(local_path)

        if use_basename:
            local_path = append_basename(local_path, path_from)

        if local_path == path_from:
            return

        check_or_create_path(local_path, is_dir=False)
        if os.path.exists(path_from) and os.path.isfile(path_from):
            shutil.copy(path_from, local_path)
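check_or_create_path is also not shown; a plausible reconstruction that ensures the destination directory exists before the copy:

import os

def check_or_create_path(path, is_dir=False):
    # Hypothetical reconstruction: create the directory (or the file's
    # parent directory) if it does not exist yet.
    dir_name = path if is_dir else os.path.dirname(path)
    if dir_name and not os.path.isdir(dir_name):
        os.makedirs(dir_name, exist_ok=True)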
Example #8
    def download_dir(self,
                     blob,
                     local_path,
                     bucket_name=None,
                     use_basename=True,
                     workers=0):
        """
        Download a directory from Google Cloud Storage.

        Args:
            blob: `str`. blob to download.
            local_path: `str`. the path to download to.
            bucket_name: `str`. the name of the bucket containing the blobs.
            use_basename: `bool`. whether or not to use the basename of the blob.
            workers: `int`. number of worker threads to use for parallel execution.
        """
        if not bucket_name:
            bucket_name, blob = self.parse_gcs_url(blob)

        local_path = os.path.abspath(local_path)

        if use_basename:
            local_path = append_basename(local_path, blob)

        file_blobs = list(self.connection.list_blobs(bucket_name, prefix=blob))
        subdirs = {
            os.path.dirname(os.path.relpath(file_blob.name, blob))
            for file_blob in file_blobs
        }

        os.makedirs(local_path, exist_ok=True)
        for subdir in sorted(subdirs):
            os.makedirs(os.path.join(local_path, subdir), exist_ok=True)

        pool, future_results = self.init_pool(workers)

        # Download files
        for file_blob in file_blobs:
            filename = os.path.join(local_path,
                                    os.path.relpath(file_blob.name, blob))
            future_results = self.submit_pool(
                workers=workers,
                pool=pool,
                future_results=future_results,
                fn=_download_blob,
                blob=file_blob,
                local_path=filename,
            )

        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
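_download_blob is referenced but not defined in this excerpt; it is presumably a small module-level helper so it can be handed to the worker pool, along these lines:

def _download_blob(blob, local_path):
    # Hypothetical helper: download one google.cloud.storage Blob object.
    blob.download_to_filename(local_path)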
Example #9
    def upload_dir(
        self,
        dirname,
        blob,
        bucket_name=None,
        use_basename=True,
        workers=0,
        last_time=None,
    ):
        """
        Uploads a local directory to Google Cloud Storage.

        Args:
            dirname: `str`. name of the directory to upload.
            blob: `str`. blob to upload to.
            bucket_name: `str`. the name of the bucket.
            use_basename: `bool`. whether or not to use the basename of the directory.
            workers: `int`. number of worker threads to use for parallel execution.
            last_time: `datetime`. if provided, only files modified after last_time
                        are uploaded.
        """
        if not bucket_name:
            bucket_name, blob = self.parse_gcs_url(blob)

        if use_basename:
            blob = append_basename(blob, dirname)

        pool, future_results = self.init_pool(workers)

        # Turn the path into an absolute path
        dirname = os.path.abspath(dirname)
        with get_files_in_path_context(dirname) as files:
            for f in files:

                # If last time is provided we check if we should re-upload the file
                if last_time and not file_modified_since(filepath=f,
                                                         last_time=last_time):
                    continue

                file_blob = os.path.join(blob, os.path.relpath(f, dirname))
                future_results = self.submit_pool(
                    workers=workers,
                    pool=pool,
                    future_results=future_results,
                    fn=self.upload_file,
                    filename=f,
                    blob=file_blob,
                    bucket_name=bucket_name,
                    use_basename=False,
                )

        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
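get_files_in_path_context is not shown either. A hypothetical reconstruction as a context manager that walks the tree and yields file paths (the exclude parameter matches Examples #6 and #13):

import os
from contextlib import contextmanager

@contextmanager
def get_files_in_path_context(path, exclude=None):
    # Hypothetical reconstruction: yield every file path under `path`,
    # pruning any directory named in `exclude`.
    excluded = set(exclude or [])
    paths = []
    for root, dirs, files in os.walk(path):
        dirs[:] = [d for d in dirs if d not in excluded]
        paths.extend(os.path.join(root, f) for f in files)
    yield paths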
Example #10
    def upload_file(
        self,
        filename,
        key,
        bucket_name=None,
        overwrite=True,
        encrypt=False,
        acl=None,
        use_basename=True,
    ):
        """
        Uploads a local file to S3.

        Args:
            filename: `str`. name of the file to upload.
            key: `str`. S3 key that will point to the file.
            bucket_name: `str`. Name of the bucket in which to store the file.
            overwrite: `bool`. A flag to decide whether or not to overwrite the key
                if it already exists. If overwrite is False and the key exists, an
                error will be raised.
            encrypt: `bool`. If True, the file will be encrypted on the server-side
                by S3 and will be stored in an encrypted form while at rest in S3.
            acl: `str`. ACL to use for uploading, e.g. "public-read".
            use_basename: `bool`. whether or not to use the basename of the filename.
        """
        if not bucket_name:
            bucket_name, key = self.parse_s3_url(key)

        if use_basename:
            key = append_basename(key, filename)

        if not overwrite and self.check_key(key, bucket_name):
            raise PolyaxonStoresException(
                "The key {} already exists.".format(key))

        extra_args = {}
        if encrypt:
            extra_args["ServerSideEncryption"] = self.ENCRYPTION
        if acl:
            extra_args["ACL"] = acl

        self.connection.upload_file(filename,
                                    bucket_name,
                                    key,
                                    ExtraArgs=extra_args)
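The ExtraArgs mapping goes straight to boto3; a standalone sketch with placeholder names ("AES256" stands in for the class's ENCRYPTION attribute, which is not shown):

import boto3

s3 = boto3.client("s3")
s3.upload_file(
    "/tmp/report.csv",
    "my-bucket",
    "reports/report.csv",
    ExtraArgs={"ServerSideEncryption": "AES256", "ACL": "public-read"},
)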
Example #11
    def upload_file(self, filename, blob, bucket_name=None, use_basename=True):
        """
        Uploads a local file to Google Cloud Storage.

        Args:
            filename: `str`. the file to upload.
            blob: `str`. blob to upload to.
            bucket_name: `str`. the name of the bucket.
            use_basename: `bool`. whether or not to use the basename of the filename.
        """
        if not bucket_name:
            bucket_name, blob = self.parse_gcs_url(blob)

        if use_basename:
            blob = append_basename(blob, filename)

        bucket = self.get_bucket(bucket_name)
        bucket.blob(blob).upload_from_filename(filename)
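parse_gcs_url is used throughout the GCS examples but never shown. A hypothetical reconstruction:

from urllib.parse import urlparse

def parse_gcs_url(gcs_url):
    # Hypothetical reconstruction: "gs://bucket/path" -> (bucket, path).
    parsed = urlparse(gcs_url)
    if parsed.scheme != "gs":
        raise ValueError("Not a GCS URL: %s" % gcs_url)
    return parsed.netloc, parsed.path.lstrip("/")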
Example #12
    def upload_file(self, filename, blob, container_name=None, use_basename=True):
        """
        Uploads a local file to Azure Blob service.

        Args:
            filename: `str`. the file to upload.
            blob: `str`. blob to upload to.
            container_name: `str`. the name of the container.
            use_basename: `bool`. whether or not to use the basename of the filename.
        """
        if not container_name:
            container_name, _, blob = self.parse_wasbs_url(blob)

        if use_basename:
            blob = append_basename(blob, filename)

        client = self.connection.get_container_client(container_name)
        with open(filename, "rb") as file:
            client.upload_blob(blob, file, overwrite=True)
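parse_wasbs_url, used by all the Azure examples, returns a three-tuple. A hypothetical reconstruction consistent with that unpacking:

from urllib.parse import urlparse

def parse_wasbs_url(wasbs_url):
    # Hypothetical reconstruction:
    # "wasbs://container@account.blob.core.windows.net/path"
    # -> (container, account, path).
    parsed = urlparse(wasbs_url)
    container, _, host = parsed.netloc.partition("@")
    account = host.split(".")[0] if host else None
    return container, account, parsed.path.lstrip("/")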
Example #13
    def upload_dir(
        self,
        dirname,
        path_to,
        use_basename=True,
        workers=0,
        last_time=None,
        exclude: List[str] = None,
    ):
        """
        Uploads a local directory to a local destination path.

        Args:
            dirname: `str`. name of the directory to upload.
            path_to: `str`. the destination path.
            use_basename: `bool`. whether or not to use the basename of the directory.
            workers: `int`. number of worker threads to use for parallel execution.
            last_time: `datetime`. if provided, only files modified after last_time
                are uploaded.
            exclude: `list`. list of paths to exclude.
        """
        if use_basename:
            path_to = append_basename(path_to, dirname)

        if dirname == path_to:
            return

        check_or_create_path(path_to, is_dir=True)
        pool, future_results = self.init_pool(workers)

        # Turn the path into an absolute path
        dirname = os.path.abspath(dirname)
        with get_files_in_path_context(dirname, exclude=exclude) as files:
            for f in files:

                # If last time is provided we check if we should re-upload the file
                if last_time and not file_modified_since(
                    filepath=f, last_time=last_time
                ):
                    continue

                file_blob = os.path.join(path_to, os.path.relpath(f, dirname))
                future_results = self.submit_pool(
                    workers=workers,
                    pool=pool,
                    future_results=future_results,
                    fn=self.upload_file,
                    filename=f,
                    path_to=file_blob,
                    use_basename=False,
                )

        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
Example #14
    def upload_file(self,
                    filename,
                    blob,
                    container_name=None,
                    use_basename=True):
        """
        Uploads a local file to Azure Blob service.

        Args:
            filename: `str`. the file to upload.
            blob: `str`. blob to upload to.
            container_name: `str`. the name of the container.
            use_basename: `bool`. whether or not to use the basename of the filename.
        """
        if not container_name:
            container_name, _, blob = self.parse_wasbs_url(blob)

        if use_basename:
            blob = append_basename(blob, filename)

        self.connection.create_blob_from_path(container_name, blob, filename)
Example #15
    def download_dir(
        self,
        key: str,
        local_path: str,
        bucket_name: str = None,
        use_basename: bool = True,
        workers: int = 0,
    ):
        """
        Download a directory from S3.

        Args:
            key: `str`. S3 key that will point to the file.
            local_path: `str`. the path to download to.
            bucket_name: `str`. the name of the bucket containing the keys.
            use_basename: `bool`. whether or not to use the basename of the key.
            workers: `int`. number of worker threads to use for parallel execution.
        """
        if not bucket_name:
            bucket_name, key = self.parse_s3_url(key)

        local_path = os.path.abspath(local_path)

        if use_basename:
            local_path = append_basename(local_path, key)

        try:
            check_dirname_exists(local_path, is_dir=True)
        except PolyaxonPathException:
            os.makedirs(local_path)

        results = self.list(bucket_name=bucket_name, prefix=key, delimiter="/")

        # Create directories
        for prefix in sorted(results["prefixes"]):
            dir_name = os.path.join(local_path, prefix)
            prefix = os.path.join(key, prefix)
            # Recursively download the files under this prefix
            self.download_dir(
                key=prefix,
                local_path=dir_name,
                bucket_name=bucket_name,
                use_basename=False,
            )

        pool, future_results = self.init_pool(workers)

        # Download files
        for file_key in results["keys"]:
            file_key = file_key[0]
            filename = os.path.join(local_path, file_key)
            file_key = os.path.join(key, file_key)
            future_results = self.submit_pool(
                workers=workers,
                pool=pool,
                future_results=future_results,
                fn=self.download_file,
                key=file_key,
                local_path=filename,
                bucket_name=bucket_name,
                use_basename=False,
            )

        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
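The self.list call with a delimiter mirrors S3's prefix/delimiter listing; a standalone sketch of what it likely wraps (single page only; real code would paginate):

import boto3

s3 = boto3.client("s3")
resp = s3.list_objects_v2(Bucket="my-bucket", Prefix="data/", Delimiter="/")
prefixes = [p["Prefix"] for p in resp.get("CommonPrefixes", [])]
keys = [o["Key"] for o in resp.get("Contents", [])]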
Example #16
    def upload_dir(
        self,
        dirname,
        key,
        bucket_name=None,
        overwrite=False,
        encrypt=False,
        acl=None,
        use_basename=True,
        workers=0,
        last_time=None,
    ):
        """
        Uploads a local directory to S3.

        Args:
            dirname: `str`. name of the directory to upload.
            key: `str`. S3 key that will point to the file.
            bucket_name: `str`. Name of the bucket in which to store the file.
            overwrite: `bool`. A flag to decide whether or not to overwrite the key
                if it already exists. If overwrite is False and the key exists, an
                error will be raised.
            encrypt: `bool`. If True, the file will be encrypted on the server-side
                by S3 and will be stored in an encrypted form while at rest in S3.
            acl: `str`. ACL to use for uploading, e.g. "public-read".
            use_basename: `bool`. whether or not to use the basename of the directory.
            workers: `int`. number of worker threads to use for parallel execution.
            last_time: `datetime`. if provided, only files modified after last_time
                are uploaded.
        """
        if not bucket_name:
            bucket_name, key = self.parse_s3_url(key)

        if use_basename:
            key = append_basename(key, dirname)

        pool, future_results = self.init_pool(workers)

        # Turn the path into an absolute path
        dirname = os.path.abspath(dirname)
        with get_files_in_path_context(dirname) as files:
            for f in files:

                # If last time is provided we check if we should re-upload the file
                if last_time and not file_modified_since(
                    filepath=f, last_time=last_time
                ):
                    continue

                file_key = os.path.join(key, os.path.relpath(f, dirname))
                future_results = self.submit_pool(
                    workers=workers,
                    pool=pool,
                    future_results=future_results,
                    fn=self.upload_file,
                    filename=f,
                    key=file_key,
                    bucket_name=bucket_name,
                    overwrite=overwrite,
                    encrypt=encrypt,
                    acl=acl,
                    use_basename=False,
                )
        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
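parse_s3_url is the S3 counterpart of the URL parsers above; a hypothetical reconstruction:

from urllib.parse import urlparse

def parse_s3_url(s3_url):
    # Hypothetical reconstruction: "s3://bucket/key" -> (bucket, key).
    parsed = urlparse(s3_url)
    return parsed.netloc, parsed.path.lstrip("/")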
Example #17
    def test_append_basename(self):
        assert append_basename("foo", "bar") == "foo/bar"
        assert append_basename("foo", "moo/bar") == "foo/bar"
        assert append_basename("/foo", "bar") == "/foo/bar"
        assert append_basename("/foo/moo", "bar") == "/foo/moo/bar"
        assert append_basename("/foo/moo", "boo/bar.txt") == "/foo/moo/bar.txt"