Example #1
def upload_artifact(client, artifacts_s3_bucket, artifact_path, local_dir):
    total_size = 0
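    # First pass over local_dir: add up file sizes so the progress bar can show overall progress.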
    for root, sub_dirs, files in os.walk(local_dir):
        for filename in files:
            file_path = os.path.join(root, filename)
            file_size = os.path.getsize(file_path)
            total_size += file_size

    uploader = transfer.S3Transfer(client, transfer.TransferConfig(),
                                   transfer.OSUtils())

    with tqdm(total=total_size,
              unit='B',
              unit_scale=True,
              unit_divisor=1024,
              desc=f"Uploading artifact '{short_artifact_path(artifact_path)}'"
              ) as pbar:

        def callback(size):
            pbar.update(size)

        # Second pass: walk local_dir again and upload each file, keyed by its
        # path relative to local_dir under artifact_path.
        for root, sub_dirs, files in os.walk(local_dir):
            for filename in files:
                file_path = os.path.join(root, filename)

                key = artifact_path + __remove_prefix(
                    str(file_path), str(Path(local_dir).absolute()))
                uploader.upload_file(
                    str(file_path),
                    artifacts_s3_bucket,
                    key,
                    callback=callback,
                )
Example #2
def download_artifact(client,
                      artifacts_s3_bucket,
                      artifact_path,
                      output_dir=None):
    output_path = Path(output_dir if output_dir is not None else os.getcwd())

    response = client.list_objects(Bucket=artifacts_s3_bucket,
                                   Prefix=artifact_path)

    total_size = 0
    keys = []
    etags = []
    # Collect the keys to download, skipping files whose locally cached ETag still
    # matches the remote object (already downloaded and unchanged).
    for obj in response.get("Contents") or []:
        key = obj["Key"]
        etag = obj["ETag"]
        dest_path = dest_file_path(key, output_path)
        if dest_path.exists():
            etag_path = etag_file_path(key, output_path)
            if etag_path.exists():
                if etag_path.read_text() != etag:
                    os.remove(etag_path)
                    os.remove(dest_path)
                else:
                    continue

        total_size += obj["Size"]
        if obj["Size"] > 0 and not key.endswith("/"):
            # Skip empty files that designate folders (required by FUSE)
            keys.append(key)
            etags.append(etag)

    downloader = transfer.S3Transfer(client, transfer.TransferConfig(),
                                     transfer.OSUtils())

    # TODO: Download files in parallel
    with tqdm(
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
            desc=f"Downloading artifact '{short_artifact_path(artifact_path)}'"
    ) as pbar:
        for key, etag in zip(keys, etags):

            def callback(size):
                pbar.update(size)

            file_path = dest_file_path(key, output_path)
            file_path.parent.mkdir(parents=True, exist_ok=True)

            downloader.download_file(artifacts_s3_bucket,
                                     key,
                                     str(file_path),
                                     callback=callback)

            # Record the object's ETag next to the file so unchanged files are skipped on the next run.
            etag_path = Path(etag_file_path(key, output_path))
            etag_path.parent.mkdir(parents=True, exist_ok=True)
            etag_path.write_text(etag)
Example #3
    def __init__(self, s3_client, bucket_name, prefix=None, kms_key_id=None, force_upload=False):
        self.s3 = s3_client
        self.bucket_name = bucket_name
        self.prefix = prefix
        self.kms_key_id = kms_key_id or None
        self.force_upload = force_upload
        self.transfer_manager = transfer.create_transfer_manager(self.s3, transfer.TransferConfig())

        self._artifact_metadata = None
Example #4
def read_aws_boto(client, bucket_name, bucket_path, dest_file):
    """
    read an s3 resource via normal boto interface mutliparted   
    """
    config = tfr.TransferConfig(
        multipart_threshold=2 * 1024 * 1024,
        max_concurrency=10,
        num_download_attempts=10,
    )
    transfer = tfr.S3Transfer(client, config)
    transfer.download_file(bucket_name, bucket_path, dest_file)
Example #5
def write_aws_boto(client, bucket_name, bucket_path, source_file):
    """
    write to aws via normal boto interface multiparted
    """
    config = tfr.TransferConfig(
        multipart_threshold=2 * 1024 * 1024,
        max_concurrency=10,
        num_download_attempts=10,
    )
    transfer = tfr.S3Transfer(client, config)
    transfer.upload_file(source_file, bucket_name, bucket_path, callback=ProgressPercentage(source_file))
Example #6
def upload_files(mirror, all_tarballs):
    upload_config = transfer.TransferConfig(max_concurrency=10,
                                            use_threads=True)
    s3 = boto3.client('s3')
    monthly_directory = time.strftime("%Y_%m")
    for file_name in all_tarballs:
        print(f'Uploading {file_name} to S3')
        s3.upload_file(file_name,
                       mirror.aws_bucket,
                       f'{monthly_directory}/{file_name}',
                       ExtraArgs={'ACL': 'public-read'},
                       Config=upload_config)
Example #7
    def get_s3_transfer(self):
        logger.info("Init s3 transfer {url}".format(url=self.endpoint_url))
        # A 10 TB multipart threshold effectively disables multipart transfers
        # for typical object sizes.
        s3_config = transfer.TransferConfig(multipart_threshold=10 * TB,
                                            max_concurrency=10,
                                            multipart_chunksize=1 * TB,
                                            num_download_attempts=5,
                                            max_io_queue=100,
                                            io_chunksize=256 * KB,
                                            use_threads=True)
        try:
            s3_transfer = transfer.S3Transfer(self.s3_client, s3_config)
            return s3_transfer
        except Exception as e:
            raise Exception(e)
Example #8
def upload_files(aws_bucket: str,
                 monthly_directory: str,
                 all_tarballs: List[Path]):
    upload_config = transfer.TransferConfig(
        max_concurrency=10,
        use_threads=True
    )
    s3 = boto3.client('s3')
    for tar in all_tarballs:
        name = tar.parts[-1]
        s3.upload_file(
            tar.absolute().as_posix(),
            aws_bucket,
            f'{monthly_directory}/{name}',
            ExtraArgs={'ACL': 'public-read'},
            Config=upload_config
        )
        _logger.info(f'uploaded {tar} to {aws_bucket}')
Example #9
    def __init__(
        self,
        s3_client: Any,
        bucket_name: str,
        prefix: Optional[str] = None,
        kms_key_id: Optional[str] = None,
        force_upload: bool = False,
        no_progressbar: bool = False,
    ):
        self.s3 = s3_client
        self.bucket_name = bucket_name
        self.prefix = prefix
        self.kms_key_id = kms_key_id or None
        self.force_upload = force_upload
        self.no_progressbar = no_progressbar
        self.transfer_manager = transfer.create_transfer_manager(
            self.s3, transfer.TransferConfig())

        self._artifact_metadata = None
Example #10
    def transfer_config_generator(self,
                                  multipart_threshold: int = None,
                                  max_concurrency: int = None,
                                  multipart_chunksize: int = None,
                                  num_download_attempts: int = None,
                                  max_io_queue: int = None,
                                  io_chunksize: int = None,
                                  use_threads: bool = None):
        # Build a TransferConfig, falling back to the class-level TRANSFER_*
        # defaults for any argument that was not supplied.
        try:
            return transfer.TransferConfig(
                multipart_threshold=multipart_threshold
                or self.TRANSFER_MULTIPART_THRESHOLD,
                max_concurrency=max_concurrency
                or self.TRANSFER_MAX_CONCURRENCY,
                multipart_chunksize=multipart_chunksize
                or self.TRANSFER_MULTIPART_CHUNKSIZE,
                num_download_attempts=num_download_attempts
                or self.TRANSFER_NUM_DOWNLOAD_ATTEMPTS,
                max_io_queue=max_io_queue or self.TRANSFER_MAX_IO_QUEUE,
                io_chunksize=io_chunksize or self.TRANSFER_IO_CHUNKSIZE,
                use_threads=use_threads or self.TRANSFER_USE_THREADS)
        except Exception as e:
            return self._exception_handler(e)
Example #11
def _upload_file(source_path: str, bucket: str, target_path: str,
                 storage_class: StorageClass) -> bool:
    logger.debug(f'Uploading {source_path} to {target_path}...')
    botocore_config = botocore.config.Config(max_pool_connections=CONNECTIONS)
    s3 = boto3.client('s3', config=botocore_config)
    transfer_config = s3transfer.TransferConfig(
        use_threads=True,
        max_concurrency=CONNECTIONS,
    )
    s3t = s3transfer.create_transfer_manager(s3, transfer_config)
    total_size = os.path.getsize(source_path)
    progress = tqdm.tqdm(
        desc='upload',
        total=total_size,
        unit='B',
        unit_scale=1,
        position=0,
        bar_format='{desc:<10}{percentage:3.0f}%|{bar:10}{r_bar}')
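    # Submit the upload to the transfer manager; the ProgressCallbackInvoker
    # subscriber forwards bytes-transferred updates to the tqdm bar.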
    future = s3t.upload(
        source_path,
        bucket,
        target_path,
        extra_args={
            'ServerSideEncryption': 'AES256',
            'StorageClass': storage_class.name
        },
        subscribers=[s3transfer.ProgressCallbackInvoker(progress.update)])
    try:
        future.result()
    except ClientError as e:
        logger.error(f'Upload failed for {source_path} to {target_path}: {e}')
        return False
    finally:
        s3t.shutdown()
        progress.close()

    return True


def create_s3t():
    s3_client = create_s3_client()
    transfer_config = s3transfer.TransferConfig(
        use_threads=True, max_concurrency=s3_client.meta.config.max_pool_connections
    )
    return s3transfer.create_transfer_manager(s3_client, config=transfer_config)