def cp_s3_file(s3_src_path, s3_root):
    """Copy an s3 object to another s3 location, keeping the original file name.

    Args:
        s3_src_path (str): full s3 URL of the source object
        s3_root (str): s3 URL of the destination "directory"

    Returns:
        str: full s3 URL of the copied object
    """
    import posixpath  # S3 keys always use '/'; os.path.join would emit '\' on Windows
    s3 = b3.resource('s3')
    bucket, s3_path = split_s3_url(s3_root)
    filename = os.path.basename(s3_src_path)
    output_path = posixpath.join(s3_path, filename)
    src_bucket, src_key = split_s3_url(s3_src_path)
    s3.Object(bucket, output_path).copy_from(
        CopySource={'Bucket': src_bucket, 'Key': src_key},
        ServerSideEncryption="AES256")
    return posixpath.join("s3://", bucket, output_path)
def s3_bucket_exists(bucket):
    """Check whether an s3 bucket exists (pattern from the Amazon docs).

    Args:
        bucket (str): bucket name

    Returns:
        bool: whether the bucket exists
    """
    import botocore
    s3 = b3.resource('s3')
    try:
        s3.meta.client.head_bucket(Bucket=bucket)
    except botocore.exceptions.ClientError as e:
        code = int(e.response['Error']['Code'])
        if code == 404:
            return False
        if code == 403:
            # With the s3:ListBucket permission Amazon returns a 404 ("no such
            # key"); without it, a 403 ("access denied") comes back instead, so
            # treat a forbidden bucket as missing but log it.
            _logger.info(
                "aws_s3: bucket {} raised a 403 (access forbidden), do you have ListBucket permission?"
                .format(bucket))
            return False
        raise
    return True
def get_s3_file(s3_url, filename=None):
    """Download a single s3 object to a local file.

    Args:
        s3_url (str): full s3 URL of the object
        filename (str): local destination path; defaults to the object's base name

    Returns:
        str: the local file name written
    """
    bucket, s3_path = split_s3_url(s3_url)
    filename = os.path.basename(s3_path) if filename is None else filename
    b3.resource('s3').Object(bucket, s3_path).download_file(filename)
    return filename
def ls_s3_url_objects(s3_url):
    """Return aws boto3 ObjectSummary's under an s3 URL prefix.

    Note: There is no current way in boto3 to do globs -- you filter on the
    client side.

    Args:
        s3_url (str): s3 URL treated as a "directory" prefix

    Returns:
        list: ObjectSummary's under this path (at most 1024)
    """
    result = []
    # '!=' rather than 'is not': identity comparison against a str literal is
    # implementation-dependent and raises SyntaxWarning on modern CPython.
    if s3_url[-1] != '/':
        s3_url += '/'
    s3 = b3.resource('s3')
    bucket, s3_path = split_s3_url(s3_url)
    if not s3_bucket_exists(bucket):
        return result
    s3_b = s3.Bucket(bucket)
    for i in s3_b.objects.filter(Prefix=s3_path, MaxKeys=1024):
        result.append(i)
    if len(result) == 1024:
        # logger.warn is a deprecated alias of logger.warning
        _logger.warning(
            "ls_s3_url_objects: hit MaxKeys 1024 limit in result set.")
    return result
def delete_s3_dir(s3_url):
    """Delete every object under an s3 URL prefix.

    Args:
        s3_url (str): s3 URL treated as a "directory" prefix
    """
    s3 = b3.resource('s3')
    bucket_name, s3_path = split_s3_url(s3_url)
    bucket = s3.Bucket(bucket_name)
    objects_to_delete = [{'Key': obj.key}
                         for obj in bucket.objects.filter(Prefix=s3_path)]
    # delete_objects rejects an empty key list (MalformedXML) and accepts at
    # most 1000 keys per request, so skip the call when there is nothing to
    # delete and batch otherwise.
    for start in range(0, len(objects_to_delete), 1000):
        bucket.delete_objects(
            Delete={'Objects': objects_to_delete[start:start + 1000]})
def delete_s3_file(s3_url):
    """Delete a single object at an s3 URL.

    Args:
        s3_url (str): full s3 URL of the object

    Returns:
        bool: always True
    """
    bucket, s3_path = split_s3_url(s3_url)
    b3.resource('s3').Object(bucket, s3_path).delete()
    # TODO: we're getting a different response than the docs say, so the
    # response's 'DeleteMarker' is not consulted; success is assumed.
    return True
def get_s3_key(bucket, key, filename=None):
    """Download an s3 key to a local file, retrying up to 3 times.

    Args:
        bucket (str): bucket name
        key (str): object key within the bucket
        filename (str): local destination path; defaults to the key's base
            name. Missing parent directories are created.

    Returns:
        str: the local file name written

    Raises:
        Exception: the last download_file error after 3 failed attempts
    """
    dl_retry = 3
    s3 = b3.resource('s3')
    if filename is None:
        filename = os.path.basename(key)
    else:
        path = os.path.dirname(filename)
        # Guard the empty-dirname case: os.makedirs('') raises.
        if path and not os.path.exists(path):
            try:
                os.makedirs(path)
            except OSError as ose:  # OSError, not the deprecated os.error alias
                # swallow error -- likely the directory already exists
                # (another process may have created it between the check
                # and the makedirs call).
                _logger.warning("aws_s3.get_s3_key: Error code {}".format(
                    os.strerror(ose.errno)))
    while dl_retry > 0:
        try:
            s3.Bucket(bucket).download_file(key, filename)
            dl_retry = -1  # success: drop out of the retry loop
        except Exception as e:
            _logger.warning(
                "aws_s3.get_s3_key Retry Count [{}] on download_file raised exception {}"
                .format(dl_retry, e))
            dl_retry -= 1
            if dl_retry <= 0:
                _logger.warning(
                    "aws_s3.get_s3_key Fail on downloading file after 3 retries with exception {}"
                    .format(e))
                raise
    return filename
def put_s3_file(local_path, s3_root):
    """Upload a local file under s3_root, keeping the original file name.

    Args:
        local_path (str): path of the local file to upload
        s3_root (str): destination s3 URL "directory"

    Returns:
        str: the base file name that was uploaded
    """
    bucket, s3_path = split_s3_url(s3_root)
    filename = os.path.basename(local_path)
    dest_key = os.path.join(s3_path, filename)
    b3.resource('s3').Object(bucket, dest_key).upload_file(
        local_path, ExtraArgs={"ServerSideEncryption": "AES256"})
    return filename
def ls_s3_url_objects(s3_url):
    """Return aws boto3 ObjectSummary's under an s3 URL prefix.

    Note: There is no current way in boto3 to do globs -- you filter on the
    client side.

    NOTE(review): this definition shadows an earlier ls_s3_url_objects in this
    file; one of the two should be deleted.

    Args:
        s3_url (str): s3 URL treated as a "directory" prefix

    Returns:
        list: ObjectSummary's under this path (at most 1024)
    """
    result = []
    # '!=' rather than 'is not': identity comparison against a str literal is
    # implementation-dependent and raises SyntaxWarning on modern CPython.
    if s3_url[-1] != '/':
        s3_url += '/'
    bucket, s3_path = split_s3_url(s3_url)
    # The dead `if False:` paginator branch and commented-out bucket-existence
    # check have been removed; behavior is unchanged.
    s3 = b3.resource('s3')
    try:
        s3_b = s3.Bucket(bucket)
        for i in s3_b.objects.filter(Prefix=s3_path, MaxKeys=1024):
            result.append(i)
        if len(result) == 1024:
            # logger.warn is a deprecated alias of logger.warning
            _logger.warning(
                "ls_s3_url_objects: hit MaxKeys 1024 limit in result set.")
    except Exception as e:
        _logger.error(
            "ls_s3_url_objects: failed with exception {}".format(e))
        raise
    return result
def s3_path_exists(s3_url):
    """Check whether the key (or, with no key, the bucket) at an s3 URL exists.

    If checking a partial key, end the URL with '/': s3 "folders" are
    zero-size objects whose key ends with '/', e.g. for
    s3://mybucket/onelevel/anotherdir/ the bucket is mybucket and the key is
    onelevel/anotherdir/. A full path may end with the object itself.

    Args:
        s3_url (str): full s3 URL

    Returns:
        bool: whether the path exists
    """
    import botocore
    bucket, key = split_s3_url(s3_url)
    if key is None:
        return s3_bucket_exists(bucket)
    try:
        b3.resource('s3').Object(bucket, key).load()
    except botocore.exceptions.ClientError as e:
        error_code = int(e.response['Error']['Code'])
        _logger.info("Error code {}".format(error_code))
        if error_code == 404:
            return False
        raise
    return True
def get_s3_resource():
    """Return a fresh boto3 s3 service resource."""
    return b3.resource('s3')