def write(self, b):
    """
    Write the given bytes (binary string) into the S3 file from constructor.

    Note there's buffering happening under the covers, so this may not actually
    do any HTTP transfer right away.

    """
    if isinstance(b, six.text_type):
        # not part of API: also accept unicode => encode it as utf8
        b = b.encode('utf8')

    if not isinstance(b, six.binary_type):
        raise TypeError("input must be a binary string")

    self.lines.append(b)
    self.chunk_bytes += len(b)
    self.total_size += len(b)

    if self.chunk_bytes >= self.min_part_size:
        buff = b"".join(self.lines)
        logger.info("uploading part #%i, %i bytes (total %.3fGB)" %
                    (self.parts, len(buff), self.total_size / 1024.0 ** 3))
        self.mp.upload_part_from_file(BytesIO(buff), part_num=self.parts + 1)
        logger.debug("upload of part #%i finished" % self.parts)
        self.parts += 1
        self.lines, self.chunk_bytes = [], 0
def close(self):
    buff = b"".join(self.lines)
    if buff:
        logger.info("uploading last part #%i, %i bytes (total %.3fGB)" %
                    (self.parts, len(buff), self.total_size / 1024.0 ** 3))
        self.mp.upload_part_from_file(BytesIO(buff), part_num=self.parts + 1)
        logger.debug("upload of last part #%i finished" % self.parts)

    if self.total_size:
        self.mp.complete_upload()
    else:
        # AWS complains with "The XML you provided was not well-formed or did
        # not validate against our published schema" when the input is
        # completely empty => abort the upload, no file created.
        # TODO: or create the empty file some other way?
        logger.info("empty input, ignoring multipart upload")
        self.outbucket.cancel_multipart_upload(self.mp.key_name, self.mp.id)
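
# A minimal, standalone sketch (not from the original source) of the buffering
# policy used by write()/close() above: chunks accumulate in memory and are
# flushed as one "part" whenever at least min_part_size bytes are queued; a
# final flush on close() handles the remainder.  The ChunkBuffer class and its
# flush callback are illustrative assumptions; the real code uploads each
# flushed part to S3 via upload_part_from_file instead of collecting it.

class ChunkBuffer(object):
    def __init__(self, min_part_size, flush):
        self.min_part_size = min_part_size
        self.flush = flush          # called with the joined bytes of one part
        self.lines = []
        self.chunk_bytes = 0

    def write(self, b):
        self.lines.append(b)
        self.chunk_bytes += len(b)
        if self.chunk_bytes >= self.min_part_size:
            self.flush(b"".join(self.lines))
            self.lines, self.chunk_bytes = [], 0

    def close(self):
        if self.lines:
            self.flush(b"".join(self.lines))
            self.lines, self.chunk_bytes = [], 0


parts = []
buf = ChunkBuffer(min_part_size=10, flush=parts.append)
for chunk in (b"hello ", b"world ", b"again"):
    buf.write(chunk)
buf.close()
assert parts == [b"hello world ", b"again"]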
async def _complete_multipart_upload(self, path, session_upload_id, parts_metadata):
    """This operation completes a multipart upload by assembling previously uploaded parts.

    Docs: https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadComplete.html
    """
    payload = ''.join([
        '<?xml version="1.0" encoding="UTF-8"?><CompleteMultipartUpload>',
        ''.join([
            '<Part><PartNumber>{}</PartNumber><ETag>{}</ETag></Part>'.format(
                i + 1,
                xml.sax.saxutils.escape(part['ETAG'])
            )
            for i, part in enumerate(parts_metadata)
        ]),
        '</CompleteMultipartUpload>',
    ]).encode('utf-8')
    headers = {
        'Content-Length': str(len(payload)),
        'Content-MD5': compute_md5(BytesIO(payload))[1],
        'Content-Type': 'text/xml',
    }
    params = {'uploadId': session_upload_id}
    complete_url = functools.partial(
        self.bucket.new_key(path.path).generate_url,
        settings.TEMP_URL_SECS,
        'POST',
        query_parameters=params,
        headers=headers,
    )
    resp = await self.make_request(
        'POST',
        complete_url,
        data=payload,
        headers=headers,
        params=params,
        expects=(200, 201, ),
        throws=exceptions.UploadError,
    )
    await resp.release()
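
# A standalone sketch (assumptions only, not the provider's API) of the two
# pieces _complete_multipart_upload assembles: the CompleteMultipartUpload XML
# body listing each part number with its ETag, and the base64-encoded MD5 of
# that body for the Content-MD5 header.  The `etags` argument stands in for the
# ETag values pulled out of parts_metadata.

import base64
import hashlib
import xml.sax.saxutils


def build_complete_payload(etags):
    parts_xml = ''.join(
        '<Part><PartNumber>{}</PartNumber><ETag>{}</ETag></Part>'.format(
            i + 1, xml.sax.saxutils.escape(etag))
        for i, etag in enumerate(etags)
    )
    payload = (
        '<?xml version="1.0" encoding="UTF-8"?>'
        '<CompleteMultipartUpload>{}</CompleteMultipartUpload>'.format(parts_xml)
    ).encode('utf-8')
    content_md5 = base64.b64encode(hashlib.md5(payload).digest()).decode('ascii')
    return payload, content_md5


payload, content_md5 = build_complete_payload(['"etag-part-1"', '"etag-part-2"'])
# payload goes in the POST body; content_md5 goes in the Content-MD5 header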
async def _delete_folder(self, path, **kwargs):
    """Query for recursive contents of folder and delete in batches of 1000

    Called from: func: delete if not path.is_file

    Calls: func: self._check_region
           func: self.make_request
           func: self.bucket.generate_url

    :param *ProviderPath path: Path to be deleted

    On S3, folders are not first-class objects, but are instead inferred from
    the names of their children.  A regular DELETE request issued against a
    folder will not work unless that folder is completely empty.  To fully
    delete an occupied folder, we must delete all of the comprising objects.
    Amazon provides a bulk delete operation to simplify this.
    """
    await self._check_region()

    more_to_come = True
    content_keys = []
    query_params = {'prefix': path.path}
    marker = None

    while more_to_come:
        if marker is not None:
            query_params['marker'] = marker

        resp = await self.make_request(
            'GET',
            self.bucket.generate_url(settings.TEMP_URL_SECS, 'GET',
                                     query_parameters=query_params),
            params=query_params,
            expects=(200, ),
            throws=exceptions.MetadataError,
        )

        contents = await resp.read()
        parsed = xmltodict.parse(contents, strip_whitespace=False)['ListBucketResult']
        more_to_come = parsed.get('IsTruncated') == 'true'
        contents = parsed.get('Contents', [])

        if isinstance(contents, dict):
            contents = [contents]

        content_keys.extend([content['Key'] for content in contents])
        if len(content_keys) > 0:
            marker = content_keys[-1]

    # Query against non-existent folder does not return 404
    if len(content_keys) == 0:
        raise exceptions.NotFoundError(str(path))

    while len(content_keys) > 0:
        key_batch = content_keys[:1000]
        del content_keys[:1000]

        payload = '<?xml version="1.0" encoding="UTF-8"?>'
        payload += '<Delete>'
        payload += ''.join(map(
            lambda x: '<Object><Key>{}</Key></Object>'.format(xml.sax.saxutils.escape(x)),
            key_batch
        ))
        payload += '</Delete>'
        payload = payload.encode('utf-8')
        md5 = compute_md5(BytesIO(payload))

        query_params = {'delete': ''}
        headers = {
            'Content-Length': str(len(payload)),
            'Content-MD5': md5[1],
            'Content-Type': 'text/xml',
        }

        # We depend on a customized version of boto that can make query
        # parameters part of the signature.
        url = functools.partial(
            self.bucket.generate_url,
            settings.TEMP_URL_SECS,
            'POST',
            query_parameters=query_params,
            headers=headers,
        )
        resp = await self.make_request(
            'POST',
            url,
            params=query_params,
            data=payload,
            headers=headers,
            expects=(200, 204, ),
            throws=exceptions.DeleteError,
        )
        await resp.release()
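
# A standalone sketch (not the provider's API) of the batching step above: S3's
# bulk-delete endpoint accepts at most 1000 keys per request, so the collected
# keys are sliced into batches and each batch becomes one <Delete> XML payload.
# The delete_payloads helper is a hypothetical name used only for illustration.

import xml.sax.saxutils


def delete_payloads(keys, batch_size=1000):
    """Yield one bulk-delete XML payload (bytes) per batch of up to batch_size keys."""
    for start in range(0, len(keys), batch_size):
        batch = keys[start:start + batch_size]
        body = ''.join(
            '<Object><Key>{}</Key></Object>'.format(xml.sax.saxutils.escape(key))
            for key in batch
        )
        yield ('<?xml version="1.0" encoding="UTF-8"?>'
               '<Delete>{}</Delete>'.format(body)).encode('utf-8')


payloads = list(delete_payloads(['folder/a.txt', 'folder/b.txt']))
# each payload is POSTed to the bucket URL with the ?delete query parameter,
# plus Content-MD5 and Content-Type: text/xml headers, as in _delete_folder above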
def test_compute_hash_bytesio(self):
    # Compute a hash from a file-like BytesIO object.
    f = BytesIO(self._gen_data())
    compute_hashes_from_fileobj(f, chunk_size=512)