def deleteJobStore(self):
    # Mark this job store as deleted in the registry domain.
    self.registry_domain.put_attributes(self.namePrefix, dict(exists=str(False)))
    if self.filesBucket is not None:
        # Abort any in-flight multipart uploads before emptying the bucket.
        for attempt in retry_s3():
            with attempt:
                for upload in self.filesBucket.list_multipart_uploads():
                    upload.cancel_upload()
        if self.__getBucketVersioning(self.filesBucket) in (True, None):
            # Versioning is enabled or suspended: every version of every key must go.
            for attempt in retry_s3():
                with attempt:
                    for key in list(self.filesBucket.list_versions()):
                        self.filesBucket.delete_key(key.name, version_id=key.version_id)
        else:
            for attempt in retry_s3():
                with attempt:
                    for key in list(self.filesBucket.list()):
                        key.delete()
        for attempt in retry_s3():
            with attempt:
                self.filesBucket.delete()
    for domain in (self.filesDomain, self.jobsDomain):
        if domain is not None:
            for attempt in retry_sdb():
                with attempt:
                    domain.delete()
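# The `for attempt in retry_s3(): with attempt:` idiom used throughout assumes that
# retry_s3() (and retry_sdb()) yield one context manager per attempt, swallowing
# transient AWS errors and backing off between tries. The helper below is a minimal,
# hypothetical sketch of that contract, not the actual implementation; the name,
# signature and retry policy are illustrative only.
import time
from contextlib import contextmanager

def retry_sketch(tries=5, delay=1.0, retriable=(Exception,)):
    """Yield one context manager per attempt; stop once an attempt completes cleanly."""
    state = {'done': False}
    for i in range(tries):
        if state['done']:
            return

        @contextmanager
        def attempt(last=(i == tries - 1)):
            try:
                yield
                state['done'] = True  # The body ran without raising; no more attempts needed.
            except retriable:
                if last:
                    raise             # Out of attempts; propagate the error to the caller.
                time.sleep(delay)     # Back off, then let the enclosing for loop retry.

        yield attempt()

# Usage mirrors the job store code:
#
#     for attempt in retry_sketch():
#         with attempt:
#             bucket.delete()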
def multipartReader():
    buf = readable.read(store.partSize)
    if allowInlining and len(buf) <= self._maxInlinedSize():
        self.content = buf
    else:
        headers = self._s3EncryptionHeaders()
        for attempt in retry_s3():
            with attempt:
                upload = store.filesBucket.initiate_multipart_upload(
                    key_name=self.fileID, headers=headers)
        try:
            for part_num in itertools.count():
                # There must be at least one part, even if the file is empty.
                if len(buf) == 0 and part_num > 0:
                    break
                for attempt in retry_s3():
                    with attempt:
                        upload.upload_part_from_file(fp=StringIO(buf),
                                                     # part numbers are 1-based
                                                     part_num=part_num + 1,
                                                     headers=headers)
                if len(buf) == 0:
                    break
                buf = readable.read(self.outer.partSize)
        except:
            with panic(log=log):
                for attempt in retry_s3():
                    with attempt:
                        upload.cancel_upload()
        else:
            for attempt in retry_s3():
                with attempt:
                    self.version = upload.complete_upload().version_id
def save(self):
    attributes, numNewContentChunks = self.toItem()
    # False stands for absence
    expected = ['version', False if self.previousVersion is None else self.previousVersion]
    try:
        for attempt in retry_sdb():
            with attempt:
                assert self.outer.filesDomain.put_attributes(item_name=self.fileID,
                                                             attributes=attributes,
                                                             expected_value=expected)
        # clean up the old version of the file if necessary and safe
        if self.previousVersion and (self.previousVersion != self.version):
            for attempt in retry_s3():
                with attempt:
                    self.outer.filesBucket.delete_key(self.fileID,
                                                      version_id=self.previousVersion)
        self._previousVersion = self._version
        if numNewContentChunks < self._numContentChunks:
            residualChunks = xrange(numNewContentChunks, self._numContentChunks)
            attributes = [self._chunkName(i) for i in residualChunks]
            for attempt in retry_sdb():
                with attempt:
                    self.outer.filesDomain.delete_attributes(self.fileID,
                                                             attributes=attributes)
        self._numContentChunks = numNewContentChunks
    except SDBResponseError as e:
        if e.error_code == 'ConditionalCheckFailed':
            raise ConcurrentFileModificationException(self.fileID)
        else:
            raise
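# save() relies on SimpleDB's conditional put for optimistic concurrency: the write only
# succeeds if the stored 'version' attribute still matches what this process last read.
# The sketch below shows that pattern in isolation with boto; the function and its
# arguments are hypothetical, only the conditional-put mechanics come from the code above.
from boto.exception import SDBResponseError

def conditional_update_sketch(domain, item_name, old_version, new_version):
    try:
        # expected_value=['version', False] would mean "only if no 'version' exists yet".
        domain.put_attributes(item_name,
                              {'version': new_version},
                              expected_value=['version', old_version])
        return True
    except SDBResponseError as e:
        if e.error_code == 'ConditionalCheckFailed':
            return False  # Another writer updated the item since we read it.
        raise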
def delete(self, jobStoreID):
    # Delete the job's item from SimpleDB, then every file owned by that job.
    log.debug("Deleting job %s", jobStoreID)
    for attempt in retry_sdb():
        with attempt:
            self.jobsDomain.delete_attributes(item_name=jobStoreID)
    items = None
    for attempt in retry_sdb():
        with attempt:
            items = list(self.filesDomain.select(
                consistent_read=True,
                query="select version from `%s` where ownerID='%s'" % (
                    self.filesDomain.name, jobStoreID)))
    assert items is not None
    if items:
        log.debug("Deleting %d file(s) associated with job %s", len(items), jobStoreID)
        n = self.itemsPerBatchDelete
        batches = [items[i:i + n] for i in range(0, len(items), n)]
        for batch in batches:
            itemsDict = {item.name: None for item in batch}
            for attempt in retry_sdb():
                with attempt:
                    self.filesDomain.batch_delete_attributes(itemsDict)
        for item in items:
            version = item.get('version')
            for attempt in retry_s3():
                with attempt:
                    if version:
                        self.filesBucket.delete_key(key_name=item.name, version_id=version)
                    else:
                        self.filesBucket.delete_key(key_name=item.name)
def copyKeyMultipart(srcKey, dstBucketName, dstKeyName, headers=None):
    """
    Copies the contents of a source key to a destination key using a multipart upload.
    Note that if the destination key exists it will be overwritten implicitly, and if it
    does not exist a new key will be created.

    :param boto.s3.key.Key srcKey: The source key to be copied from.
    :param str dstBucketName: The name of the destination bucket for the copy.
    :param str dstKeyName: The name of the destination key that will be created or overwritten.
    :param dict headers: Any headers that should be passed.

    :rtype: boto.s3.multipart.CompletedMultiPartUpload
    :return: An object representing the completed upload.
    """
    partSize = defaultPartSize
    # We need a location-agnostic connection to S3 so we can't use the one that we
    # normally use for interacting with the job store bucket.
    with closing(boto.connect_s3()) as s3:
        headers = headers or {}
        totalSize = srcKey.size
        for attempt in retry_s3():
            with attempt:
                dstBucket = s3.get_bucket(dstBucketName)
                upload = dstBucket.initiate_multipart_upload(dstKeyName, headers=headers)
        try:
            start = 0
            partIndex = itertools.count()
            while start < totalSize:
                end = min(start + partSize, totalSize)
                # copy_part_from_key takes an inclusive byte range, hence end - 1.
                for attempt in retry_s3():
                    with attempt:
                        upload.copy_part_from_key(src_bucket_name=srcKey.bucket.name,
                                                  src_key_name=srcKey.name,
                                                  src_version_id=srcKey.version_id,
                                                  part_num=next(partIndex) + 1,
                                                  start=start,
                                                  end=end - 1,
                                                  headers=headers)
                start += partSize
        except:
            with panic(log=log):
                upload.cancel_upload()
        else:
            for attempt in retry_s3():
                with attempt:
                    return upload.complete_upload()
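# A hypothetical call site for copyKeyMultipart(): look up the source key over a plain
# boto S3 connection and copy it into another bucket. The bucket and key names below are
# examples, not names used by the job store.
import boto
from contextlib import closing

def copy_between_buckets_sketch():
    with closing(boto.connect_s3()) as s3:
        srcKey = s3.get_bucket('example-src-bucket').get_key('example/source-key')
        # Returns a boto.s3.multipart.CompletedMultiPartUpload on success.
        return copyKeyMultipart(srcKey,
                                dstBucketName='example-dst-bucket',
                                dstKeyName='example/destination-key')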
def getPublicUrl(self, jobStoreFileID):
    info = self.FileInfo.loadOrFail(jobStoreFileID)
    if info.content is not None:
        # The content is inlined in SimpleDB; write it out to S3 so a URL can point at a real key.
        with info.uploadStream(allowInlining=False) as f:
            f.write(info.content)
    for attempt in retry_s3():
        with attempt:
            key = self.filesBucket.get_key(key_name=jobStoreFileID, version_id=info.version)
            return key.generate_url(expires_in=self.publicUrlExpiration.total_seconds())
def reader():
    buf = readable.read()
    if allowInlining and len(buf) <= self._maxInlinedSize():
        self.content = buf
    else:
        key = store.filesBucket.new_key(key_name=self.fileID)
        buf = StringIO(buf)
        headers = self._s3EncryptionHeaders()
        for attempt in retry_s3():
            with attempt:
                assert buf.len == key.set_contents_from_file(fp=buf, headers=headers)
        self.version = key.version_id
def delete(self):
    store = self.outer
    if self.previousVersion is not None:
        for attempt in retry_sdb():
            with attempt:
                store.filesDomain.delete_attributes(
                    self.fileID, expected_values=['version', self.previousVersion])
        if self.previousVersion:
            for attempt in retry_s3():
                with attempt:
                    store.filesBucket.delete_key(key_name=self.fileID,
                                                 version_id=self.previousVersion)
def download(self, localFilePath):
    if self.content is not None:
        with open(localFilePath, 'w') as f:
            f.write(self.content)
    elif self.version:
        headers = self._s3EncryptionHeaders()
        key = self.outer.filesBucket.get_key(self.fileID, validate=False)
        for attempt in retry_s3():
            with attempt:
                key.get_contents_to_filename(localFilePath,
                                             version_id=self.version,
                                             headers=headers)
    else:
        assert False
def copyTo(self, dstKey):
    """
    Copies the contents of this file to the given key.

    :param Key dstKey: The key to copy this file's content to
    """
    if self.content is not None:
        for attempt in retry_s3():
            with attempt:
                dstKey.set_contents_from_string(self.content)
    elif self.version:
        for attempt in retry_s3():
            srcKey = self.outer.filesBucket.get_key(self.fileID, validate=False)
            srcKey.version_id = self.version
            with attempt:
                headers = {k.replace('amz-', 'amz-copy-source-', 1): v
                           for k, v in self._s3EncryptionHeaders().iteritems()}
                self._copyKey(srcKey=srcKey,
                              dstBucketName=dstKey.bucket.name,
                              dstKeyName=dstKey.name,
                              headers=headers)
    else:
        assert False
def __getBucketVersioning(self, bucket):
    """
    A valuable lesson in how to botch a simple tri-state boolean.

    For newly created buckets get_versioning_status returns None. We map that to False.
    TBD: This may actually be a result of eventual consistency.

    Otherwise, the 'Versioning' entry in the dictionary returned by get_versioning_status
    can be 'Enabled', 'Suspended' or 'Disabled', which we map to True, None and False
    respectively. Calling configure_versioning with False on a bucket will cause
    get_versioning_status to then return 'Suspended' for some reason.
    """
    for attempt in retry_s3():
        with attempt:
            status = bucket.get_versioning_status()
            return bool(status) and self.versionings[status['Versioning']]
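# The docstring above implies that self.versionings maps the S3 'Versioning' status string
# onto the tri-state result. The dict below is an assumed reconstruction of that mapping,
# shown only to make the return value concrete; the attribute name is taken from the code.
versionings = dict(Enabled=True, Disabled=False, Suspended=None)

# With that mapping, __getBucketVersioning returns:
#   True  - versioning is enabled on the bucket
#   False - the bucket is new or versioning was never configured (falsy status)
#   None  - versioning was enabled at some point and later suspended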
def upload(self, localFilePath):
    file_size, file_time = self._fileSizeAndTime(localFilePath)
    if file_size <= self._maxInlinedSize():
        # Small files are inlined into SimpleDB rather than uploaded to S3.
        with open(localFilePath) as f:
            self.content = f.read()
    else:
        headers = self._s3EncryptionHeaders()
        if file_size <= self.outer.partSize:
            # The file fits into a single part, so a plain PUT is sufficient.
            key = self.outer.filesBucket.new_key(key_name=self.fileID)
            key.name = self.fileID
            for attempt in retry_s3():
                with attempt:
                    key.set_contents_from_filename(localFilePath, headers=headers)
            self.version = key.version_id
        else:
            # Larger files are uploaded in parts of size partSize.
            with open(localFilePath, 'rb') as f:
                for attempt in retry_s3():
                    with attempt:
                        upload = self.outer.filesBucket.initiate_multipart_upload(
                            key_name=self.fileID, headers=headers)
                try:
                    start = 0
                    part_num = itertools.count()
                    while start < file_size:
                        end = min(start + self.outer.partSize, file_size)
                        assert f.tell() == start
                        for attempt in retry_s3():
                            with attempt:
                                upload.upload_part_from_file(fp=f,
                                                             # part numbers are 1-based
                                                             part_num=next(part_num) + 1,
                                                             size=end - start,
                                                             headers=headers)
                        start = end
                    assert f.tell() == file_size == start
                except:
                    with panic(log=log):
                        for attempt in retry_s3():
                            with attempt:
                                upload.cancel_upload()
                else:
                    for attempt in retry_s3():
                        with attempt:
                            self.version = upload.complete_upload().version_id
        for attempt in retry_s3():
            with attempt:
                key = self.outer.filesBucket.get_key(self.fileID,
                                                     headers=headers,
                                                     version_id=self.version)
        assert key.size == file_size
        # Make reasonably sure that the file wasn't touched during the upload
        assert self._fileSizeAndTime(localFilePath) == (file_size, file_time)
def _copyKey(self, srcKey, dstBucketName, dstKeyName, headers=None):
    headers = headers or {}
    if srcKey.size > self.outer.partSize:
        return copyKeyMultipart(srcKey=srcKey,
                                dstBucketName=dstBucketName,
                                dstKeyName=dstKeyName,
                                headers=headers)
    else:
        # We need a location-agnostic connection to S3 so we can't use the one that we
        # normally use for interacting with the job store bucket.
        with closing(boto.connect_s3()) as s3:
            for attempt in retry_s3():
                with attempt:
                    dstBucket = s3.get_bucket(dstBucketName)
                    return dstBucket.copy_key(new_key_name=dstKeyName,
                                              src_bucket_name=srcKey.bucket.name,
                                              src_version_id=srcKey.version_id,
                                              src_key_name=srcKey.name,
                                              metadata=srcKey.metadata,
                                              headers=headers)
def writer():
    try:
        if self.content is not None:
            writable.write(self.content)
        elif self.version:
            headers = self._s3EncryptionHeaders()
            key = self.outer.filesBucket.get_key(self.fileID, validate=False)
            for attempt in retry_s3():
                with attempt:
                    key.get_contents_to_file(writable,
                                             headers=headers,
                                             version_id=self.version)
        else:
            assert False
    finally:
        # This close() will send EOF to the reading end and ultimately cause the yield
        # to return. It also makes the implicit .close() done by the enclosing "with"
        # context redundant, but that should be OK since .close() on file objects is
        # idempotent.
        writable.close()
def __getBucketRegion(self, bucket):
    for attempt in retry_s3():
        with attempt:
            return bucket_location_to_region(bucket.get_location())
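# bucket_location_to_region() is not defined in this section. boto's get_location()
# returns the bucket's raw LocationConstraint, which is '' for us-east-1 and, for
# buckets created with the legacy constraint, 'EU' for eu-west-1; a translation helper
# along the following lines is assumed:
def bucket_location_to_region_sketch(location):
    if location == '':
        return 'us-east-1'
    elif location == 'EU':
        return 'eu-west-1'
    else:
        return location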