def _collections_in_database_but_not_in_bucket(self):
    """
    Determines collection items in the table that:
        1. No longer exist in the bucket.
        2. Are tombstoned in the bucket.
        3. Have an owner that doesn't match the owner found in the bucket's collection file.

    Yields (owner, collection_fqid) string tuples representing the item's key pair.
    The yielded keys can then be removed from the collections DynamoDB table.
    """
    for owner, collection_fqid in owner_lookup.get_all_collection_keys():
        self.total_database_collection_items += 1
        collection = CollectionFQID.from_key(f'{COLLECTION_PREFIX}/{collection_fqid}')
        try:
            collection_owner = json.loads(self.handle.get(self.bucket, collection.to_key()))['owner']
            # a table entry is stale if its collection is tombstoned or owned by someone else
            assert not self._is_uuid_tombstoned(collection.uuid)
            assert collection_owner == owner
        except (BlobNotFoundError, AssertionError):
            yield owner, collection_fqid
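# Usage sketch (not part of the original module): one way a repair job might
# consume the generator above to prune stale table entries.
# `owner_lookup.delete_collection` is an assumed helper -- substitute whatever
# delete operation the collections table actually exposes.
def _prune_stale_collection_keys(self):
    for owner, collection_fqid in self._collections_in_database_but_not_in_bucket():
        owner_lookup.delete_collection(owner=owner, collection_fqid=collection_fqid)  # hypothetical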
def list_collections(per_page: int, start_at: int = 0):
    """
    Return a list of a user's collections.

    Collection uuids are keyed by the user's email in a DynamoDB table.

    :param int per_page: number of collections returned per paged response.
    :param int start_at: index within the user's collection list at which the next page starts.
    :return: A dictionary containing a list of dictionaries:
        {'collections': [{'uuid': uuid, 'version': version}, {'uuid': uuid, 'version': version}, ...]}
    """
    # TODO: Replica is unused, so this does not use replica. Appropriate?
    owner = security.get_token_email(request.token_info)

    collections = []
    for collection in owner_lookup.get_collection_fqids_for_owner(owner):
        fqid = CollectionFQID.from_key(f'{COLLECTION_PREFIX}/{collection}')
        collections.append({'uuid': fqid.uuid, 'version': fqid.version})

    # paged response
    if len(collections) - start_at > per_page:
        next_url = UrlBuilder(request.url)
        next_url.replace_query("start_at", str(start_at + per_page))
        collection_page = collections[start_at:start_at + per_page]
        response = make_response(jsonify({'collections': collection_page}), requests.codes.partial)
        response.headers['Link'] = f"<{next_url}>; rel='next'"
        response.headers['X-OpenAPI-Pagination'] = 'true'
    # single response returning all collections (or those remaining)
    else:
        collection_page = collections[start_at:]
        response = make_response(jsonify({'collections': collection_page}), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'collections'
    return response
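# Client-side sketch (illustrative only): walking the paged responses produced
# above. The endpoint path and auth header are assumptions; the Link and
# X-OpenAPI-* headers match what list_collections sets. `requests` parses both
# single- and double-quoted rel values in Link headers, so resp.links works here.
import requests as _requests

def fetch_all_collections(base_url: str, token: str) -> list:
    """Follow Link rel='next' headers until the final 200 response."""
    url = f'{base_url}/v1/collections?per_page=500'  # assumed route
    collections = []
    while url:
        resp = _requests.get(url, headers={'Authorization': f'Bearer {token}'})
        resp.raise_for_status()
        collections.extend(resp.json()['collections'])
        next_link = resp.links.get('next')  # present only on 206 (partial) responses
        url = next_link['url'] if next_link else None
    return collections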
def _collections_in_bucket_but_not_in_database(self):
    """
    Determines any (owner, collection_fqid) present in the bucket but not in the collections table.

    Yields (owner, collection_fqid) string tuples representing the item's key pair.
    The yielded keys can then be added to the collections DynamoDB table.
    """
    for collection_key in self.handle.list(self.bucket, prefix=f'{COLLECTION_PREFIX}/'):
        self.total_bucket_collection_items += 1
        collection_fqid = CollectionFQID.from_key(collection_key)
        if not self._is_uuid_tombstoned(collection_fqid.uuid):
            try:
                collection = json.loads(self.handle.get(self.bucket, collection_key))
                try:
                    owner_lookup.get_collection(owner=collection['owner'],
                                                collection_fqid=str(collection_fqid))
                except DynamoDBItemNotFound:
                    yield collection['owner'], str(collection_fqid)
            except BlobNotFoundError:
                pass  # collection was deleted from the bucket while being listed
            except KeyError:
                pass  # collection file has no 'owner' field; unexpected, so skip it
        else:
            self.total_tombstoned_bucket_collection_items += 1
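# Usage sketch (not part of the original module): backfilling the table from
# the bucket. `owner_lookup.put_collection` is an assumed write helper -- the
# real table API may differ.
def _backfill_missing_collection_keys(self):
    for owner, collection_fqid in self._collections_in_bucket_but_not_in_database():
        owner_lookup.put_collection(owner=owner, collection_fqid=collection_fqid)  # hypothetical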
def dependencies_exist(source_replica: Replica, dest_replica: Replica, key: str):
    """
    Given a source replica and manifest key, checks if all dependencies of the corresponding
    DSS object are present in dest_replica:
     - Given a file manifest key, checks if blobs exist in dest_replica.
     - Given a bundle manifest key, checks if file manifests exist in dest_replica.
     - Given a collection key, checks if all collection contents exist in dest_replica.
    Returns true if all dependencies exist in dest_replica, false otherwise.
    """
    source_handle = Config.get_blobstore_handle(source_replica)
    dest_handle = Config.get_blobstore_handle(dest_replica)
    if key.endswith(TOMBSTONE_SUFFIX):
        return True
    elif key.startswith(FILE_PREFIX):
        file_id = FileFQID.from_key(key)
        file_manifest = get_json_metadata(entity_type="file",
                                          uuid=file_id.uuid,
                                          version=file_id.version,
                                          replica=source_replica,
                                          blobstore_handle=source_handle,
                                          max_metadata_size=max_syncable_metadata_size)
        blob_path = compose_blob_key(file_manifest)
        if exists(dest_replica, blob_path):
            return True
    elif key.startswith(BUNDLE_PREFIX):
        # head all file manifests
        bundle_id = BundleFQID.from_key(key)
        bundle_manifest = get_json_metadata(entity_type="bundle",
                                            uuid=bundle_id.uuid,
                                            version=bundle_id.version,
                                            replica=source_replica,
                                            blobstore_handle=source_handle,
                                            max_metadata_size=max_syncable_metadata_size)
        try:
            with ThreadPoolExecutor(max_workers=20) as e:
                futures = list()
                for file in bundle_manifest[BundleMetadata.FILES]:
                    file_uuid = file[BundleFileMetadata.UUID]
                    file_version = file[BundleFileMetadata.VERSION]
                    # read the file manifests from the destination replica with
                    # the destination's own blobstore handle
                    futures.append(e.submit(get_json_metadata,
                                            entity_type="file",
                                            uuid=file_uuid,
                                            version=file_version,
                                            replica=dest_replica,
                                            blobstore_handle=dest_handle,
                                            max_metadata_size=max_syncable_metadata_size))
                for future in as_completed(futures):
                    future.result()
            return True
        except Exception:
            pass
    elif key.startswith(COLLECTION_PREFIX):
        collection_id = CollectionFQID.from_key(key)
        collection_manifest = get_json_metadata(entity_type="collection",
                                                uuid=collection_id.uuid,
                                                version=collection_id.version,
                                                replica=source_replica,
                                                blobstore_handle=source_handle,
                                                max_metadata_size=max_syncable_metadata_size)
        try:
            verify_collection(contents=collection_manifest["contents"],
                              replica=dest_replica,
                              blobstore_handle=dest_handle)
            return True
        except Exception:
            pass
    else:
        raise NotImplementedError("Unknown prefix for key {}".format(key))
    return False
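# Usage sketch (illustrative only): how a sync worker might gate a copy on the
# check above. `copy_object` and `requeue_for_retry` are hypothetical stand-ins
# for whatever copy and retry machinery the surrounding system provides.
def sync_if_ready(source_replica: Replica, dest_replica: Replica, key: str):
    if dependencies_exist(source_replica, dest_replica, key):
        copy_object(source_replica, dest_replica, key)  # hypothetical copy routine
    else:
        requeue_for_retry(source_replica, dest_replica, key)  # hypothetical; retry once deps arrive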