示例#1
0
def get_impl(uuid: str, replica: str, version: str = None):
    """
    Fetch a collection's JSON body from the given replica's bucket.

    When no version is supplied, the latest (lexicographically greatest)
    version found under the collection's key prefix is used.

    :raises DSSException: 404 if the collection is tombstoned or the
        requested (uuid, version) blob does not exist.
    """
    uuid = uuid.lower()
    replica_enum = Replica[replica]
    bucket = replica_enum.bucket
    handle = Config.get_blobstore_handle(replica_enum)

    # an all-versions tombstone hides every version of the collection
    if test_object_exists(handle, bucket, CollectionTombstoneID(uuid, version=None).to_key()):
        raise DSSException(
            404, "not_found",
            "Could not find collection for UUID {}".format(uuid))

    if version is None:
        # scan all keys for this uuid and keep the greatest version suffix
        prefix = CollectionFQID(uuid, version=None).to_key_prefix()
        for listed_key in handle.list(bucket, prefix):
            candidate = listed_key[len(prefix):]
            if version is None or candidate > version:
                version = candidate
    try:
        collection_blob = handle.get(bucket, CollectionFQID(uuid, version).to_key())
    except BlobNotFoundError:
        raise DSSException(
            404, "not_found",
            "Could not find collection for UUID {}".format(uuid))
    return json.loads(collection_blob)
示例#2
0
 def test_from_key(self):
     """
     Test that ObjectIdentifier.from_key() returns the correct identifier
     subclass for each key shape, and raises ValueError for malformed or
     unknown keys.
     """
     uuid = "ca11ab1e-0000-4a6b-8f0d-a7d2105c23be"
     version = "2017-12-05T235728.441373Z"
     # assertEqual: assertEquals is a deprecated alias (removed in 3.12)
     self.assertEqual(
         BundleFQID(uuid, version),
         ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/{uuid}.{version}"),
     )
     self.assertEqual(
         FileFQID(uuid, version),
         ObjectIdentifier.from_key(f"{FILE_PREFIX}/{uuid}.{version}"),
     )
     self.assertEqual(
         CollectionFQID(uuid, version),
         ObjectIdentifier.from_key(f"{COLLECTION_PREFIX}/{uuid}.{version}"),
     )
     self.assertEqual(
         CollectionTombstoneID(uuid, version),
         ObjectIdentifier.from_key(f"{COLLECTION_PREFIX}/{uuid}.{version}.dead"),
     )
     self.assertEqual(
         BundleTombstoneID(uuid, version),
         ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/{uuid}.{version}.dead"),
     )
     # a key with no parsable uuid/version is rejected
     self.assertRaises(
         ValueError,
         lambda: ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/trash"),
     )
     # an unknown prefix is rejected
     self.assertRaises(
         ValueError,
         lambda: ObjectIdentifier.from_key(f"trash/{uuid}.{version}.dead"),
     )
    def _collections_in_database_but_not_in_bucket(self):
        """
        Determines collection items in the table that:
        1. No longer exist in the bucket.
        2. Are tombstoned in the bucket.
        3. Have an owner that doesn't match the owner found in the bucket's collection file.

        Returns an iterable tuple of strings: (owner, collection_fqid) representing the item's key pair.

        The returned keys can then be removed from the collections dynamodb table.
        """
        for owner, collection_fqid in owner_lookup.get_all_collection_keys():
            self.total_database_collection_items += 1
            collection = CollectionFQID.from_key(f'{COLLECTION_PREFIX}/{collection_fqid}')
            try:
                collection_owner = json.loads(self.handle.get(self.bucket, collection.to_key()))['owner']
            except BlobNotFoundError:
                # case 1: collection file no longer exists in the bucket
                yield owner, collection_fqid
                continue
            # cases 2 and 3: explicit checks rather than assert, since asserts
            # are stripped under ``python -O`` and would silently disable this
            # reconciliation
            if self._is_uuid_tombstoned(collection.uuid) or collection_owner != owner:
                yield owner, collection_fqid
示例#4
0
def patch(uuid: str, json_request_body: dict, replica: str, version: str):
    """
    Apply a partial update to a collection and store it as a new version.

    Only the authenticated owner may patch. Field overrides
    (name/description/details) are applied first, then requested contents
    are removed, new contents verified and added, and the de-duplicated
    result is uploaded under a freshly generated version.

    :raises DSSException: 403 if the caller does not own the collection,
        404 if the (uuid, version) collection blob does not exist.
    """
    authenticated_user_email = security.get_token_email(request.token_info)

    uuid = uuid.lower()
    owner = get_impl(uuid=uuid, replica=replica)["owner"]
    if owner != authenticated_user_email:
        # plain string: the original f-string had no placeholders (F541)
        raise DSSException(requests.codes.forbidden, "forbidden",
                           "Collection access denied")

    handle = Config.get_blobstore_handle(Replica[replica])
    try:
        cur_collection_blob = handle.get(
            Replica[replica].bucket,
            CollectionFQID(uuid, version).to_key())
    except BlobNotFoundError:
        raise DSSException(
            404, "not_found",
            "Could not find collection for UUID {}".format(uuid))
    collection = json.loads(cur_collection_blob)
    # simple scalar field overrides
    for field in "name", "description", "details":
        if field in json_request_body:
            collection[field] = json_request_body[field]
    # remove requested contents; hashabledict makes dict items set-comparable
    remove_contents_set = set(
        map(hashabledict, json_request_body.get("remove_contents", [])))
    collection["contents"] = [
        i for i in collection["contents"]
        if hashabledict(i) not in remove_contents_set
    ]
    # verify additions before mutating so invalid references never land
    verify_collection(json_request_body.get("add_contents", []),
                      Replica[replica], handle)
    collection["contents"].extend(json_request_body.get("add_contents", []))
    collection["contents"] = _dedpuplicate_contents(collection["contents"])
    # every successful patch produces a new version from the current UTC time
    timestamp = datetime.datetime.utcnow()
    new_collection_version = datetime_to_version_format(timestamp)
    handle.upload_file_handle(
        Replica[replica].bucket,
        CollectionFQID(uuid, new_collection_version).to_key(),
        io.BytesIO(json.dumps(collection).encode("utf-8")))
    return jsonify(dict(uuid=uuid,
                        version=new_collection_version)), requests.codes.ok
示例#5
0
def put(json_request_body: dict, replica: str, uuid: str, version: str):
    """
    Create a collection owned by the authenticated user.

    De-duplicates and verifies the requested contents, records the
    (owner, fqid) pair in the dynamodb lookup table, then uploads the
    collection body to the replica's bucket.
    """
    owner = security.get_token_email(request.token_info)
    collection_body = dict(json_request_body, owner=owner)
    uuid = uuid.lower()
    replica_enum = Replica[replica]
    handle = Config.get_blobstore_handle(replica_enum)
    collection_body["contents"] = _dedpuplicate_contents(collection_body["contents"])
    verify_collection(collection_body["contents"], replica_enum, handle)
    # fall back to a fresh uuid when the caller supplied an empty one
    collection_uuid = uuid or str(uuid4())
    collection_version = version
    fqid = CollectionFQID(collection_uuid, collection_version)
    # update dynamoDB; used to speed up lookup time; will not update if owner
    # already associated w/uuid
    owner_lookup.put_collection(owner=owner, collection_fqid=str(fqid))
    # add the collection file to the bucket
    handle.upload_file_handle(
        replica_enum.bucket,
        fqid.to_key(),
        io.BytesIO(json.dumps(collection_body).encode("utf-8")))
    return jsonify(dict(uuid=collection_uuid,
                        version=collection_version)), requests.codes.created
示例#6
0
def list_collections(per_page: int, start_at: int = 0):
    """
    Return a list of a user's collections.

    Collection uuids are indexed and called by the user's email in a dynamoDB table.

    :param int per_page: # of collections returned per paged response.
    :param int start_at: Where the next chunk of paged response should start at.
    :return: A dictionary containing a list of dictionaries looking like:
        {'collections': [{'uuid': uuid, 'version': version}, {'uuid': uuid, 'version': version}, ... , ...]}
    """
    # TODO: Replica is unused, so this does not use replica.  Appropriate?
    owner = security.get_token_email(request.token_info)

    collections = [
        {'uuid': fqid.uuid, 'version': fqid.version}
        for fqid in (CollectionFQID.from_key(f'{COLLECTION_PREFIX}/{key}')
                     for key in owner_lookup.get_collection_fqids_for_owner(owner))
    ]

    collection_page = collections[start_at:start_at + per_page]
    more_remaining = len(collections) - start_at > per_page
    if more_remaining:
        # paged response: advertise the next chunk via a Link header
        next_url = UrlBuilder(request.url)
        next_url.replace_query("start_at", str(start_at + per_page))
        response = make_response(jsonify({'collections': collection_page}),
                                 requests.codes.partial)
        response.headers['Link'] = f"<{next_url}>; rel='next'"
        response.headers['X-OpenAPI-Pagination'] = 'true'
    else:
        # final (or only) page: everything from start_at fits in one response
        response = make_response(jsonify({'collections': collection_page}),
                                 requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'collections'
    return response
    def _collections_in_bucket_but_not_in_database(self):
        """
        Returns any (owner, collection_fqid) present in the bucket but not in the collections table.

        Returns an iterable tuple of strings: (owner, collection_fqid) representing the item's key pair.

        The returned keys can then be added to the collections dynamodb table.
        """
        for collection_key in self.handle.list(self.bucket, prefix=f'{COLLECTION_PREFIX}/'):
            self.total_bucket_collection_items += 1
            fqid = CollectionFQID.from_key(collection_key)
            # skip (but count) tombstoned collections
            if self._is_uuid_tombstoned(fqid.uuid):
                self.total_tombstoned_bucket_collection_items += 1
                continue
            try:
                body = json.loads(self.handle.get(self.bucket, collection_key))
                try:
                    owner_lookup.get_collection(owner=body['owner'], collection_fqid=str(fqid))
                except DynamoDBItemNotFound:
                    yield body['owner'], str(fqid)
            except BlobNotFoundError:
                pass  # if deleted from bucket while being listed
            except KeyError:
                pass  # unexpected response
示例#8
0
def dependencies_exist(source_replica: Replica, dest_replica: Replica,
                       key: str):
    """
    Given a source replica and manifest key, checks if all dependencies of the corresponding DSS object are present in
    dest_replica:
     - Given a file manifest key, checks if blobs exist in dest_replica.
     - Given a bundle manifest key, checks if file manifests exist in dest_replica.
     - Given a collection key, checks if all collection contents exist in dest_replica.
    Returns true if all dependencies exist in dest_replica, false otherwise.

    :raises NotImplementedError: if the key has an unrecognized prefix.
    """
    source_handle = Config.get_blobstore_handle(source_replica)
    dest_handle = Config.get_blobstore_handle(dest_replica)
    if key.endswith(TOMBSTONE_SUFFIX):
        # tombstones have no dependencies
        return True
    elif key.startswith(FILE_PREFIX):
        # a file depends on its blob being present in the destination
        file_id = FileFQID.from_key(key)
        file_manifest = get_json_metadata(
            entity_type="file",
            uuid=file_id.uuid,
            version=file_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        blob_path = compose_blob_key(file_manifest)
        if exists(dest_replica, blob_path):
            return True
    elif key.startswith(BUNDLE_PREFIX):
        # head all file manifests
        bundle_id = BundleFQID.from_key(key)
        bundle_manifest = get_json_metadata(
            entity_type="bundle",
            uuid=bundle_id.uuid,
            version=bundle_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        try:
            with ThreadPoolExecutor(max_workers=20) as e:
                futures = list()
                for file in bundle_manifest[BundleMetadata.FILES]:
                    file_uuid = file[BundleFileMetadata.UUID]
                    file_version = file[BundleFileMetadata.VERSION]
                    # fix: look the file manifests up in the DESTINATION
                    # replica's store (was source_handle, which contradicts
                    # replica=dest_replica and the other two branches)
                    futures.append(
                        e.submit(get_json_metadata,
                                 entity_type="file",
                                 uuid=file_uuid,
                                 version=file_version,
                                 replica=dest_replica,
                                 blobstore_handle=dest_handle,
                                 max_metadata_size=max_syncable_metadata_size))
                for future in as_completed(futures):
                    future.result()
            return True
        except Exception:
            # any missing file manifest surfaces here -> dependencies missing
            pass
    elif key.startswith(COLLECTION_PREFIX):
        collection_id = CollectionFQID.from_key(key)
        collection_manifest = get_json_metadata(
            entity_type="collection",
            uuid=collection_id.uuid,
            version=collection_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        try:
            verify_collection(contents=collection_manifest["contents"],
                              replica=dest_replica,
                              blobstore_handle=dest_handle)
            return True
        except Exception:
            # verification failure -> dependencies missing
            pass
    else:
        raise NotImplementedError("Unknown prefix for key {}".format(key))
    return False
示例#9
0
def get_collection_fqid() -> CollectionFQID:
    """Return a CollectionFQID built from a fresh random UUID and the current version."""
    new_uuid = str(uuid.uuid4())
    return CollectionFQID(uuid=new_uuid, version=get_version())