Example #1
    def _test_file_get_invalid_token(self, replica: Replica, scheme: str,
                                     test_bucket: str, uploader: Uploader):
        src_key = generate_test_key()
        src_data = os.urandom(1024)
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            fh.write(src_data)
            fh.flush()

            uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

        source_url = f"{scheme}://{test_bucket}/{src_key}"

        file_uuid = str(uuid.uuid4())
        bundle_uuid = str(uuid.uuid4())
        version = datetime_to_version_format(datetime.datetime.utcnow())

        # upload the file so we can attempt a GET with an invalid token
        self.upload_file(source_url,
                         file_uuid,
                         bundle_uuid=bundle_uuid,
                         version=version)
        url = str(UrlBuilder().set(path="/v1/files/" + file_uuid).add_query(
            "replica",
            replica.name).add_query("version",
                                    version).add_query("token", "{}"))

        @eventually(30, 0.1)
        def try_get():
            self.assertGetResponse(url,
                                   requests.codes.bad_request,
                                   headers=get_auth_header())

        try_get()
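
`try_get` above is wrapped in `@eventually(30, 0.1)`, which retries the assertion every 0.1 seconds until it passes or 30 seconds elapse. A minimal sketch of such a decorator (a hypothetical reimplementation; the real test helper may retry on additional exception types):

import functools
import time

def eventually(timeout: float, interval: float):
    """Retry the wrapped callable on AssertionError until `timeout` elapses."""
    def decorate(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            deadline = time.time() + timeout
            while True:
                try:
                    return func(*args, **kwargs)
                except AssertionError:
                    if time.time() >= deadline:
                        raise
                    time.sleep(interval)
        return wrapper
    return decorate
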
Example #2
    def upload_file(self, contents):
        s3_test_bucket = get_env("DSS_S3_BUCKET_TEST")
        src_key = generate_test_key()
        s3 = boto3.resource('s3')
        with io.BytesIO(json.dumps(
                contents).encode()) as fh, ChecksummingSink() as sink:
            sink.write(fh.read())
            sums = sink.get_checksums()
            metadata = {
                'hca-dss-crc32c': sums['crc32c'].lower(),
                'hca-dss-s3_etag': sums['s3_etag'].lower(),
                'hca-dss-sha1': sums['sha1'].lower(),
                'hca-dss-sha256': sums['sha256'].lower()
            }
            fh.seek(0)
            # TODO: consider switching to unmanaged uploader (putobject w/blob)
            s3.Bucket(s3_test_bucket).Object(src_key).upload_fileobj(
                fh, ExtraArgs={"Metadata": metadata})
        source_url = f"s3://{s3_test_bucket}/{src_key}"
        file_uuid = str(uuid4())
        version = datetime_to_version_format(datetime.utcnow())
        urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
        urlbuilder.add_query("version", version)

        resp_obj = self.assertPutResponse(str(urlbuilder),
                                          requests.codes.created,
                                          json_request_body=dict(
                                              creator_uid=0,
                                              source_url=source_url))
        return file_uuid, resp_obj.json["version"]
Example #3
    def _test_bundle_delete(self, replica: Replica, fixtures_bucket: str, authorized: bool):
        schema = replica.storage_schema

        # prep existing bundle
        bundle_uuid = str(uuid.uuid4())
        file_uuid = str(uuid.uuid4())
        resp_obj = self.upload_file_wait(
            f"{schema}://{fixtures_bucket}/test_good_source_data/0",
            replica,
            file_uuid,
            bundle_uuid=bundle_uuid,
        )
        file_version = resp_obj.json['version']

        bundle_version = datetime_to_version_format(datetime.datetime.utcnow())
        self.put_bundle(
            replica,
            bundle_uuid,
            [(file_uuid, file_version, "LICENSE")],
            bundle_version,
        )

        handle = Config.get_blobstore_handle(replica)
        bucket = replica.bucket

        self.delete_bundle(replica, bundle_uuid, authorized=authorized)
        tombstone_exists = test_object_exists(handle, bucket, f"bundles/{bundle_uuid}.dead")
        self.assertEqual(tombstone_exists, authorized)

        self.delete_bundle(replica, bundle_uuid, bundle_version, authorized=authorized)
        tombstone_exists = test_object_exists(handle, bucket, f"bundles/{bundle_uuid}.{bundle_version}.dead")
        self.assertEqual(tombstone_exists, authorized)
Example #4
    def _upload_bundle(self, replica, uuid=None):
        if replica == Replica.aws:
            test_fixtures_bucket = get_env('DSS_S3_BUCKET_TEST_FIXTURES')
        else:
            test_fixtures_bucket = get_env('DSS_GS_BUCKET_TEST_FIXTURES')
        bundle_uuid = uuid if uuid else str(uuid4())
        file_uuid_1 = str(uuid4())
        file_uuid_2 = str(uuid4())
        filenames = ["file_1", "file_2"]
        resp_obj_1 = self.upload_file_wait(
            f"{replica.storage_schema}://{test_fixtures_bucket}/test_good_source_data/0",
            replica,
            file_uuid_1,
            bundle_uuid=bundle_uuid,
        )
        resp_obj_2 = self.upload_file_wait(
            f"{replica.storage_schema}://{test_fixtures_bucket}/test_good_source_data/1",
            replica,
            file_uuid_2,
            bundle_uuid=bundle_uuid,
        )
        file_version_1 = resp_obj_1.json['version']
        file_version_2 = resp_obj_2.json['version']
        bundle_version = datetime_to_version_format(datetime.datetime.utcnow())
        self.put_bundle(
            replica,
            bundle_uuid,
            [(file_uuid_1, file_version_1, filenames[0]),
             (file_uuid_2, file_version_2, filenames[1])],
            bundle_version,
        )
        return bundle_uuid, bundle_version
Example #5
    def _test_file_put(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
        src_key = generate_test_key()
        src_data = os.urandom(1024)
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            fh.write(src_data)
            fh.flush()

            uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

        source_url = f"{scheme}://{test_bucket}/{src_key}"

        file_uuid = str(uuid.uuid4())
        bundle_uuid = str(uuid.uuid4())
        version = datetime_to_version_format(datetime.datetime.utcnow())

        # should be able to do this twice (i.e., same payload, different UUIDs)
        self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)
        self.upload_file(source_url, str(uuid.uuid4()))

        # should be able to do this twice (i.e., same payload, same UUIDs)
        self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid,
                         version=version, expected_code=requests.codes.ok)

        # should *NOT* be able to do this twice (i.e., different payload, same UUIDs)
        self.upload_file(source_url, file_uuid, version=version, expected_code=requests.codes.conflict)
Example #6
    def _put(self,
             contents: typing.List,
             authorized: bool = True,
             uuid: typing.Optional[str] = None,
             version: typing.Optional[str] = None,
             replica: str = 'aws') -> typing.Tuple[str, str]:
        uuid = str(uuid4()) if uuid is None else uuid
        version = datetime_to_version_format(
            datetime.now()) if version is None else version

        params = dict()
        if uuid != 'missing':
            params['uuid'] = uuid
        if version != 'missing':
            params['version'] = version
        if replica != 'missing':
            params['replica'] = replica

        res = self.app.put("/v1/collections",
                           headers=get_auth_header(authorized=authorized),
                           params=params,
                           json=dict(name="n",
                                     description="d",
                                     details={},
                                     contents=contents))
        return res.json()["uuid"], res.json()["version"]
Example #7
def patch(uuid: str, json_request_body: dict, replica: str, version: str):
    bundle = get_bundle_manifest(uuid, Replica[replica], version)
    if bundle is None:
        raise DSSException(404, "not_found",
                           "Could not find bundle for UUID {}".format(uuid))

    remove_files_set = {
        bundle_file_id_metadata(f)
        for f in json_request_body.get("remove_files", [])
    }
    bundle['files'] = [
        f for f in bundle['files']
        if bundle_file_id_metadata(f) not in remove_files_set
    ]
    add_files = json_request_body.get("add_files", [])
    bundle['files'].extend(
        build_bundle_file_metadata(Replica[replica], add_files))
    detect_filename_collisions(bundle['files'])

    timestamp = datetime.datetime.utcnow()
    new_bundle_version = datetime_to_version_format(timestamp)
    bundle['version'] = new_bundle_version
    _save_bundle(Replica[replica], uuid, new_bundle_version, bundle)
    return jsonify(dict(uuid=uuid,
                        version=new_bundle_version)), requests.codes.ok
Example #8
    def __init__(self, handle: BlobStore, path: str, bucket: str, replica: Replica = Replica.aws,
                 bundle_uuid: str = None) -> None:
        self.path = path
        self.uuid = bundle_uuid if bundle_uuid else str(uuid.uuid4())
        self.version = datetime_to_version_format(datetime.datetime.utcnow())
        self.handle = handle
        self.bucket = bucket
        self.files = self.enumerate_bundle_files(replica)
Example #9
    def upload_file_wait(
        self: typing.Any,
        source_url: str,
        replica: Replica,
        file_uuid: str = None,
        file_version: str = None,
        bundle_uuid: str = None,
        timeout_seconds: int = 120,
        expect_async: typing.Optional[bool] = None,
    ) -> DSSAssertResponse:
        """
        Upload a file.  If the request is being handled asynchronously, wait until the file has landed in the data
        store.
        """
        file_uuid = str(uuid.uuid4()) if file_uuid is None else file_uuid
        bundle_uuid = str(uuid.uuid4()) if bundle_uuid is None else bundle_uuid
        if expect_async is True:
            expected_codes = requests.codes.accepted
        elif expect_async is False:
            expected_codes = requests.codes.created
        else:
            expected_codes = requests.codes.created, requests.codes.accepted

        if file_version is None:
            timestamp = datetime.datetime.utcnow()
            file_version = datetime_to_version_format(timestamp)
        url = UrlBuilder().set(path=f"/v1/files/{file_uuid}")
        url.add_query("version", file_version)

        resp_obj = self.assertPutResponse(
            str(url),
            expected_codes,
            json_request_body=dict(
                bundle_uuid=bundle_uuid,
                creator_uid=0,
                source_url=source_url,
            ),
        )

        if resp_obj.response.status_code == requests.codes.accepted:
            # poll the HEAD /v1/files endpoint until it succeeds.
            start_time = time.time()
            timeout_time = start_time + timeout_seconds

            while time.time() < timeout_time:
                try:
                    self.assertHeadResponse(
                        f"/v1/files/{file_uuid}?replica={replica.name}",
                        requests.codes.ok)
                    break
                except AssertionError:
                    pass

                time.sleep(1)
            else:
                self.fail("Could not find the output file")

        return resp_obj
Example #10
def enumerate(replica: str,
              prefix: typing.Optional[str] = None,
              token: typing.Optional[str] = None,
              per_page: int = PerPageBounds.per_page_max,
              search_after: typing.Optional[str] = None):
    """
    :param replica: replica name to enumerate against
    :param prefix: uuid prefix used to filter enumeration
    :param token: used to page searches, should not be set by the user.
    :param per_page: max items per page to show, 10 <= per_page <= 500
    :param search_after: used to page searches, should not be set by the user.
    """
    if prefix:
        search_prefix = f'{BUNDLE_PREFIX}/{prefix.lower()}'
    else:
        search_prefix = f'{BUNDLE_PREFIX}/'
    api_domain_name = f'https://{os.environ.get("API_DOMAIN_NAME")}'
    payload = dict(dss_api=api_domain_name,
                   object='list',
                   per_page=per_page,
                   search_prefix=search_prefix,
                   event_timestamp=datetime_to_version_format(
                       datetime.datetime.utcnow()))  # type: typing.Any
    kwargs = dict(replica=Replica[replica].name,
                  prefix=search_prefix,
                  per_page=per_page)
    if search_after:
        kwargs['search_after'] = search_after
    if token:
        kwargs['token'] = token

    payload.update(enumerate_available_bundles(**kwargs))  # type: ignore
    if payload['token'] is None:
        payload['token'] = ""

    if payload['page_count'] < per_page:
        # enumeration is complete
        payload.update(dict(has_more=False))
        del payload['token']
        del payload['search_after']
        response = make_response(jsonify(payload), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
    else:
        next_url = UrlBuilder(request.url)
        next_url.replace_query("search_after", payload['search_after'])
        next_url.replace_query("token", payload['token'])
        link = f"<{next_url}>; rel='next'"
        payload.update(
            dict(has_more=True, token=payload['token'], link=f'{next_url}'))
        response = make_response(jsonify(payload), requests.codes.partial)
        response.headers['Link'] = link
        response.headers['X-OpenAPI-Pagination'] = 'true'
    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'bundles'
    return response
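
The handler above drives pagination with `token`, `search_after`, and `has_more`, and also advertises the next page in a `Link: <...>; rel='next'` header. A hedged sketch of a client consuming it (the route `/v1/bundles/all` and the exact response shape are assumptions based on this handler):

import requests

def iter_all_bundles(api_url: str, replica: str = "aws", per_page: int = 100):
    """Yield bundle listings page by page until the server reports has_more=False."""
    params = {"replica": replica, "per_page": per_page}
    while True:
        page = requests.get(f"{api_url}/v1/bundles/all", params=params).json()
        yield from page["bundles"]
        if not page.get("has_more"):
            break
        # carry the server-issued paging cursor into the next request
        params["token"] = page["token"]
        params["search_after"] = page["search_after"]
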
Example #11
    def put_bundles_response(self, path, replica, expected_code):
        """
        Uploads a file from fixtures to the DSS, then adds it to a bundle under the name `path`.
        Asserts that the expected codes were received at each point.
        """
        fixtures_bucket = self.get_test_fixture_bucket(
            replica.name)  # source a file to upload
        file_version = datetime_to_version_format(datetime.datetime.utcnow())
        bundle_version = datetime_to_version_format(datetime.datetime.utcnow())
        bundle_uuid = str(uuid.uuid4())
        file_uuid = str(uuid.uuid4())
        storage_schema = 's3' if replica.name == 'aws' else 'gs'

        # upload a file from test fixtures
        self.upload_file_wait(
            f"{storage_schema}://{fixtures_bucket}/test_good_source_data/0",
            replica,
            file_uuid,
            file_version=file_version,
            bundle_uuid=bundle_uuid)

        # add that file to a bundle
        builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid)
        builder.add_query("replica", replica.name)
        builder.add_query("version", bundle_version)
        url = str(builder)

        self.assertPutResponse(url,
                               expected_code,
                               json_request_body=dict(
                                   files=[
                                       dict(uuid=file_uuid,
                                            version=file_version,
                                            name=path,
                                            indexed=False)
                                   ],
                                   creator_uid=0,
                               ),
                               headers=get_auth_header())
Example #12
def _upload_bundle(app, replica, uuid=None):
    files = list()
    if replica == Replica.aws:
        test_fixtures_bucket = os.environ['DSS_S3_BUCKET_TEST_FIXTURES']
    else:
        test_fixtures_bucket = os.environ['DSS_GS_BUCKET_TEST_FIXTURES']
    for i in range(2):
        file_name = f"file_{i}"
        file_uuid, file_version = str(uuid4()), datetime_to_version_format(datetime.utcnow())
        source_url = f"{replica.storage_schema}://{test_fixtures_bucket}/test_good_source_data/0"
        resp = app.put(f"/v1/files/{file_uuid}?version={file_version}",
                       headers={**get_auth_header(), 'Content-Type': "application/json"},
                       json=dict(creator_uid=0, source_url=source_url))
        resp.raise_for_status()
        files.append((file_uuid, file_version, file_name))
    bundle_uuid, bundle_version = str(uuid4()), datetime_to_version_format(datetime.utcnow())
    json_request_body = dict(creator_uid=0,
                             files=[dict(uuid=file_uuid, version=file_version, name=file_name, indexed=False)
                                    for file_uuid, file_version, file_name in files])
    resp = app.put(f"/v1/bundles/{bundle_uuid}?replica={replica.name}&version={bundle_version}",
                   headers={**get_auth_header(), 'Content-Type': "application/json"},
                   json=json_request_body)
    resp.raise_for_status()
    resp = app.get(f"/v1/bundles/{bundle_uuid}?replica={replica.name}&version={bundle_version}")
    resp.raise_for_status()
    return bundle_uuid, bundle_version
Example #13
    class Iterator:
        keys = [BundleFQID(uuid=uuid.uuid4(),
                           version=datetime_to_version_format(datetime.datetime.utcnow())).to_key()
                for i in range(10)]

        def __init__(self, *args, **kwargs):
            self.start_after_key = None
            self.token = 'frank'

        def __iter__(self):
            for key in self.keys:
                self.start_after_key = key
                yield self.start_after_key
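
A brief usage note: the stub records each key it yields in `start_after_key`, mimicking a resumable paged listing; a caller that stops mid-stream can restart from that key. For instance (hypothetical driver code for the stub above):

keys_seen = []
it = Iterator()
for key in it:
    keys_seen.append(key)
    if len(keys_seen) == 5:
        break
# `it.start_after_key` now holds the fifth key; a real blob listing would be
# resumed with start_after=it.start_after_key and token=it.token
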
Example #14
    def _test_file_put_cached(self, replica: Replica, scheme: str,
                              test_bucket: str, test_checkout_bucket: str,
                              uploader: Uploader):
        stored_cache_criteria = os.environ.get('CHECKOUT_CACHE_CRITERIA')
        try:
            os.environ[
                'CHECKOUT_CACHE_CRITERIA'] = '[{"type":"application/json","max_size":12314}]'
            handle = Config.get_blobstore_handle(replica)
            src_key = generate_test_key()
            src_data = b'{"status":"valid"}'
            source_url = f"{scheme}://{test_bucket}/{src_key}"
            file_uuid = str(uuid.uuid4())
            bundle_uuid = str(uuid.uuid4())
            version = datetime_to_version_format(datetime.datetime.utcnow())

            # write dummy file and upload to upload area
            with tempfile.NamedTemporaryFile(delete=True) as fh:
                fh.write(src_data)
                fh.flush()

                uploader.checksum_and_upload_file(fh.name, src_key,
                                                  "application/json")

            # upload file to DSS
            self.upload_file(source_url,
                             file_uuid,
                             bundle_uuid=bundle_uuid,
                             version=version)

            metadata = handle.get_user_metadata(test_bucket, src_key)
            dst_key = ("blobs/" + ".".join([
                metadata['hca-dss-sha256'], metadata['hca-dss-sha1'],
                metadata['hca-dss-s3_etag'], metadata['hca-dss-crc32c']
            ])).lower()

            for wait_to_upload_into_checkout_bucket in range(30):
                try:
                    # read the uploaded blob from the checkout bucket
                    file_metadata = json.loads(
                        handle.get(test_checkout_bucket,
                                   dst_key).decode("utf-8"))
                    break
                except BlobNotFoundError:
                    time.sleep(1)
            else:
                file_metadata = json.loads(
                    handle.get(test_checkout_bucket, dst_key).decode("utf-8"))
            # the file exists in the checkout bucket
            assert file_metadata["status"] == "valid"
        finally:
            if stored_cache_criteria is None:
                del os.environ['CHECKOUT_CACHE_CRITERIA']
            else:
                os.environ['CHECKOUT_CACHE_CRITERIA'] = stored_cache_criteria
Example #15
    def test_put_invalid_fragment(self):
        """PUT invalid fragment reference."""
        uuid = str(uuid4())
        self.addCleanup(self._delete_collection, uuid)
        res = self.app.put("/v1/collections",
                           headers=get_auth_header(authorized=True),
                           params=dict(uuid=uuid,
                                       version=datetime_to_version_format(
                                           datetime.now()),
                                       replica="aws"),
                           json=dict(name="n",
                                     description="d",
                                     details={},
                                     contents=[self.invalid_ptr] * 128))
        self.assertEqual(res.status_code, requests.codes.unprocessable_entity)
Example #16
def delete(uuid: str,
           replica: str,
           json_request_body: dict,
           version: str = None):
    email = request.token_info['email']

    if email not in ADMIN_USER_EMAILS:
        raise DSSException(
            requests.codes.forbidden,
            "forbidden",
            f"You can't delete bundles with these credentials!",
        )

    uuid = uuid.lower()
    version = datetime_to_version_format(
        iso8601.parse_date(version)) if version else None

    tombstone_id = TombstoneID(uuid=uuid, version=version)
    bundle_prefix = tombstone_id.to_key_prefix()
    tombstone_object_data = _create_tombstone_data(
        email=email,
        reason=json_request_body.get('reason'),
        version=version,
    )

    handle = Config.get_blobstore_handle(Replica[replica])
    bucket = Replica[replica].bucket

    if test_object_exists(handle,
                          bucket,
                          bundle_prefix,
                          test_type=ObjectTest.PREFIX):
        created, idempotent = _idempotent_save(handle, bucket,
                                               tombstone_id.to_key(),
                                               tombstone_object_data)
        if not idempotent:
            raise DSSException(
                requests.codes.conflict,
                f"bundle_tombstone_already_exists",
                f"bundle tombstone with UUID {uuid} and version {version} already exists",
            )
        status_code = requests.codes.ok
        response_body = dict()  # type: dict
    else:
        status_code = requests.codes.not_found
        response_body = dict(title="bundle not found")

    return jsonify(response_body), status_code
Example #17
    def test_502_get_bundle_HAS_retry_after_response(self):
        """Mock seems resistant to multiple calls, therefore this is only used for one endpoint."""
        with mock.patch('dss.api.bundles.get',
                        side_effect=DSSException(502, 'bad_gateway',
                                                 "Bad Gateway")):
            self.app = ThreadedLocalServer()
            self.app.start()
            uuid = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
            version = datetime_to_version_format(datetime.datetime.utcnow())

            url = str(UrlBuilder().set(path=f"/v1/bundles/{uuid}").add_query(
                "version", version).add_query("replica", 'aws'))

            r = self.assertGetResponse(url, 502, headers=get_auth_header())
            self.assertEqual(int(r.response.headers['Retry-After']), 10)
            self.app.shutdown()
Example #18
    def upload_file(app, contents, replica):
        src_key = generate_test_key()
        encoded = json.dumps(contents).encode()
        chunk_size = get_s3_multipart_chunk_size(len(encoded))
        with io.BytesIO(encoded) as fh, ChecksummingSink(
                write_chunk_size=chunk_size) as sink:
            sink.write(fh.read())
            sums = sink.get_checksums()
            metadata = {
                'hca-dss-crc32c': sums['crc32c'].lower(),
                'hca-dss-s3_etag': sums['s3_etag'].lower(),
                'hca-dss-sha1': sums['sha1'].lower(),
                'hca-dss-sha256': sums['sha256'].lower()
            }
            fh.seek(0)

            if replica == 'gcp':
                gs_test_bucket = get_env("DSS_GS_BUCKET_TEST")
                gcp_client = gs_storage.Client.from_service_account_json(
                    os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
                gs_bucket = gcp_client.bucket(gs_test_bucket)
                blob = gs_bucket.blob(src_key)
                blob.upload_from_file(fh, content_type="application/json")
                blob.metadata = metadata
                blob.patch()
                source_url = f"gs://{gs_test_bucket}/{src_key}"

            if replica == 'aws':
                # TODO: consider switching to unmanaged uploader (putobject w/blob)
                s3_test_bucket = get_env("DSS_S3_BUCKET_TEST")
                s3 = boto3.resource('s3')
                s3.Bucket(s3_test_bucket).Object(src_key).upload_fileobj(
                    fh, ExtraArgs={"Metadata": metadata})
                source_url = f"s3://{s3_test_bucket}/{src_key}"

        file_uuid = str(uuid4())
        version = datetime_to_version_format(datetime.utcnow())
        urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
        urlbuilder.add_query("version", version)

        resp_obj = app.put(str(urlbuilder),
                           json=dict(creator_uid=0, source_url=source_url),
                           headers=get_auth_header())
        resp_obj.raise_for_status()
        return file_uuid, resp_obj.json()["version"]
Example #19
    def test_file_put_large_incorrect_s3_etag(self) -> None:
        bucket = self.s3_test_bucket
        src_key = generate_test_key()
        src_data = os.urandom(ASYNC_COPY_THRESHOLD + 1)

        # upload file with incompatible s3 part size
        self._upload_file_to_mock_ingest(S3Uploader,
                                         bucket,
                                         src_key,
                                         src_data,
                                         s3_part_size=6 * 1024 * 1024)

        file_uuid = str(uuid.uuid4())
        timestamp = datetime.datetime.utcnow()
        file_version = datetime_to_version_format(timestamp)
        url = UrlBuilder().set(path=f"/v1/files/{file_uuid}")
        url.add_query("version", file_version)
        source_url = f"s3://{bucket}/{src_key}"

        # put file into DSS, starting an async copy which will fail
        expected_codes = (requests.codes.accepted,)
        self.assertPutResponse(str(url),
                               expected_codes,
                               json_request_body=dict(
                                   file_uuid=file_uuid,
                                   creator_uid=0,
                                   source_url=source_url,
                               ),
                               headers=get_auth_header())

        # should eventually get unprocessable after async copy fails
        @eventually(120, 1)
        def tryHead():
            self.assertHeadResponse(
                f"/v1/files/{file_uuid}?replica=aws&version={file_version}",
                requests.codes.unprocessable)

        tryHead()

        # should get unprocessable on GCP too
        self.assertHeadResponse(
            f"/v1/files/{file_uuid}?replica=gcp&version={file_version}",
            requests.codes.unprocessable)
Example #20
def patch(uuid: str, json_request_body: dict, replica: str, version: str):
    authenticated_user_email = security.get_token_email(request.token_info)

    uuid = uuid.lower()
    owner = get_impl(uuid=uuid, replica=replica)["owner"]
    if owner != authenticated_user_email:
        raise DSSException(requests.codes.forbidden, "forbidden",
                           "Collection access denied")

    handle = Config.get_blobstore_handle(Replica[replica])
    try:
        cur_collection_blob = handle.get(
            Replica[replica].bucket,
            CollectionFQID(uuid, version).to_key())
    except BlobNotFoundError:
        raise DSSException(
            404, "not_found",
            "Could not find collection for UUID {}".format(uuid))
    collection = json.loads(cur_collection_blob)
    for field in "name", "description", "details":
        if field in json_request_body:
            collection[field] = json_request_body[field]
    remove_contents_set = set(
        map(hashabledict, json_request_body.get("remove_contents", [])))
    collection["contents"] = [
        i for i in collection["contents"]
        if hashabledict(i) not in remove_contents_set
    ]
    verify_collection(json_request_body.get("add_contents", []),
                      Replica[replica], handle)
    collection["contents"].extend(json_request_body.get("add_contents", []))
    collection["contents"] = _dedpuplicate_contents(collection["contents"])
    timestamp = datetime.datetime.utcnow()
    new_collection_version = datetime_to_version_format(timestamp)
    handle.upload_file_handle(
        Replica[replica].bucket,
        CollectionFQID(uuid, new_collection_version).to_key(),
        io.BytesIO(json.dumps(collection).encode("utf-8")))
    return jsonify(dict(uuid=uuid,
                        version=new_collection_version)), requests.codes.ok
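
The `remove_contents` filtering above relies on a `hashabledict` helper so that plain JSON objects can be gathered into a set and compared by value. A minimal sketch of what such a helper looks like (assumed; the real definition lives elsewhere in the codebase):

class hashabledict(dict):
    """A dict that can live in a set: it hashes by its sorted key/value pairs."""
    def __hash__(self):
        return hash(tuple(sorted(self.items())))
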
Example #21
    def test_file_put_bad_checksum(self):
        src_key = generate_test_key()
        uploader = S3Uploader(tempfile.gettempdir(), self.s3_test_bucket)
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            fh.write(os.urandom(1024))
            fh.flush()
            uploader.upload_file(fh.name,
                                 src_key,
                                 'text/plain',
                                 metadata_keys=self.bad_checksums)

        source_url = f's3://{self.s3_test_bucket}/{src_key}'
        file_uuid = str(uuid.uuid4())
        bundle_uuid = str(uuid.uuid4())
        version = datetime_to_version_format(datetime.datetime.utcnow())
        # catch AssertionError raised when upload returns 422 instead of 201
        with self.assertRaises(AssertionError):
            r = self.upload_file(source_url,
                                 file_uuid,
                                 bundle_uuid=bundle_uuid,
                                 version=version)
            self.assertEqual(r.json['code'], 'invalid_checksum')
Example #22
def is_DSS_VERSION(val):
    """
    Verifies `val` is compliant with expected format. See for more info on connexion custom type formats
    https://connexion.readthedocs.io/en/latest/cookbook.html#custom-type-format.
    :param val: the value to verify
    :return: the verified value
    """
    from iso8601 import iso8601
    # convert it to date-time so we can format exactly as the system requires (with microsecond precision)
    try:
        timestamp = iso8601.parse_date(val)
    except iso8601.ParseError:
        raise DSSException(
            requests.codes.bad_request, "illegal_version",
            "version should be an RFC3339 compliant timestamp")
    timestamp = datetime_to_version_format(timestamp)
    if timestamp != val:
        raise DSSException(
            requests.codes.bad_request, "illegal_version",
            "version should be a DSS_VERSION with the format 'YYYY-MM-DDTHHmmSS.zzzzzzZ'"
        )
    return val
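
Because `is_DSS_VERSION` accepts only strings that survive a parse/re-format round trip, the formatter effectively defines the version format. A sketch consistent with the 'YYYY-MM-DDTHHmmSS.zzzzzzZ' pattern named above (assumed implementation of the helper the examples in this section call):

import datetime

def datetime_to_version_format(timestamp: datetime.datetime) -> str:
    # e.g. datetime(2017, 6, 16, 19, 36, 4, 240704) -> "2017-06-16T193604.240704Z"
    return timestamp.strftime("%Y-%m-%dT%H%M%S.%fZ")
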
Example #23
    def test_regex_patterns(self):
        version = datetime_to_version_format(datetime.datetime.utcnow())
        key = f"bundles/{uuid4()}.{version}"
        tombstone_key_with_version = key + ".dead"
        tombstone_key_without_version = f"bundles/{uuid4()}.dead"

        self.assertIsNone(notify_v2._versioned_tombstone_key_regex.match(key))
        self.assertIsNone(
            notify_v2._unversioned_tombstone_key_regex.match(key))

        self.assertIsNotNone(
            notify_v2._versioned_tombstone_key_regex.match(
                tombstone_key_with_version))
        self.assertIsNone(
            notify_v2._versioned_tombstone_key_regex.match(
                tombstone_key_without_version))

        self.assertIsNone(
            notify_v2._unversioned_tombstone_key_regex.match(
                tombstone_key_with_version))
        self.assertIsNotNone(
            notify_v2._unversioned_tombstone_key_regex.match(
                tombstone_key_without_version))
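
The keys exercised above come in three shapes: `bundles/{uuid}.{version}` (live bundle), `bundles/{uuid}.{version}.dead` (versioned tombstone), and `bundles/{uuid}.dead` (unversioned tombstone). Hypothetical patterns consistent with these assertions (the real regexes live in `notify_v2`):

import re

_uuid_re = r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
_version_re = r"\d{4}-\d{2}-\d{2}T\d{6}\.\d{6}Z"
_versioned_tombstone_key_regex = re.compile(
    f"^bundles/{_uuid_re}\\.{_version_re}\\.dead$")
_unversioned_tombstone_key_regex = re.compile(f"^bundles/{_uuid_re}\\.dead$")
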
Example #24
def get_version():
    return datetime_to_version_format(datetime.datetime.utcnow())
Example #25
def put(uuid: str, replica: str, json_request_body: dict, version: str = None):
    uuid = uuid.lower()
    if version is not None:
        # convert it to date-time so we can format exactly as the system requires (with microsecond precision)
        timestamp = iso8601.parse_date(version)
    else:
        timestamp = datetime.datetime.utcnow()
    version = datetime_to_version_format(timestamp)

    handle = Config.get_blobstore_handle(Replica[replica])
    bucket = Replica[replica].bucket

    # what's the target object name for the bundle manifest?
    bundle_manifest_key = BundleFQID(uuid=uuid, version=version).to_key()

    # decode the list of files.
    files = [{'user_supplied_metadata': file} for file in json_request_body['files']]

    time_left = nestedcontext.inject("time_left")

    while True:  # each time through the outer while-loop, we try to gather up all the file metadata.
        for file in files:
            user_supplied_metadata = file['user_supplied_metadata']
            metadata_key = FileFQID(
                uuid=user_supplied_metadata['uuid'],
                version=user_supplied_metadata['version'],
            ).to_key()
            if 'file_metadata' not in file:
                try:
                    file_metadata = handle.get(bucket, metadata_key)
                except BlobNotFoundError:
                    continue
                file['file_metadata'] = json.loads(file_metadata)
                if uuid != file['file_metadata']['bundle_uuid']:
                    raise DSSException(
                        requests.codes.conflict,
                        "incorrect_file_bundle_uuid",
                        f"File bundle_uuid {file['file_metadata']['bundle_uuid']} does not equal bundle uuid {uuid}"
                    )

        # check to see if any file metadata is still not yet loaded.
        for file in files:
            if 'file_metadata' not in file:
                missing_file_user_metadata = file['user_supplied_metadata']
                break
        else:
            break

        # if there is still time on the clock, wait and retry; otherwise fall through and give up.
        if time_left() > PUT_TIME_ALLOWANCE_SECONDS:
            time.sleep(1)
            continue

        raise DSSException(
            requests.codes.conflict,
            "file_missing",
            f"Could not find file {missing_file_user_metadata['uuid']}/{missing_file_user_metadata['version']}."
        )

    # build a manifest consisting of all the files.
    bundle_metadata = {
        BundleMetadata.FORMAT: BundleMetadata.FILE_FORMAT_VERSION,
        BundleMetadata.VERSION: version,
        BundleMetadata.FILES: [
            {
                BundleFileMetadata.NAME: file['user_supplied_metadata']['name'],
                BundleFileMetadata.UUID: file['user_supplied_metadata']['uuid'],
                BundleFileMetadata.VERSION: file['user_supplied_metadata']['version'],
                BundleFileMetadata.CONTENT_TYPE: file['file_metadata'][FileMetadata.CONTENT_TYPE],
                BundleFileMetadata.SIZE: file['file_metadata'][FileMetadata.SIZE],
                BundleFileMetadata.INDEXED: file['user_supplied_metadata']['indexed'],
                BundleFileMetadata.CRC32C: file['file_metadata'][FileMetadata.CRC32C],
                BundleFileMetadata.S3_ETAG: file['file_metadata'][FileMetadata.S3_ETAG],
                BundleFileMetadata.SHA1: file['file_metadata'][FileMetadata.SHA1],
                BundleFileMetadata.SHA256: file['file_metadata'][FileMetadata.SHA256],
            }
            for file in files
        ],
        BundleMetadata.CREATOR_UID: json_request_body['creator_uid'],
    }

    created, idempotent = _idempotent_save(
        handle,
        bucket,
        bundle_manifest_key,
        bundle_metadata,
    )

    if not idempotent:
        raise DSSException(
            requests.codes.conflict,
            "bundle_already_exists",
            f"bundle with UUID {uuid} and version {version} already exists"
        )
    status_code = requests.codes.created if created else requests.codes.ok

    return jsonify(dict(version=version)), status_code
Example #26
    def _test_bundle_put(self, replica: Replica, fixtures_bucket: str):
        schema = replica.storage_schema

        bundle_uuid = str(uuid.uuid4())
        file_uuid = str(uuid.uuid4())
        missing_file_uuid = str(uuid.uuid4())
        resp_obj = self.upload_file_wait(
            f"{schema}://{fixtures_bucket}/test_good_source_data/0",
            replica,
            file_uuid,
            bundle_uuid=bundle_uuid,
        )
        file_version = resp_obj.json['version']

        # first bundle.
        bundle_version = datetime_to_version_format(datetime.datetime.utcnow())
        self.put_bundle(
            replica,
            bundle_uuid,
            [(file_uuid, file_version, "LICENSE")],
            bundle_version,
        )

        # should be able to do this twice (i.e., same payload, same UUIDs)
        self.put_bundle(
            replica,
            bundle_uuid,
            [(file_uuid, file_version, "LICENSE")],
            bundle_version,
            requests.codes.ok,
        )

        # should *NOT* be able to do this twice with different payload.
        self.put_bundle(
            replica,
            bundle_uuid,
            [(file_uuid, file_version, "LICENSE1")],
            bundle_version,
            requests.codes.conflict,
        )

        # should *NOT* be able to upload a bundle with a missing file; we should get requests.codes.conflict.
        with nestedcontext.bind(time_left=lambda: 0):
            resp_obj = self.put_bundle(
                replica,
                bundle_uuid,
                [
                    (file_uuid, file_version, "LICENSE0"),
                    (missing_file_uuid, file_version, "LICENSE1"),
                ],
                expected_code=requests.codes.conflict,
            )
            self.assertEqual(resp_obj.json['code'], "file_missing")

        # should *NOT* be able to upload a bundle containing a file with an incorrect bundle_uuid
        # but we should get requests.codes.conflict
        with nestedcontext.bind(time_left=lambda: 0):
            resp_obj = self.put_bundle(
                replica,
                str(uuid.uuid4()),  # uploading new bundle with old file
                [(file_uuid, file_version, "LICENSE")],
                datetime_to_version_format(datetime.datetime.utcnow()),
                expected_code=requests.codes.conflict,
            )
            self.assertEqual(resp_obj.json['code'], "incorrect_file_bundle_uuid")

        # upload a file, then delete its file metadata; the UploadThread below puts the metadata back after a delay.
        self.upload_file_wait(
            f"{schema}://{fixtures_bucket}/test_good_source_data/0",
            replica,
            missing_file_uuid,
            file_version,
            bundle_uuid=bundle_uuid
        )
        handle = Config.get_blobstore_handle(replica)
        bucket = replica.bucket
        file_metadata = handle.get(bucket, f"files/{missing_file_uuid}.{file_version}")
        handle.delete(bucket, f"files/{missing_file_uuid}.{file_version}")

        class UploadThread(threading.Thread):
            def run(innerself):
                time.sleep(5)
                data_fh = io.BytesIO(file_metadata)
                handle.upload_file_handle(bucket, f"files/{missing_file_uuid}.{file_version}", data_fh)

        # start the upload (on a delay...)
        upload_thread = UploadThread()
        upload_thread.start()

        # this should at first fail to find one of the files, but the UploadThread will eventually upload the file
        # metadata.  since we give the upload bundle process ample time to spin, it should eventually find the file
        # metadata and succeed.
        with nestedcontext.bind(time_left=lambda: sys.maxsize):
            self.put_bundle(
                replica,
                bundle_uuid,
                [
                    (file_uuid, file_version, "LICENSE0"),
                    (missing_file_uuid, file_version, "LICENSE1"),
                ],
                expected_code=requests.codes.created,
            )
Example #27
    def test_notify_or_queue(self):
        replica = Replica.aws
        metadata_document = dict()
        subscription = {
            SubscriptionData.REPLICA: replica,
            SubscriptionData.OWNER: "bob",
            SubscriptionData.UUID: str(uuid4()),
        }

        with self.subTest("Should attempt to notify immediately"):
            with mock.patch(
                    "dss.events.handlers.notify_v2.notify") as mock_notify:
                with mock.patch.object(SQSMessenger, "send") as mock_send:
                    md = dict(**metadata_document, **dict(event_type="CREATE"))
                    key = f"bundles/{uuid4()}.{datetime_to_version_format(datetime.datetime.now())}"
                    notify_or_queue(replica, subscription, md, key)
                    mock_notify.assert_called()
                    mock_send.assert_not_called()
                    keys = [a[0][2] for a in mock_notify.call_args_list]
                    self.assertIn(key, keys)

        with self.subTest("Should queue when notify fails"):
            with mock.patch(
                    "dss.events.handlers.notify_v2.notify") as mock_notify:
                mock_notify.return_value = False
                with mock.patch.object(SQSMessenger, "send", mock_send):
                    md = dict(**metadata_document, **dict(event_type="CREATE"))
                    key = f"bundles/{uuid4()}.{datetime_to_version_format(datetime.datetime.now())}"
                    notify_or_queue(Replica.aws, subscription, md, key)
                    mock_notify.assert_called()
                    mock_send.assert_called()
                    keys = [
                        json.loads(a[0][0])['key']
                        for a in mock_send.call_args_list
                    ]
                    self.assertIn(key, keys)

        with self.subTest(
                "notify_or_queue should attempt to notify immediately for versioned tombstone"
        ):
            with mock.patch(
                    "dss.events.handlers.notify_v2.notify") as mock_notify:
                with mock.patch("dss.events.handlers.notify_v2._list_prefix"
                                ) as mock_list_prefix:
                    md = dict(**metadata_document,
                              **dict(event_type="TOMBSTONE"))
                    bundle_uuid = str(uuid4())
                    bundle_version = datetime_to_version_format(
                        datetime.datetime.utcnow())
                    key = f"bundles/{bundle_uuid}.{bundle_version}.dead"
                    mock_list_prefix.return_value = [key]
                    notify_or_queue(Replica.aws, subscription, md, key)
                    mock_notify.assert_called_with(subscription, md, key)
                    keys = [a[0][2] for a in mock_notify.call_args_list]
                    self.assertIn(key, keys)

        with self.subTest(
                "notify_or_queue should queue notifications for unversioned tombstone"
        ):
            md = dict(**metadata_document, **dict(event_type="TOMBSTONE"))
            bundle_uuid = str(uuid4())
            bundle_version_1 = datetime_to_version_format(
                datetime.datetime.utcnow())
            bundle_version_2 = datetime_to_version_format(
                datetime.datetime.utcnow())
            bundle_key_1 = f"bundles/{bundle_uuid}.{bundle_version_1}"
            bundle_key_2 = f"bundles/{bundle_uuid}.{bundle_version_2}"
            unversioned_tombstone_key = f"bundles/{bundle_uuid}.dead"

            with mock.patch.object(SQSMessenger, "send") as mock_send:
                with mock.patch("dss.events.handlers.notify_v2._list_prefix"
                                ) as mock_list_prefix:
                    mock_list_prefix.return_value = [
                        unversioned_tombstone_key,
                        bundle_key_1,
                        bundle_key_2,
                    ]
                    notify_or_queue(Replica.aws, subscription, md,
                                    unversioned_tombstone_key)
                    keys = [
                        json.loads(a[0][0])['key']
                        for a in mock_send.call_args_list
                    ]
                    self.assertIn(bundle_key_1, keys)
                    self.assertIn(bundle_key_2, keys)

        with self.subTest(
                "notify_or_queue should not re-queue already-tombstoned versions of an unversioned tombstone"
        ):
            md = dict(**metadata_document, **dict(event_type="TOMBSTONE"))
            bundle_uuid = str(uuid4())
            bundle_version_1 = datetime_to_version_format(
                datetime.datetime.utcnow())
            bundle_version_2 = datetime_to_version_format(
                datetime.datetime.utcnow())
            bundle_key_1 = f"bundles/{bundle_uuid}.{bundle_version_1}"
            bundle_key_2 = f"bundles/{bundle_uuid}.{bundle_version_2}.dead"
            unversioned_tombstone_key = f"bundles/{bundle_uuid}.dead"
            with mock.patch.object(SQSMessenger, "send") as mock_send:
                with mock.patch("dss.events.handlers.notify_v2._list_prefix"
                                ) as mock_list_prefix:
                    mock_list_prefix.return_value = [
                        unversioned_tombstone_key,
                        bundle_key_1,
                        bundle_key_2,
                    ]
                    notify_or_queue(Replica.aws, subscription, md,
                                    unversioned_tombstone_key)
                    mock_send.assert_called_once()
                    keys = [
                        json.loads(a[0][0])['key']
                        for a in mock_send.call_args_list
                    ]
                    self.assertIn(bundle_key_1, keys)
                    self.assertNotIn(bundle_key_2, keys)
Example #28
    def _test_file_get_checkout(self, replica: Replica, scheme: str,
                                test_bucket: str, uploader: Uploader):
        handle = Config.get_blobstore_handle(replica)
        src_key = generate_test_key()
        src_data = os.urandom(1024)
        source_url = f"{scheme}://{test_bucket}/{src_key}"
        file_uuid = str(uuid.uuid4())
        bundle_uuid = str(uuid.uuid4())
        version = datetime_to_version_format(datetime.datetime.utcnow())

        # write dummy file and upload to upload area
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            fh.write(src_data)
            fh.flush()

            uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

        # upload file to DSS
        self.upload_file(source_url,
                         file_uuid,
                         bundle_uuid=bundle_uuid,
                         version=version)
        url = str(UrlBuilder().set(path="/v1/files/" + file_uuid).add_query(
            "replica", replica.name).add_query("version", version))

        # get uploaded blob key
        file_metadata = json.loads(
            handle.get(test_bucket,
                       f"files/{file_uuid}.{version}").decode("utf-8"))
        file_key = compose_blob_key(file_metadata)

        @eventually(20, 1)
        def test_checkout():
            # assert 302 and verify checksum on checkout completion
            api_get = self.assertGetResponse(url,
                                             requests.codes.found,
                                             headers=get_auth_header(),
                                             redirect_follow_retries=0)
            file_get = requests.get(api_get.response.headers['Location'])
            self.assertTrue(file_get.ok)
            self.assertEqual(file_get.content, src_data)

        with self.subTest(
                f"{replica}: Initiates checkout and returns 301 for GET on 'uncheckedout' file."
        ):
            # assert 301 redirect on first GET
            self.assertGetResponse(url,
                                   requests.codes.moved,
                                   headers=get_auth_header(),
                                   redirect_follow_retries=0)
            test_checkout()

        with self.subTest(
                f"{replica}: Initiates checkout and returns 301 for GET on nearly expired checkout file."
        ):
            now = datetime.datetime.now(datetime.timezone.utc)
            creation_date_fn = (
                "cloud_blobstore.s3.S3BlobStore.get_creation_date"
                if replica.name == "aws" else
                "cloud_blobstore.gs.GSBlobStore.get_creation_date")
            with mock.patch(creation_date_fn) as mock_creation_date:
                blob_ttl_days = int(os.environ['DSS_BLOB_TTL_DAYS'])
                mock_creation_date.return_value = now - datetime.timedelta(
                    days=blob_ttl_days, hours=1, minutes=5)
                self.assertGetResponse(url,
                                       requests.codes.moved,
                                       headers=get_auth_header(),
                                       redirect_follow_retries=0)
            test_checkout()

        with self.subTest(
                f"{replica}: Initiates checkout and returns 302 immediately for GET on stale checkout file."
        ):
            now = datetime.datetime.now(datetime.timezone.utc)
            creation_date = handle.get_creation_date(replica.checkout_bucket,
                                                     file_key)
            creation_date_fn = (
                "cloud_blobstore.s3.S3BlobStore.get_creation_date"
                if replica.name == "aws" else
                "cloud_blobstore.gs.GSBlobStore.get_creation_date")
            with mock.patch(creation_date_fn) as mock_creation_date:
                # assert 302 found on stale file and that last modified refreshes
                blob_ttl_days = int(os.environ['DSS_BLOB_PUBLIC_TTL_DAYS'])
                mock_creation_date.return_value = now - datetime.timedelta(
                    days=blob_ttl_days + 1)
                self.assertGetResponse(url,
                                       requests.codes.found,
                                       headers=get_auth_header(),
                                       redirect_follow_retries=0)
                self.assertTrue(
                    creation_date > handle.get_creation_date(
                        replica.checkout_bucket, file_key),
                    f'\ncurr_creation_date: {creation_date}'
                    f'\nprev_creation_date: {handle.get_creation_date(replica.checkout_bucket, file_key)}'
                )

        handle.delete(test_bucket, f"files/{file_uuid}.{version}")
        handle.delete(replica.checkout_bucket, file_key)
Example #29
    def _test_file_put(self, replica: Replica, scheme: str, test_bucket: str,
                       uploader: Uploader):
        src_key = generate_test_key()
        src_data = os.urandom(1024)
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            fh.write(src_data)
            fh.flush()

            uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

        source_url = f"{scheme}://{test_bucket}/{src_key}"

        file_uuid = str(uuid.uuid4())
        bundle_uuid = str(uuid.uuid4())
        version = datetime_to_version_format(datetime.datetime.utcnow())

        self._test_put_auth_errors(scheme, test_bucket)

        with self.subTest(
                f"{replica}: Created returned when uploading a file with a unique payload and FQID"
        ):
            self.upload_file(source_url,
                             file_uuid,
                             bundle_uuid=bundle_uuid,
                             version=version)

        with self.subTest(
                f"{replica}: Created returned when uploading a file with the same payload and a different FQID"
        ):
            self.upload_file(source_url, str(uuid.uuid4()))

        with self.subTest(
                f"{replica}: OK returned when uploading a file with the same payload, UUID, and version"
        ):
            self.upload_file(source_url,
                             file_uuid,
                             bundle_uuid=bundle_uuid,
                             version=version,
                             expected_code=requests.codes.ok)

        with self.subTest(
                f"{replica}: Conflict returned when uploading a file with a different payload and same FQID"
        ):
            src_key_temp = generate_test_key()
            src_data_temp = os.urandom(128)
            with tempfile.NamedTemporaryFile(delete=True) as fh:
                fh.write(src_data_temp)
                fh.flush()

                uploader.checksum_and_upload_file(fh.name, src_key_temp,
                                                  "text/plain")

            source_url_temp = f"{scheme}://{test_bucket}/{src_key_temp}"
            self.upload_file(source_url_temp,
                             file_uuid,
                             version=version,
                             expected_code=requests.codes.conflict)

        with self.subTest(
                f"{replica}: Bad returned when uploading a file with an invalid version"
        ):
            self.upload_file(source_url,
                             file_uuid,
                             version='',
                             expected_code=requests.codes.bad_request)

        invalid_version = 'ABCD'
        with self.subTest(
                f"{replica}: bad_request returned "
                f"when uploading a file with invalid version {invalid_version}"
        ):
            self.upload_file(source_url,
                             file_uuid,
                             version=invalid_version,
                             expected_code=requests.codes.bad_request)

        with self.subTest(
                f"{replica}: Bad returned when uploading a file without a version"
        ):
            self.upload_file(source_url,
                             file_uuid,
                             version='missing',
                             expected_code=requests.codes.bad_request)

        invalid_uuids = ['ABCD', '1234']
        for invalid_uuid in invalid_uuids:
            with self.subTest(
                    f"{replica}: Bad returned "
                    f"when uploading a file with invalid UUID {invalid_uuid}"):
                self.upload_file(source_url,
                                 invalid_uuid,
                                 expected_code=requests.codes.bad_request)

        with self.subTest(
                f"{replica}: forbidden returned "
                "when uploading a file without a UUID"):
            self.upload_file(source_url,
                             '',
                             expected_code=requests.codes.forbidden)
Example #30
def put(uuid: str, json_request_body: dict, version: str = None):
    class CopyMode(Enum):
        NO_COPY = auto()
        COPY_INLINE = auto()
        COPY_ASYNC = auto()

    uuid = uuid.lower()
    if version is not None:
        # convert it to date-time so we can format exactly as the system requires (with microsecond precision)
        timestamp = iso8601.parse_date(version)
    else:
        timestamp = datetime.datetime.utcnow()
    version = datetime_to_version_format(timestamp)

    source_url = json_request_body['source_url']
    cre = re.compile("^"
                     "(?P<schema>(?:s3|gs|wasb))"
                     "://"
                     "(?P<bucket>[^/]+)"
                     "/"
                     "(?P<key>.+)"
                     "$")
    mobj = cre.match(source_url)
    if mobj and mobj.group('schema') == "s3":
        replica = Replica.aws
    elif mobj and mobj.group('schema') == "gs":
        replica = Replica.gcp
    else:
        # either no match at all, or an unsupported schema such as wasb
        schema = mobj.group('schema') if mobj else "unknown"
        raise DSSException(requests.codes.bad_request, "unknown_source_schema",
                           f"source_url schema {schema} not supported")

    handle = Config.get_blobstore_handle(replica)
    hca_handle = Config.get_hcablobstore_handle(replica)
    dst_bucket = replica.bucket

    src_bucket = mobj.group('bucket')
    src_key = mobj.group('key')

    metadata = handle.get_user_metadata(src_bucket, src_key)
    size = handle.get_size(src_bucket, src_key)
    content_type = handle.get_content_type(src_bucket, src_key)

    # format all the checksums so they're lower-case.
    for metadata_spec in HCABlobStore.MANDATORY_METADATA.values():
        if metadata_spec['downcase']:
            keyname = typing.cast(str, metadata_spec['keyname'])
            metadata[keyname] = metadata[keyname].lower()

    # what's the target object name for the actual data?
    dst_key = ("blobs/" + ".".join((
        metadata['hca-dss-sha256'],
        metadata['hca-dss-sha1'],
        metadata['hca-dss-s3_etag'],
        metadata['hca-dss-crc32c'],
    ))).lower()

    # does it exist? if so, we can skip the copy part.
    copy_mode = CopyMode.COPY_INLINE
    try:
        if hca_handle.verify_blob_checksum(dst_bucket, dst_key, metadata):
            copy_mode = CopyMode.NO_COPY
    except BlobNotFoundError:
        pass

    # build the json document for the file metadata.
    file_metadata = {
        FileMetadata.FORMAT: FileMetadata.FILE_FORMAT_VERSION,
        FileMetadata.BUNDLE_UUID: json_request_body['bundle_uuid'],
        FileMetadata.CREATOR_UID: json_request_body['creator_uid'],
        FileMetadata.VERSION: version,
        FileMetadata.CONTENT_TYPE: content_type,
        FileMetadata.SIZE: size,
        FileMetadata.CRC32C: metadata['hca-dss-crc32c'],
        FileMetadata.S3_ETAG: metadata['hca-dss-s3_etag'],
        FileMetadata.SHA1: metadata['hca-dss-sha1'],
        FileMetadata.SHA256: metadata['hca-dss-sha256'],
    }
    file_metadata_json = json.dumps(file_metadata)

    if copy_mode != CopyMode.NO_COPY and size > ASYNC_COPY_THRESHOLD:
        copy_mode = CopyMode.COPY_ASYNC

    if copy_mode == CopyMode.COPY_ASYNC:
        if replica == Replica.aws:
            state = s3copyclient.copy_write_metadata_sfn_event(
                src_bucket,
                src_key,
                dst_bucket,
                dst_key,
                uuid,
                version,
                file_metadata_json,
            )
            state_machine_name_template = "dss-s3-copy-write-metadata-sfn-{stage}"
        elif replica == Replica.gcp:
            state = gscopyclient.copy_write_metadata_sfn_event(
                src_bucket,
                src_key,
                dst_bucket,
                dst_key,
                uuid,
                version,
                file_metadata_json,
            )
            state_machine_name_template = "dss-gs-copy-write-metadata-sfn-{stage}"
        else:
            raise ValueError("Unhandled replica")

        execution_id = str(uuid4())
        stepfunctions.step_functions_invoke(state_machine_name_template,
                                            execution_id, state)
        return jsonify(dict(task_id=execution_id,
                            version=version)), requests.codes.accepted
    elif copy_mode == CopyMode.COPY_INLINE:
        handle.copy(src_bucket, src_key, dst_bucket, dst_key)

        # verify the copy was done correctly.
        assert hca_handle.verify_blob_checksum(dst_bucket, dst_key, metadata)

    try:
        write_file_metadata(handle, dst_bucket, uuid, version,
                            file_metadata_json)
        status_code = requests.codes.created
    except BlobAlreadyExistsError:
        # fetch the file metadata, compare it to what we have.
        existing_file_metadata = json.loads(
            handle.get(dst_bucket,
                       "files/{}.{}".format(uuid, version)).decode("utf-8"))
        if existing_file_metadata != file_metadata:
            raise DSSException(
                requests.codes.conflict, "file_already_exists",
                f"file with UUID {uuid} and version {version} already exists")
        status_code = requests.codes.ok

    return jsonify(dict(version=version)), status_code
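
The copy handling above reduces to a three-way decision: skip the copy when a blob with matching checksums already exists, copy small objects inline within the request, and hand objects larger than ASYNC_COPY_THRESHOLD to a Step Functions copy workflow. A compact restatement of that decision (a sketch; the threshold value is deployment-specific):

from enum import Enum, auto

class CopyMode(Enum):
    NO_COPY = auto()
    COPY_INLINE = auto()
    COPY_ASYNC = auto()

def choose_copy_mode(blob_already_present: bool, size: int, async_threshold: int) -> CopyMode:
    # mirrors the branching in put() above
    if blob_already_present:
        return CopyMode.NO_COPY
    if size > async_threshold:
        return CopyMode.COPY_ASYNC
    return CopyMode.COPY_INLINE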