Example #1
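Duplicate a StoredObject into another Step: copy the backing file within the stored-objects bucket, then create a matching database row pointing at the new key.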
    def duplicate(self, to_step):
        basename = self.key.split("/")[-1]
        key = f"{to_step.workflow_id}/{to_step.id}/{basename}"
        s3.copy(s3.StoredObjectsBucket, key, f"{s3.StoredObjectsBucket}/{self.key}")

        return to_step.stored_objects.create(
            stored_at=self.stored_at, hash=self.hash, key=key, size=self.size
        )
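The snippet calls s3.copy() with a destination bucket, a destination key, and the source as a single "bucket/key" string (the examples below use the same shape). A minimal sketch of such a helper, assuming it wraps boto3's copy_object; the project's real s3 module is not shown here and may be implemented differently:

import boto3

_client = boto3.client("s3")

def copy(bucket, key, copy_source, **kwargs):
    # CopySource accepts a "bucket/key" string, matching the call sites in
    # these examples; extra kwargs (MetadataDirective, ContentDisposition,
    # ContentType, ...) pass straight through to copy_object.
    _client.copy_object(Bucket=bucket, Key=key, CopySource=copy_source, **kwargs)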
Example #2
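Complete a tusd file upload: validate the hook metadata, move the file out of tusd's bucket into the user-files bucket under a fresh UUID key, and record an UploadedFile on the Step.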
def _finish_upload(data: Dict[str, Any]) -> Dict[str, Any]:
    """Create an UploadedFile by moving data out of tusd's bucket.

    Return kwargs for SetStepParams.
    """
    # SECURITY: we expect metadata to come from Workbench itself. (On
    # production, there's no route from the Internet to tusd's POST endpoint.)
    # However, let's cast to correct types just to be safe. If a miscreant
    # comes along, that'll cause a 500 error and we'll be notified. (Better
    # than sending untrusted data to Django ORM.)
    # Raise TypeError, KeyError, ValueError.
    filename = str(data["MetaData"]["filename"])
    api_token = str(data["MetaData"]["apiToken"])
    workflow_id = int(data["MetaData"]["workflowId"])
    step_slug = str(data["MetaData"]["stepSlug"])
    size = int(data["Size"])
    bucket = str(data["Storage"]["Bucket"])
    key = str(data["Storage"]["Key"])

    if bucket != s3.TusUploadBucket:
        # SECURITY: if a hijacker manages to craft a request here, prevent the
        # request's creator from copying a file they can't see. (The creator is
        # only known to be able to see `key` of `s3.TusUploadBucket`.)
        raise RuntimeError("SECURITY: did tusd send this request?")

    suffix = PurePath(filename).suffix
    file_uuid = str(uuid.uuid4())
    final_key = None

    with upload.locked_and_loaded_step(workflow_id, step_slug) as (
        workflow,
        step,
        param_id_name,
    ):  # raise UploadError
        # Ensure upload's API token is the same as the one we sent tusd.
        #
        # This doesn't give security: an attacker can simulate a request from
        # tusd with api_token=None and it will look like a browser-initiated
        # one.
        #
        # It's for timing: if the user resets a module's API token, we should
        # disallow all prior uploads.
        if api_token:  # empty when React client uploads
            upload.raise_if_api_token_is_wrong(step, api_token)  # raise UploadError

        final_key = step.uploaded_file_prefix + str(file_uuid) + suffix

        # Tricky leak here: if there's an exception or crash, the transaction
        # is reverted. final_key will remain in S3 but the database won't point
        # to it.
        #
        # Not a huge deal, because `final_key` is in the Step's own directory.
        # The user can delete all leaked files by deleting the Step.
        s3.copy(
            s3.UserFilesBucket,
            final_key,
            f"{bucket}/{key}",
            MetadataDirective="REPLACE",
            ContentDisposition=s3.encode_content_disposition(filename),
            ContentType="application/octet-stream",
        )

        step.uploaded_files.create(
            name=filename, size=size, uuid=file_uuid, key=final_key
        )
        delete_old_files_to_enforce_storage_limits(step=step)
        s3.remove(bucket, key)

    return dict(
        workflow_id=workflow_id, step=step, new_values={param_id_name: file_uuid}
    )
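An illustrative call, shaped like the tusd hook payload the function reads. Only the keys come from the code above; every value here is made up:

kwargs = _finish_upload(
    {
        "MetaData": {
            "filename": "report.csv",
            "apiToken": "",  # empty when the React client uploads
            "workflowId": "123",  # tus metadata values are strings; int() casts it
            "stepSlug": "step-1",
        },
        "Size": 4096,
        "Storage": {"Bucket": "tusd-bucket", "Key": "abc123"},  # Bucket must equal s3.TusUploadBucket
    }
)
# On success, kwargs == dict(workflow_id=..., step=..., new_values={param_id_name: file_uuid})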
Example #3
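Duplicate a Step into another Tab: copy its cached render result (when the tab names match), its currently-selected stored data, and any uploaded files referenced by file params. Secrets are deliberately not copied.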
    def _duplicate_with_slug_and_delta_id(self, to_tab, slug, last_relevant_delta_id):
        # Initialize but don't save
        new_step = Step(
            tab=to_tab,
            slug=slug,
            module_id_name=self.module_id_name,
            fetch_errors=self.fetch_errors,
            stored_data_version=self.stored_data_version,
            order=self.order,
            notes=self.notes,
            is_collapsed=self.is_collapsed,
            auto_update_data=False,
            next_update=None,
            update_interval=self.update_interval,
            last_update_check=self.last_update_check,
            last_relevant_delta_id=last_relevant_delta_id,
            params=self.params,
            secrets={},  # DO NOT COPY SECRETS
        )

        # Copy cached render result, if there is one.
        #
        # If we duplicate a Workflow mid-render, the cached render result might
        # not have any useful data. But that's okay: just kick off a new
        # render. The common case (all-rendered Workflow) will produce a
        # fully-rendered duplicate Workflow.
        #
        # We cannot copy the cached result if the destination Tab has a
        # different name than this one: tab_name is passed to render(), so even
        # an exactly-duplicated Step can have a different output.
        cached_result = self.cached_render_result
        if cached_result is not None and self.tab.name == to_tab.name:
            # assuming file-copy succeeds, copy cached results.
            new_step.cached_render_result_delta_id = new_step.last_relevant_delta_id
            for attr in ("status", "errors", "json", "columns", "nrows"):
                full_attr = f"cached_render_result_{attr}"
                setattr(new_step, full_attr, getattr(self, full_attr))

            new_step.save()  # so there is a new_step.id for parquet_key

            # Now new_step.cached_render_result will return a
            # CachedRenderResult, because all the DB values are set. It'll have
            # a .parquet_key ... but there won't be a file there (because we
            # never wrote it).
            from cjwstate.rendercache.io import BUCKET, crr_parquet_key

            old_parquet_key = crr_parquet_key(cached_result)
            new_parquet_key = crr_parquet_key(new_step.cached_render_result)

            try:
                s3.copy(
                    s3.CachedRenderResultsBucket,
                    new_parquet_key,
                    "%(Bucket)s/%(Key)s" % {"Bucket": BUCKET, "Key": old_parquet_key},
                )
            except s3.layer.error.NoSuchKey:
                # DB and filesystem are out of sync. CachedRenderResult handles
                # such cases gracefully, so `new_step.cached_render_result`
                # will behave exactly like `cached_result`.
                pass
        else:
            new_step.save()

        # Duplicate the current stored data only, not the history
        if self.stored_data_version is not None:
            self.stored_objects.get(stored_at=self.stored_data_version).duplicate(
                new_step
            )

        # For each "file" param, duplicate the "selected" uploaded_file if there
        # is one.
        #
        # We assume any UUID in `params` that points to an uploaded file _is_
        # a file-dtype param. ([adamhooper, 2020-07-14] when the assumption does
        # not hold, will this cause DB errors? Not sure, but it's not a security
        # risk.)
        #
        # Why not check the param schema? Because we'd need to define behavior
        # for when the module doesn't exist, or its version is changed, or its
        # code breaks.... bah! These behaviors don't line up with any user
        # expectations. Users want to copy the thing they see.
        for uuid_str in self.params.values():
            if not isinstance(uuid_str, str):
                continue
            try:
                UUID(uuid_str)
            except ValueError:
                continue
            uploaded_file = self.uploaded_files.filter(uuid=uuid_str).first()
            if not uploaded_file:
                continue

            new_key = uploaded_file.key.replace(
                self.uploaded_file_prefix, new_step.uploaded_file_prefix
            )
            assert new_key != uploaded_file.key
            # TODO handle file does not exist
            s3.copy(
                s3.UserFilesBucket,
                new_key,
                f"{s3.UserFilesBucket}/{uploaded_file.key}",
            )
            new_step.uploaded_files.create(
                created_at=uploaded_file.created_at,
                name=uploaded_file.name,
                size=uploaded_file.size,
                uuid=uploaded_file.uuid,
                key=new_key,
            )

        return new_step
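The UUID-sniffing loop above reduces to a small predicate. A standalone sketch of the same idiom (not part of the original module):

from uuid import UUID

def is_uuid_string(value) -> bool:
    # Mirrors the loop above: only str values that parse as a UUID count.
    if not isinstance(value, str):
        return False
    try:
        UUID(value)
    except ValueError:
        return False
    return True

# e.g. candidate_uuids = [v for v in self.params.values() if is_uuid_string(v)]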