Example #1
    def test_db_s3_syntax_error_is_runtime_error(self):
        mv = create_or_replace_from_spec(
            {
                "id_name": "regtest9",
                "name": "regtest9 v1",
                "category": "Clean",
                "parameters": [{
                    "id_name": "url",
                    "type": "string"
                }],
            },
            source_version_hash="b1c2d3",
        )
        bio = io.BytesIO()
        with zipfile.ZipFile(bio, mode="w") as zf:
            zf.writestr(
                "regtest9.yaml",
                json.dumps({
                    **mv.spec, "parameters": "not an Array"
                }).encode("utf-8"),
            )
            zf.writestr("regtest9.py", b"def render(")
        s3.put_bytes(
            s3.ExternalModulesBucket,
            "regtest9/regtest9.b1c2d3.zip",
            bytes(bio.getbuffer()),
        )

        with self.assertRaises(RuntimeError) as cm:
            MODULE_REGISTRY.latest("regtest9")
        self.assertIsInstance(cm.exception.__cause__, SyntaxError)
Example #2
    def test_db_s3_validate_code_with_kernel(self):
        mv = create_or_replace_from_spec(
            {
                "id_name": "regtest7",
                "name": "regtest7 v1",
                "category": "Clean",
                "parameters": [{
                    "id_name": "url",
                    "type": "string"
                }],
            },
            source_version_hash="b1c2d3",
        )
        bio = io.BytesIO()
        with zipfile.ZipFile(bio, mode="w") as zf:
            zf.writestr("regtest7.yaml", json.dumps(mv.spec).encode("utf-8"))
            zf.writestr(
                "regtest7.py",
                b"def render(table, params):\n    return table\nfoo()")
        s3.put_bytes(
            s3.ExternalModulesBucket,
            "regtest7/regtest7.b1c2d3.zip",
            bytes(bio.getbuffer()),
        )

        with self.assertRaises(RuntimeError) as cm:
            MODULE_REGISTRY.latest("regtest7")
        self.assertIsInstance(cm.exception.__cause__, ModuleExitedError)
Example #3
    def test_db_s3_use_cache_for_same_version(self):
        mv = create_or_replace_from_spec(
            {
                "id_name": "regtest4",
                "name": "regtest4 v1",
                "category": "Clean",
                "parameters": [{
                    "id_name": "url",
                    "type": "string"
                }],
            },
            source_version_hash="b1c2d2",
        )
        bio = io.BytesIO()
        with zipfile.ZipFile(bio, mode="w") as zf:
            zf.writestr("regtest4.yaml", json.dumps(mv.spec).encode("utf-8"))
            zf.writestr("regtest4.py",
                        b"def render(table, params):\n    return table")
        s3.put_bytes(
            s3.ExternalModulesBucket,
            "regtest4/regtest4.b1c2d2.zip",
            bytes(bio.getbuffer()),
        )

        zf1 = MODULE_REGISTRY.latest("regtest4")
        zf2 = MODULE_REGISTRY.latest("regtest4")
        self.assertIs(zf2, zf1)
Example #4
 def test_delete_deletes_from_s3(self):
     s3.put_bytes(s3.StoredObjectsBucket, "test.dat", b"abcd")
     workflow = Workflow.create_and_init()
     step = workflow.tabs.first().steps.create(order=0, slug="step-1")
     so = step.stored_objects.create(size=4, key="test.dat")
     so.delete()
     self.assertFalse(s3.exists(s3.StoredObjectsBucket, "test.dat"))
Example #5
 def test_clean_file_safe_filename(self):
     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     step = tab.steps.create(module_id_name="uploadfile",
                             order=0,
                             slug="step-1")
     key = f"wf-${workflow.id}/wfm-${step.id}/6e00511a-8ac4-4b72-9acc-9d069992b5cf"
     s3.put_bytes(s3.UserFilesBucket, key, b"1234")
     model = UploadedFileModel.objects.create(
         step=step,
         name="/etc/passwd.$/etc/passwd",
         size=4,
         uuid="6e00511a-8ac4-4b72-9acc-9d069992b5cf",
         key=key,
     )
     with ExitStack() as inner_stack:
         result = self._call_prep_params(
             ParamSchema.Dict({"file": ParamSchema.File()}),
             {"file": "6e00511a-8ac4-4b72-9acc-9d069992b5cf"},
             step_id=step.id,
             exit_stack=inner_stack,
         )
         self.assertEqual(
             result.uploaded_files["6e00511a-8ac4-4b72-9acc-9d069992b5cf"],
             UploadedFile(
                 "/etc/passwd.$/etc/passwd",
                 "6e00511a-8ac4-4b72-9acc-9d069992b5cf_-etc-passwd.--etc-passwd",
                 model.created_at,
             ),
         )
Example #6
 def test_pre_finish_no_op_when_api_token_is_off(self):
     _init_module("x")
     self.kernel.migrate_params.side_effect = lambda m, p: p
     workflow = Workflow.create_and_init()
     step = workflow.tabs.first().steps.create(
         order=0,
         slug="step-123",
         module_id_name="x",
         file_upload_api_token="abc123",
         params={"file": None},
     )
     s3.put_bytes(s3.TusUploadBucket, "data", b"1234567")
     response = self.client.post(
         f"/tusd-hooks",
         {
             "Upload": {
                 "MetaData": {
                     "filename": "foo.csv",
                     "workflowId": str(workflow.id),
                     "stepSlug": step.slug,
                     "apiToken": "an-out-of-date-token",
                 },
                 "Size": 7,
                 "Storage": {"Bucket": s3.TusUploadBucket, "Key": "data"},
             }
         },
         HTTP_HOOK_NAME="pre-finish",
         content_type="application/json",
     )
     self.assertEqual(response.status_code, 403)
     self.assertEqual(
         response.json(), {"error": {"code": "authorization-bearer-token-invalid"}}
     )
     # File was not created
     self.assertEqual(0, step.uploaded_files.count())
Example #7
 def test_delete_remove_uploaded_data_by_prefix_in_case_model_missing(self):
     workflow = Workflow.create_and_init()
     step = workflow.tabs.first().steps.create(order=0, slug="step-1")
     uuid = str(uuidgen.uuid4())
     key = step.uploaded_file_prefix + uuid
     s3.put_bytes(s3.UserFilesBucket, key, b"A\n1")
     # Don't create the UploadedFile. Simulates races during upload/delete
     # that could write a file on S3 but not in our database.
     # step.uploaded_files.create(name='t.csv', size=3, uuid=uuid, key=key)
     step.delete()  # do not crash
     self.assertFalse(s3.exists(s3.UserFilesBucket, key))
Example #8
    def test_resume_backtrack_on_corrupt_cache_error(self):
        module_zipfile = create_module_zipfile(
            "mod", spec_kwargs={"loads_data": True})
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        # step1: cached result is fresh -- but CORRUPT
        step1 = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        write_to_rendercache(workflow, step1, workflow.last_delta_id,
                             make_table(make_column("A", [1])))
        step1.refresh_from_db()
        s3.put_bytes(
            # Write corrupted data -- will lead to CorruptCacheError
            rendercache.io.BUCKET,
            rendercache.io.crr_parquet_key(step1.cached_render_result),
            b"CORRUPT",
        )
        # step2: no cached result -- must re-render
        step2 = tab.steps.create(order=1, slug="step-2", module_id_name="mod")

        tab_flow = TabFlow(
            Tab(tab.slug, tab.name),
            [
                ExecuteStep(step1, module_zipfile, {}),
                ExecuteStep(step2, module_zipfile, {}),
            ],
        )

        new_table = make_table(make_column("B", ["b"]))

        with patch.object(Kernel, "render",
                          side_effect=mock_render(new_table)):
            with self._execute(workflow,
                               tab_flow, {},
                               expect_log_level=logging.ERROR) as (result,
                                                                   path):
                self.assertEqual(
                    result, StepResult(path, [Column("B", ColumnType.Text())]))

            self.assertEqual(
                # called with step1, then step2
                Kernel.render.call_count,
                2,
            )
            self.assertRegex(
                # Output is to the correct file
                Kernel.render.call_args[1]["output_filename"],
                r"execute-tab-output.*\.arrow",
            )
Example #9
def create_module_zipfile(
    module_id: str = "testmodule",
    *,
    version: Optional[str] = None,
    spec_kwargs: Dict[str, Any] = {},
    python_code: str = "",
    html: Optional[str] = None,
    js_module: str = "",
    extra_file_contents: Dict[str, bytes] = {},
) -> ModuleZipfile:
    """
    Create a ModuleZipfile, stored in the database and s3.

    If `version` is not supplied, generate one from the sha1 of the zipfile.
    This is usually what you want: s3 reads after an overwrite are only
    _eventually_ consistent, so if you 1. write a file; 2. overwrite it; and
    3. read it, the read may return either the file from step 1 or the file
    from step 2. A sha1-derived version means an overwrite never modifies
    existing data, which sidesteps the problem.
    """
    spec = {
        "id_name": module_id,
        "name": "Test Module",
        "category": "Clean",
        "parameters": [],
        **spec_kwargs,
    }

    bio = io.BytesIO()
    with zipfile.ZipFile(bio, mode="w") as zf:
        zf.writestr(module_id + ".yaml", json.dumps(spec))
        zf.writestr(module_id + ".py", python_code.encode("utf-8"))
        if html is not None:
            zf.writestr(module_id + ".html", html.encode("utf-8"))
        if js_module:
            zf.writestr(module_id + ".js", js_module.encode("utf-8"))
        for path, content in extra_file_contents.items():
            zf.writestr(path, content)
    data = bytes(bio.getbuffer())
    if version is None:
        sha1 = hashlib.sha1()
        sha1.update(data)
        version = sha1.hexdigest()

    s3.put_bytes(
        s3.ExternalModulesBucket,
        "%s/%s.%s.zip" % (module_id, module_id, version),
        data,
    )
    ModuleVersion.objects.create(id_name=module_id,
                                 source_version_hash=version,
                                 spec=spec,
                                 js_module=js_module)
    return MODULE_REGISTRY.latest(module_id)
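A minimal usage sketch for the helper above (the module id, parameter spec and code are illustrative assumptions, not taken from the source). Because `version` defaults to the sha1 of the generated zip, identical calls always target the same immutable S3 key:

# Hypothetical call: one string parameter and a pass-through render().
zf = create_module_zipfile(
    "loadurl",  # illustrative id_name
    spec_kwargs={"parameters": [{"id_name": "url", "type": "string"}]},
    python_code="def render(table, params):\n    return table",
)
# zf is the ModuleZipfile returned by MODULE_REGISTRY.latest("loadurl");
# zf.version is the sha1 hexdigest of the zip bytes.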
Example #10
    def test_resume_backtrack_on_corrupt_cache_error(self):
        module_zipfile = create_module_zipfile("mod", spec_kwargs={"loads_data": True})
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        # step1: cached result is fresh -- but CORRUPT
        step1 = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        rendercache.cache_render_result(
            workflow,
            step1,
            workflow.last_delta_id,
            RenderResult(arrow_table({"A": [1]})),
        )
        s3.put_bytes(
            # Write corrupted data -- will lead to CorruptCacheError
            rendercache.io.BUCKET,
            rendercache.io.crr_parquet_key(step1.cached_render_result),
            b"CORRUPT",
        )
        # step2: no cached result -- must re-render
        step2 = tab.steps.create(order=1, slug="step-2", module_id_name="mod")

        tab_flow = TabFlow(
            tab.to_arrow(),
            [
                ExecuteStep(step1, module_zipfile, {}),
                ExecuteStep(step2, module_zipfile, {}),
            ],
        )

        with patch.object(Kernel, "render", side_effect=mock_render({"B": [2]})):
            with self._execute(
                workflow, tab_flow, {}, expect_log_level=logging.ERROR
            ) as result:
                expected = RenderResult(arrow_table({"B": [2]}))
                assert_render_result_equals(result, expected)

            self.assertEqual(
                # called with step1, then step2
                Kernel.render.call_count,
                2,
            )
            self.assertRegex(
                # Output is to the correct file
                Kernel.render.call_args[1]["output_filename"],
                r"execute-tab-output.*\.arrow",
            )
Example #11
    def test_db_s3_refresh_cache_for_new_version(self):
        v1 = create_or_replace_from_spec(
            {
                "id_name": "regtest5",
                "name": "regtest5 v1",
                "category": "Clean",
                "parameters": [{
                    "id_name": "url",
                    "type": "string"
                }],
            },
            source_version_hash="b1c2d2",
        )
        bio = io.BytesIO()
        with zipfile.ZipFile(bio, mode="w") as zf:
            zf.writestr("regtest5.yaml", json.dumps(v1.spec).encode("utf-8"))
            zf.writestr("regtest5.py",
                        b"def render(table, params):\n    return table")
        s3.put_bytes(
            s3.ExternalModulesBucket,
            "regtest5/regtest5.b1c2d2.zip",
            bytes(bio.getbuffer()),
        )

        zipfile1 = MODULE_REGISTRY.latest("regtest5")

        create_or_replace_from_spec(
            {
                "id_name": "regtest5",
                "name": "regtest5 v2",
                "category": "Clean",
                "parameters": [{
                    "id_name": "url",
                    "type": "string"
                }],
            },
            source_version_hash="b1c2d3",
        )
        s3.put_bytes(
            s3.ExternalModulesBucket,
            "regtest5/regtest5.b1c2d3.zip",
            bytes(bio.getbuffer()),  # reuse zipfile to save lines of code
        )

        zipfile2 = MODULE_REGISTRY.latest("regtest5")

        self.assertIsNot(zipfile2, zipfile1)
        self.assertEqual(zipfile2.version, "b1c2d3")
Example #12
 def test_invalid_parquet_is_corrupt_cache_error(self):
     with arrow_table_context(make_column("A", ["x"])) as (path, table):
         result = LoadedRenderResult(
             path=path,
             table=table,
             columns=[Column("A", ColumnType.Text())],
             errors=[],
             json={},
         )
         cache_render_result(self.workflow, self.step, 1, result)
     crr = self.step.cached_render_result
     s3.put_bytes(BUCKET, crr_parquet_key(crr), b"NOT PARQUET")
     with tempfile_context() as arrow_path:
         with self.assertRaises(CorruptCacheError):
             with open_cached_render_result(crr) as loaded:
                 pass
Example #13
    def test_duplicate_bytes(self):
        key = f"{self.workflow.id}/{self.step1.id}/{uuid1()}"
        s3.put_bytes(s3.StoredObjectsBucket, key, b"12345")
        self.step2 = self.step1.tab.steps.create(order=1, slug="step-2")
        so1 = self.step1.stored_objects.create(key=key, size=5)
        so2 = so1.duplicate(self.step2)

        # new StoredObject should have same time,
        # different file with same contents
        self.assertEqual(so2.stored_at, so1.stored_at)
        self.assertEqual(so2.size, so1.size)
        self.assertNotEqual(so2.key, so1.key)
        self.assertEqual(
            get_s3_object_with_data(s3.StoredObjectsBucket, so2.key)["Body"],
            b"12345",
        )
Example #14
    def test_clean_file_happy_path(self):
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(module_id_name="uploadfile",
                                order=0,
                                slug="step-1")
        key = f"wf-${workflow.id}/wfm-${step.id}/6e00511a-8ac4-4b72-9acc-9d069992b5cf"
        s3.put_bytes(s3.UserFilesBucket, key, b"1234")
        model = UploadedFileModel.objects.create(
            step=step,
            name="x.csv.gz",
            size=4,
            uuid="6e00511a-8ac4-4b72-9acc-9d069992b5cf",
            key=key,
        )
        with ExitStack() as inner_stack:
            result = self._call_prep_params(
                ParamSchema.Dict({"file": ParamSchema.File()}),
                {"file": "6e00511a-8ac4-4b72-9acc-9d069992b5cf"},
                step_id=step.id,
                exit_stack=inner_stack,
            )
            self.assertEqual(
                result,
                PrepParamsResult(
                    {"file": "6e00511a-8ac4-4b72-9acc-9d069992b5cf"},
                    tab_outputs={},
                    uploaded_files={
                        "6e00511a-8ac4-4b72-9acc-9d069992b5cf":
                        UploadedFile(
                            "x.csv.gz",
                            "6e00511a-8ac4-4b72-9acc-9d069992b5cf_x.csv.gz",
                            model.created_at,
                        )
                    },
                ),
            )
            self.assertEqual(
                (self.basedir /
                 "6e00511a-8ac4-4b72-9acc-9d069992b5cf_x.csv.gz").read_bytes(),
                b"1234",
            )

        # Assert that once `exit_stack` goes out of scope, file is deleted
        self.assertFalse(
            (self.basedir /
             "6e00511a-8ac4-4b72-9acc-9d069992b5cf_x.csv.gz").exists())
Example #15
 def test_clean_file_wrong_step(self):
     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     step = tab.steps.create(module_id_name="uploadfile", order=0, slug="step-1")
     step2 = tab.steps.create(module_id_name="uploadfile", order=1, slug="step-2")
     id = str(uuid.uuid4())
     key = f"wf-${workflow.id}/wfm-${step.id}/${id}"
     s3.put_bytes(s3.UserFilesBucket, key, b"1234")
     UploadedFile.objects.create(
         step=step2, name="x.csv.gz", size=4, uuid=id, key=key
     )
     context = self._render_context(step_id=step.id)
     result = clean_value(ParamDType.File(), id, context)
     self.assertIsNone(result)
     # Assert that if a temporary file was created to house the download, it
     # no longer exists.
     self.assertListEqual(list(self.basedir.iterdir()), [])
Example #16
    def test_clean_file_happy_path(self):
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(module_id_name="uploadfile", order=0, slug="step-1")
        id = str(uuid.uuid4())
        key = f"wf-${workflow.id}/wfm-${step.id}/${id}"
        s3.put_bytes(s3.UserFilesBucket, key, b"1234")
        UploadedFile.objects.create(
            step=step, name="x.csv.gz", size=4, uuid=id, key=key
        )
        with ExitStack() as inner_stack:
            context = self._render_context(step_id=step.id, exit_stack=inner_stack)
            result: Path = clean_value(ParamDType.File(), id, context)
            self.assertIsInstance(result, Path)
            self.assertEqual(result.read_bytes(), b"1234")
            self.assertEqual(result.suffixes, [".csv", ".gz"])

        # Assert that once `exit_stack` goes out of scope, file is deleted
        self.assertFalse(result.exists())
Example #17
    def test_db_s3_latest_order_by_last_update_time(self):
        # old version
        create_or_replace_from_spec(
            {
                "id_name": "regtest1",
                "name": "regtest1 v1",
                "category": "Clean",
                "parameters": [{
                    "id_name": "url",
                    "type": "string"
                }],
            },
            source_version_hash="b1c2d3",
        )
        time.sleep(0.000002)  # guarantee new timestamp
        # new version
        v2 = create_or_replace_from_spec(
            {
                "id_name": "regtest1",
                "name": "regtest1 v2",
                "category": "Clean",
                "parameters": [{
                    "id_name": "url",
                    "type": "string"
                }],
            },
            source_version_hash="b1c2d2",
        )
        bio = io.BytesIO()
        with zipfile.ZipFile(bio, mode="w") as zf:
            zf.writestr("regtest1.yaml", json.dumps(v2.spec).encode("utf-8"))
            zf.writestr("regtest1.py",
                        b"def render(table, params):\n    return table")
        s3.put_bytes(
            s3.ExternalModulesBucket,
            "regtest1/regtest1.b1c2d2.zip",
            bytes(bio.getbuffer()),
        )

        zf = MODULE_REGISTRY.latest("regtest1")
        self.assertEqual(zf.get_spec(), load_spec(v2.spec))
Example #18
    def test_step_duplicate_copy_only_selected_uploaded_file(self):
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(order=0, slug="step-1", module_id_name="upload")
        uuid1 = str(uuidgen.uuid4())
        key1 = f"{step.uploaded_file_prefix}{uuid1}.csv"
        s3.put_bytes(s3.UserFilesBucket, key1, b"1234567")
        uuid2 = str(uuidgen.uuid4())
        key2 = f"{step.uploaded_file_prefix}{uuid2}.csv"
        s3.put_bytes(s3.UserFilesBucket, key2, b"7654321")
        uuid3 = str(uuidgen.uuid4())
        key3 = f"{step.uploaded_file_prefix}{uuid3}.csv"
        s3.put_bytes(s3.UserFilesBucket, key3, b"9999999")
        step.uploaded_files.create(name="t1.csv", uuid=uuid1, key=key1, size=7)
        step.uploaded_files.create(name="t2.csv", uuid=uuid2, key=key2, size=7)
        step.uploaded_files.create(name="t3.csv", uuid=uuid3, key=key3, size=7)
        # Write the _middle_ uuid to the old module -- proving that we aren't
        # selecting by ordering
        step.params = {"file": uuid2, "has_header": True}
        step.save(update_fields=["params"])

        workflow2 = Workflow.create_and_init()
        tab2 = workflow2.tabs.first()
        step2 = step.duplicate_into_new_workflow(tab2)

        self.assertEqual(step2.uploaded_files.count(), 1)
        new_uf = step2.uploaded_files.first()
        self.assertEqual(new_uf.uuid, uuid2)
Example #19
    def test_step_duplicate_copy_uploaded_file(self):
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(order=0,
                                slug="step-1",
                                module_id_name="upload")
        uuid = str(uuidgen.uuid4())
        key = f"{step.uploaded_file_prefix}{uuid}.csv"
        s3.put_bytes(s3.UserFilesBucket, key, b"1234567")
        # Write the uuid to the old module -- we'll check the new module points
        # to a valid file
        step.params = {"file": uuid, "has_header": True}
        step.save(update_fields=["params"])
        uploaded_file = step.uploaded_files.create(name="t.csv",
                                                   uuid=uuid,
                                                   key=key,
                                                   size=7)

        workflow2 = Workflow.create_and_init()
        tab2 = workflow2.tabs.first()
        step2 = step.duplicate_into_new_workflow(tab2)

        uploaded_file2 = step2.uploaded_files.first()
        self.assertIsNotNone(uploaded_file2)
        # New file gets same uuid -- because it's the same file and we don't
        # want to edit params during copy
        self.assertEqual(uploaded_file2.uuid, uuid)
        self.assertEqual(step2.params["file"], uuid)
        self.assertTrue(
            # The new file should be in a different path
            uploaded_file2.key.startswith(step2.uploaded_file_prefix))
        self.assertEqual(uploaded_file2.name, "t.csv")
        self.assertEqual(uploaded_file2.size, 7)
        self.assertEqual(uploaded_file2.created_at, uploaded_file.created_at)
        self.assertEqual(
            get_s3_object_with_data(s3.UserFilesBucket,
                                    uploaded_file2.key)["Body"],
            b"1234567",
        )
Example #20
    def test_pre_finish_enforce_storage_limits(self, send_update):
        send_update.side_effect = async_noop

        _init_module("x")
        self.kernel.migrate_params.side_effect = lambda m, p: p
        workflow = Workflow.create_and_init()
        step = workflow.tabs.first().steps.create(
            order=0,
            slug="step-123",
            module_id_name="x",
            file_upload_api_token="abc123",
            params={"file": None},
        )
        s3.put_bytes(s3.UserFilesBucket, "foo/1.txt", b"1")
        step.uploaded_files.create(
            created_at=datetime.datetime(2020, 1, 1),
            name="file1.txt",
            size=1,
            uuid="df46244d-268a-0001-9b47-360502dd9b32",
            key="foo/1.txt",
        )
        s3.put_bytes(s3.UserFilesBucket, "foo/2.txt", b"22")
        step.uploaded_files.create(
            created_at=datetime.datetime(2020, 1, 2),
            name="file2.txt",
            size=2,
            uuid="df46244d-268a-0002-9b47-360502dd9b32",
            key="foo/2.txt",
        )
        s3.put_bytes(s3.UserFilesBucket, "foo/3.txt", b"333")
        step.uploaded_files.create(
            created_at=datetime.datetime(2020, 1, 3),
            name="file3.txt",
            size=3,
            uuid="df46244d-268a-0003-9b47-360502dd9b32",
            key="foo/3.txt",
        )

        # Upload the new file, "file4.txt"
        s3.put_bytes(s3.TusUploadBucket, "new-key", b"4444")
        with self.assertLogs(level=logging.INFO):
            # Logs SetStepParams's migrate_params()
            response = self.client.post(
                f"/tusd-hooks",
                {
                    "Upload": {
                        "MetaData": {
                            "filename": "file4.txt",
                            "workflowId": str(workflow.id),
                            "stepSlug": step.slug,
                            "apiToken": "abc123",
                        },
                        "Size": 7,
                        "Storage": {
                            "Bucket": s3.TusUploadBucket,
                            "Key": "new-key"
                        },
                    }
                },
                HTTP_HOOK_NAME="pre-finish",
                content_type="application/json",
            )
        self.assertEqual(response.status_code, 200)

        # Test excess uploaded files were deleted
        self.assertEqual(
            list(
                step.uploaded_files.order_by("id").values_list("name",
                                                               flat=True)),
            ["file3.txt", "file4.txt"],
        )
        self.assertFalse(s3.exists(s3.UserFilesBucket, "foo/1.txt"))
        self.assertFalse(s3.exists(s3.UserFilesBucket, "foo/2.txt"))

        # Test delta nixes old files from clients' browsers
        send_update.assert_called()
        uploaded_file = step.uploaded_files.get(name="file4.txt")
        self.assertEqual(
            send_update.mock_calls[0][1][1].steps[step.id].files,
            [
                clientside.UploadedFile(
                    name="file4.txt",
                    uuid=uploaded_file.uuid,
                    size=7,
                    created_at=uploaded_file.created_at,
                ),
                clientside.UploadedFile(
                    name="file3.txt",
                    uuid="df46244d-268a-0003-9b47-360502dd9b32",
                    size=3,
                    created_at=datetime.datetime(2020, 1, 3),
                ),
            ],
        )
Example #21
 def test_pre_finish_happy_path(self, queue_render, send_update):
     send_update.side_effect = async_noop
     queue_render.side_effect = async_noop
     _init_module("x")
     self.kernel.migrate_params.side_effect = lambda m, p: p
     workflow = Workflow.create_and_init()
     step = workflow.tabs.first().steps.create(
         order=0,
         slug="step-123",
         module_id_name="x",
         file_upload_api_token="abc123",
         params={"file": None},
     )
     s3.put_bytes(s3.TusUploadBucket, "data", b"1234567")
     with self.assertLogs(level=logging.INFO):
         # Logs SetStepParams's migrate_params()
         response = self.client.post(
             f"/tusd-hooks",
             {
                 "Upload": {
                     "MetaData": {
                         "filename": "foo.csv",
                         "workflowId": str(workflow.id),
                         "stepSlug": step.slug,
                         "apiToken": "abc123",
                     },
                     "Size": 7,
                     "Storage": {
                         "Bucket": s3.TusUploadBucket,
                         "Key": "data"
                     },
                 }
             },
             HTTP_HOOK_NAME="pre-finish",
             content_type="application/json",
         )
     self.assertEqual(response.status_code, 200)
     self.assertEqual(response.json(), {})
     # File was created
     uploaded_file = step.uploaded_files.first()
     self.assertRegex(
         uploaded_file.key,
         f"^wf-{workflow.id}/wfm-{step.id}/[-0-9a-f]{{36}}\\.csv$")
     self.assertEqual(
         get_s3_object_with_data(s3.UserFilesBucket,
                                 uploaded_file.key)["Body"],
         b"1234567",
     )
     self.assertEqual(uploaded_file.name, "foo.csv")
     # SetStepParams ran
     uuid = uploaded_file.key[-40:-4]
     step.refresh_from_db()
     self.assertEqual(step.params, {"file": uuid})
     # Send deltas
     send_update.assert_called()
     self.assertEqual(
         send_update.mock_calls[0][1][1].steps[step.id].files,
         [
             clientside.UploadedFile(
                 name="foo.csv",
                 uuid=uuid,
                 size=7,
                 created_at=uploaded_file.created_at,
             )
         ],
     )
     queue_render.assert_called()
Example #22
def _put(b: bytes) -> None:
    s3.put_bytes(Bucket, Key, b)
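For context, `_put` relies on module-level `Bucket` and `Key` constants defined elsewhere in that test file (not shown above). A minimal sketch of how they might be defined, purely as an illustrative assumption:

Bucket = s3.StoredObjectsBucket  # assumption: any bucket constant exposed by the s3 module
Key = "test-prefix/test.dat"     # illustrative key, not from the source

_put(b"12345")  # each call overwrites the object at (Bucket, Key)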