示例#1
0
def _do_create_result(
    workflow_id: int, wf_module: WfModule, result: FetchResult, now: timezone.datetime
) -> None:
    """
    Write the fetched data and status to the database for create_result().

    `wf_module` is modified in place.

    This deliberately skips the ChangeDataVersionCommand logic: a brand-new
    version is being recorded, not an undoable action.

    Raise WfModule.DoesNotExist or Workflow.DoesNotExist in case of a race.
    """
    error = ""
    if result.errors:
        first = result.errors[0]
        # Only plain TODO_i18n text errors (no quick fixes) are supported here.
        if first.message.id != "TODO_i18n":
            raise RuntimeError("TODO handle i18n-ready fetch-result errors")
        if first.quick_fixes:
            raise RuntimeError("TODO handle quick fixes from fetches")
        error = first.message.args["text"]

    with _locked_wf_module(workflow_id, wf_module):
        # Record the new version first; then prune old versions to stay
        # within the storage quota.
        storedobjects.create_stored_object(
            workflow_id, wf_module.id, result.path, stored_at=now
        )
        storedobjects.enforce_storage_limits(wf_module)

        wf_module.fetch_error = error
        wf_module.is_busy = False
        wf_module.last_update_check = now
        wf_module.save(update_fields=["fetch_error", "is_busy", "last_update_check"])
    def test_fetch_result_deleted_file_means_none(self):
        """A stored-data pointer to a missing S3 file yields fetch_result=None."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        with parquet_file({"A": [1]}) as parquet_path:
            stored_object = create_stored_object(
                workflow.id, wf_module.id, parquet_path
            )
        wf_module.stored_data_version = stored_object.stored_at
        wf_module.save(update_fields=["stored_data_version"])
        # Delete the backing file on S3 while the DB row still points at it.
        minio.remove(stored_object.bucket, stored_object.key)

        def fake_render(*args, fetch_result, **kwargs):
            # The dangling pointer must surface to the module as None.
            self.assertIsNone(fetch_result)
            return RenderResult()

        with self._stub_module(fake_render):
            self.run_with_async_db(
                execute_wfmodule(
                    self.chroot_context,
                    workflow,
                    wf_module,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                ))
示例#3
0
    def test_fetch_result_happy_path(self):
        """Stored data and fetch_error reach the module's render() intact."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            fetch_error="maybe an error",
        )
        with parquet_file({"A": [1]}) as parquet_path:
            stored_object = create_stored_object(
                workflow.id, wf_module.id, parquet_path
            )
        wf_module.stored_data_version = stored_object.stored_at
        wf_module.save(update_fields=["stored_data_version"])

        def fake_render(*args, fetch_result, **kwargs):
            # The stored fetch_error arrives wrapped as a TODO_i18n message...
            self.assertEqual(
                fetch_result.errors,
                [RenderError(I18nMessage.TODO_i18n("maybe an error"))],
            )
            # ...and the stored parquet data arrives unchanged.
            assert_arrow_table_equals(
                pyarrow.parquet.read_table(str(fetch_result.path)), {"A": [1]}
            )
            return RenderResult()

        with self._stub_module(fake_render):
            self.run_with_async_db(
                execute_wfmodule(
                    workflow,
                    wf_module,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    Path("/unused"),
                ))
示例#4
0
 def test_load_selected_stored_object(self):
     """load_database_objects() returns the version the step points at."""
     workflow = Workflow.create_and_init()
     step = workflow.tabs.first().steps.create(
         order=0, slug="step-1", module_id_name="foodeleted"
     )
     # Three stored versions exist; the step selects the middle one.
     with parquet_file({"A": [1]}) as first_path:
         storedobjects.create_stored_object(workflow.id, step.id, first_path)
     with parquet_file({"A": [2]}) as second_path:
         selected = storedobjects.create_stored_object(
             workflow.id, step.id, second_path
         )
     with parquet_file({"A": [3]}) as third_path:
         storedobjects.create_stored_object(workflow.id, step.id, third_path)
     step.stored_data_version = selected.stored_at
     step.save(update_fields=["stored_data_version"])
     result = self.run_with_async_db(
         fetch.load_database_objects(workflow.id, step.id)
     )
     # The selected version is exposed both positionally and by name.
     self.assertEqual(result[3], selected)
     self.assertEqual(result.stored_object, selected)
示例#5
0
    def test_wf_module_duplicate(self):
        """Duplicating a step copies metadata and the *current* stored data
        only (not the full version history), and never copies secrets.
        """
        workflow = Workflow.create_and_init()
        step1 = workflow.tabs.first().wf_modules.create(order=0, slug="step-1")

        # store data to test that it is duplicated
        with tempfile_context() as path1:
            path1.write_bytes(b"12345")
            create_stored_object(workflow.id, step1.id, path1)
        with tempfile_context() as path2:
            # BUG FIX: this wrote to `path1`, leaving `path2` empty -- so2 was
            # built from an empty file and the first version's source was
            # clobbered. Write to `path2`, the file so2 is created from.
            path2.write_bytes(b"23456")
            so2 = create_stored_object(workflow.id, step1.id, path2)
        step1.secrets = {"do not copy": {"name": "evil", "secret": "evil"}}
        step1.stored_data_version = so2.stored_at
        step1.save(update_fields=["stored_data_version"])

        # duplicate into another workflow, as we would do when duplicating a workflow
        workflow2 = Workflow.create_and_init()
        tab2 = workflow2.tabs.first()
        step1d = step1.duplicate_into_new_workflow(tab2)
        step1d.refresh_from_db()  # test what we actually have in the db

        self.assertEqual(step1d.slug, "step-1")
        self.assertEqual(step1d.workflow, workflow2)
        self.assertEqual(step1d.module_id_name, step1.module_id_name)
        self.assertEqual(step1d.order, step1.order)
        self.assertEqual(step1d.notes, step1.notes)
        self.assertEqual(step1d.last_update_check, step1.last_update_check)
        self.assertEqual(step1d.is_collapsed, step1.is_collapsed)
        self.assertEqual(step1d.params, step1.params)
        # Secrets must never travel with a duplicate.
        self.assertEqual(step1d.secrets, {})

        # Stored data should contain a clone of content only, not complete version history
        self.assertEqual(step1d.stored_objects.count(), 1)
        self.assertEqual(step1d.stored_data_version, step1.stored_data_version)
        so2d = step1d.stored_objects.first()
        # The StoredObject was copied byte for byte into a different file
        self.assertNotEqual(so2d.key, so2.key)
        self.assertEqual(
            minio.get_object_with_data(minio.StoredObjectsBucket,
                                       so2d.key)["Body"],
            minio.get_object_with_data(minio.StoredObjectsBucket,
                                       so2.key)["Body"],
        )
示例#6
0
    def test_fetch_result_happy_path(self):
        """Stored fetch data and fetch_errors reach the module's render()."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            fetch_errors=[
                RenderError(I18nMessage("foo", {}, "module")),
                RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
            ],
        )
        with parquet_file({"A": [1]}) as path:
            so = create_stored_object(workflow.id, step.id, path)
        step.stored_data_version = so.stored_at
        step.save(update_fields=["stored_data_version"])

        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            # BUG FIX in the embedded module code:
            # * `import pyarrow as pa` alone does not load the `pyarrow.parquet`
            #   submodule, so `pa.parquet` would raise AttributeError;
            # * `read_table()` returns a pyarrow.Table, but assert_frame_equal()
            #   requires pandas DataFrames on both sides -- convert with
            #   `.to_pandas()`.
            python_code=textwrap.dedent(
                """
                import pyarrow as pa
                import pyarrow.parquet
                import pandas as pd
                from pandas.testing import assert_frame_equal
                from cjwkernel.types import RenderError, I18nMessage

                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result.errors == [
                        RenderError(I18nMessage("foo", {}, "module")),
                        RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
                    ]
                    fetch_dataframe = pa.parquet.read_table(
                        str(fetch_result.path)
                    ).to_pandas()
                    assert_frame_equal(fetch_dataframe, pd.DataFrame({"A": [1]}))
                    return pd.DataFrame()
                """
            ),
        )

        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                execute_step(
                    chroot_context=self.chroot_context,
                    workflow=workflow,
                    step=step,
                    module_zipfile=module_zipfile,
                    params={},
                    tab_name=tab.name,
                    input_path=self.empty_table_path,
                    input_table_columns=[],
                    tab_results={},
                    output_path=self.output_path,
                )
            )
示例#7
0
def _do_create_result(
    workflow_id: int, step: Step, result: FetchResult, now: datetime.datetime
) -> None:
    """Persist a freshly-fetched result for create_result().

    `step` is modified in place.

    SetStepDataVersion logic is deliberately not run here: this records a
    brand-new version, not an undoable action.

    Raise Step.DoesNotExist or Workflow.DoesNotExist in case of a race.
    """
    fields_to_save = ["fetch_errors", "is_busy", "last_update_check"]
    with _locked_step(workflow_id, step):
        # Store the new version first, then prune old files to respect quota.
        storedobjects.create_stored_object(
            workflow_id, step.id, result.path, stored_at=now
        )
        storedobjects.delete_old_files_to_enforce_storage_limits(step=step)
        # Assume caller sends new list to clients via SetStepDataVersion

        step.fetch_errors = result.errors
        step.is_busy = False
        step.last_update_check = now
        step.save(update_fields=fields_to_save)
示例#8
0
def _do_create_result(workflow_id: int, wf_module: WfModule,
                      result: FetchResult, now: timezone.datetime) -> None:
    """
    Persist a freshly-fetched result for create_result().

    `wf_module` is modified in place.

    ChangeDataVersionCommand logic is deliberately not run here: this records
    a brand-new version, not an undoable action.

    Raise WfModule.DoesNotExist or Workflow.DoesNotExist in case of a race.
    """
    with _locked_wf_module(workflow_id, wf_module):
        # Store the new version first, then prune old files to respect quota.
        storedobjects.create_stored_object(
            workflow_id, wf_module.id, result.path, stored_at=now
        )
        storedobjects.enforce_storage_limits(wf_module)

        wf_module.fetch_errors = result.errors
        wf_module.is_busy = False
        wf_module.last_update_check = now
        wf_module.save(
            update_fields=["fetch_errors", "is_busy", "last_update_check"])
示例#9
0
    def test_fetch_result_deleted_file_means_none(self):
        """A stored-data pointer to a missing S3 file yields fetch_result=None."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        with parquet_file({"A": [1]}) as parquet_path:
            stored_object = create_stored_object(workflow.id, step.id, parquet_path)
        step.stored_data_version = stored_object.stored_at
        step.save(update_fields=["stored_data_version"])
        # Delete the backing file on S3 while the DB row still points at it.
        s3.remove(s3.StoredObjectsBucket, stored_object.key)

        # The module itself asserts it sees fetch_result=None.
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code=textwrap.dedent(
                """
                import pandas as pd
                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result is None
                    return pd.DataFrame()
                """
            ),
        )

        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                execute_step(
                    chroot_context=self.chroot_context,
                    workflow=workflow,
                    step=step,
                    module_zipfile=module_zipfile,
                    params={},
                    tab_name=tab.name,
                    input_path=self.empty_table_path,
                    input_table_columns=[],
                    tab_results={},
                    output_path=self.output_path,
                )
            )
示例#10
0
    def test_fetch_result_deleted_file_means_none(self):
        """A stored-data pointer to a missing S3 file yields fetch_result=None."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        with parquet_file({"A": [1]}) as path:
            so = create_stored_object(workflow.id, wf_module.id, path)
        wf_module.stored_data_version = so.stored_at
        wf_module.save(update_fields=["stored_data_version"])
        # Now delete the file on S3 -- but leave the DB pointing to it.
        minio.remove(minio.StoredObjectsBucket, so.key)

        # CLEANUP: removed a dead local `render()` closure that was never
        # passed to anything -- the module code below performs the assertion.
        module_zipfile = create_module_zipfile(
            "x",
            python_code=textwrap.dedent("""
                import pandas as pd
                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result is None
                    return pd.DataFrame()
                """),
        )

        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                execute_wfmodule(
                    self.chroot_context,
                    workflow,
                    wf_module,
                    module_zipfile,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                ))