Пример #1
0
    def test_fetch_result_happy_path(self):
        """Expect render() to see the stored fetch error and the stored table."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            fetch_error="maybe an error",
        )
        with parquet_file({"A": [1]}) as parquet_path:
            stored_object = create_stored_object(
                workflow.id, step.id, parquet_path
            )
        # Point the step at the object that was just stored.
        step.stored_data_version = stored_object.stored_at
        step.save(update_fields=["stored_data_version"])

        def render(*args, fetch_result, **kwargs):
            # The stubbed module checks what the executor hands it.
            self.assertEqual(
                fetch_result.errors,
                [RenderError(I18nMessage.TODO_i18n("maybe an error"))],
            )
            fetched_table = pyarrow.parquet.read_table(str(fetch_result.path))
            assert_arrow_table_equals(fetched_table, {"A": [1]})
            return RenderResult()

        with self._stub_module(render):
            pending = execute_wfmodule(
                workflow,
                step,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                Path("/unused"),
            )
            self.run_with_async_db(pending)
    def test_fetch_result_deleted_file_means_none(self):
        """Expect fetch_result=None when the stored file has been removed."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        with parquet_file({"A": [1]}) as parquet_path:
            stored_object = create_stored_object(
                workflow.id, step.id, parquet_path
            )
        step.stored_data_version = stored_object.stored_at
        step.save(update_fields=["stored_data_version"])
        # Remove the stored file itself; the database row still points at it.
        minio.remove(stored_object.bucket, stored_object.key)

        def render(*args, fetch_result, **kwargs):
            self.assertIsNone(fetch_result)
            return RenderResult()

        with self._stub_module(render):
            pending = execute_wfmodule(
                self.chroot_context,
                workflow,
                step,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(pending)
Пример #3
0
def _load_tab_flows(workflow: Workflow, delta_id: int) -> List[TabFlow]:
    """
    Build a `TabFlow` for every live tab of `workflow` (ordered by tab position).

    Runs inside `workflow.cooperative_lock()`. Raise `UnneededExecution` if the
    workflow's `last_delta_id` is no longer `delta_id`.

    Raise `ModuleError` or `ValueError` if migrate_params() fails. Failed
    migration means the whole execute can't happen.
    """
    tab_flows = []
    with workflow.cooperative_lock():  # reloads workflow
        if workflow.last_delta_id != delta_id:
            raise UnneededExecution

        for tab_model in workflow.live_tabs.all():
            execute_steps = []
            for step in tab_model.live_wf_modules.all():
                module_version = step.module_version
                if module_version is None:
                    # No module version: fall back to an empty dict schema.
                    schema = ParamDType.Dict({})
                else:
                    schema = module_version.param_schema
                # We need to invoke the kernel and migrate _all_ modules'
                # params (WfModule.get_params), because we can only check
                # for tab cycles after migrating (and before calling any
                # render()).
                migrated_params = _get_migrated_params(step)
                execute_steps.append(ExecuteStep(step, schema, migrated_params))
            tab = Tab(tab_model.slug, tab_model.name)
            tab_flows.append(TabFlow(tab, execute_steps))
    return tab_flows
    def test_fetch_result_no_bucket_or_key_stored_object_means_none(self):
        """Expect fetch_result=None when the StoredObject has empty bucket/key."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            stored_data_version=timezone.now(),
        )
        # A StoredObject row with the matching timestamp but no bucket or key.
        step.stored_objects.create(
            stored_at=step.stored_data_version,
            bucket="",
            key="",
            size=0,
            hash="whatever",
        )

        def render(*args, fetch_result, **kwargs):
            self.assertIsNone(fetch_result)
            return RenderResult()

        with self._stub_module(render):
            pending = execute_wfmodule(
                self.chroot_context,
                workflow,
                step,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(pending)
Пример #5
0
    def test_fetch_result_no_stored_object_means_none(self):
        """Run a module that asserts fetch_result is None for a step with no stored data."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )

        # The assertion lives inside the module's own render() code.
        module_code = textwrap.dedent("""
                import pandas as pd
                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result is None
                    return pd.DataFrame()
                """)
        module_zipfile = create_module_zipfile("x", python_code=module_code)

        with self.assertLogs(level=logging.INFO):
            pending = execute_wfmodule(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(pending)
    def test_report_module_error(self):
        """Expect a ModuleExitedError(-9) from render() to become a SIGKILL error result."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )

        def render(*args, fetch_result, **kwargs):
            raise ModuleExitedError(-9, "")

        with self._stub_module(render):
            pending = execute_wfmodule(
                self.chroot_context,
                workflow,
                step,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            result = self.run_with_async_db(pending)

        expected_message = I18nMessage.TODO_i18n(
            "Something unexpected happened. We have been notified and are "
            "working to fix it. If this persists, contact us. Error code: "
            "SIGKILL"
        )
        self.assertEqual(
            result, RenderResult(errors=[RenderError(expected_message)])
        )
Пример #7
0
    def test_render_using_tab_output(self):
        """Expect a TabOutput param to expose slug, name, columns and dataframe."""

        def render(table, params):
            tab_param = params["tabparam"]
            self.assertEqual(tab_param.slug, "tab-1")
            self.assertEqual(tab_param.name, "Tab 1")
            expected_columns = {
                "X": ptypes.RenderColumn("X", "number", "{:,d}"),
                "Y": ptypes.RenderColumn("Y", "text", None),
            }
            self.assertEqual(tab_param.columns, expected_columns)
            expected_frame = pd.DataFrame({"X": [1], "Y": ["y"]})
            assert_frame_equal(tab_param.dataframe, expected_frame)

        source_columns = [
            Column("X", ColumnType.Number("{:,d}")),
            Column("Y", ColumnType.Text()),
        ]
        with arrow_table_context(
            {"X": [1], "Y": ["y"]},
            columns=source_columns,
            dir=self.basedir,
        ) as atable:
            tab_output = TabOutput(Tab("tab-1", "Tab 1"), atable)
            self._test_render(render, params={"tabparam": tab_output})
Пример #8
0
    def test_clean_tabs_happy_path(self):
        """Expect a Multitab value to clean into one TabOutput per listed slug."""
        tab2 = Tab("tab-2", "Tab 2")
        tab2_output = arrow_table({"B": [1]})
        tab3 = Tab("tab-3", "Tab 3")
        tab3_output = arrow_table({"C": [1]})

        tab_results = {
            tab2: RenderResult(tab2_output),
            tab3: RenderResult(tab3_output),
        }
        context = self._render_context(tab_results=tab_results)

        cleaned = clean_value(
            ParamDType.Multitab(), ["tab-2", "tab-3"], context
        )

        expected = [
            TabOutput(tab2, tab2_output),
            TabOutput(tab3, tab3_output),
        ]
        self.assertEqual(cleaned, expected)
Пример #9
0
    def test_render_without_input_or_loads_data_raises_no_loaded_data(self):
        """Expect NoLoadedDataError for a loads_data=False module given an empty input."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": False},
            python_code="def render(table, params): return None",
        )

        pending = execute_step(
            self.chroot_context,
            workflow,
            step,
            module_zipfile,
            {},
            Tab(first_tab.slug, first_tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        result = self.run_with_async_db(pending)

        expected_error = RenderError(
            I18nMessage("py.renderer.execute.step.NoLoadedDataError", {}, None)
        )
        assert_render_result_equals(
            result, RenderResult(errors=[expected_error])
        )
Пример #10
0
 def _test_render(
         self,
         render_fn,
         arrow_table_dict=None,
         arrow_table=None,
         params=None,
         tab=Tab("tab-1", "Tab 1"),
         fetch_result=None,
         output_filename=None,
 ):
     """
     Call `module.render_thrift()` with `module.render` patched to `render_fn`.

     The input table is `arrow_table` when given; otherwise a temporary one is
     built from `arrow_table_dict` (default: empty dict) under `self.basedir`.
     `params` (default: empty dict) is wrapped in `Params`. The Thrift result is
     converted back with `thrift_render_result_to_arrow()` and returned.

     `output_filename` is accepted but not read here: the output file is always
     a fresh temporary file created under `self.basedir`.
     """
     # Use None sentinels instead of `{}` defaults: a dict literal in the
     # signature is created once and shared by every call.
     if arrow_table_dict is None:
         arrow_table_dict = {}
     if params is None:
         params = {}

     with ExitStack() as ctx:
         if arrow_table is None:
             arrow_table = ctx.enter_context(
                 arrow_table_context(arrow_table_dict, dir=self.basedir))
         ctx.enter_context(patch.object(module, "render", render_fn))
         out_filename = ctx.enter_context(
             tempfile_context(dir=self.basedir)).name
         thrift_result = module.render_thrift(
             ttypes.RenderRequest(
                 str(self.basedir),
                 arrow_arrow_table_to_thrift(arrow_table),
                 arrow_params_to_thrift(Params(params)),
                 arrow_tab_to_thrift(tab),
                 arrow_fetch_result_to_thrift(fetch_result)
                 if fetch_result is not None else None,
                 out_filename,
             ))
         # Convert while the temporary files are still open.
         return thrift_render_result_to_arrow(thrift_result, self.basedir)
Пример #11
0
    def test_fetch_result_no_stored_object_means_none(self):
        """Expect fetch_result=None for a step that has no stored data version."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )

        def render(*args, fetch_result, **kwargs):
            self.assertIsNone(fetch_result)
            return RenderResult()

        with self._stub_module(render):
            pending = execute_wfmodule(
                self.chroot_context,
                workflow,
                step,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(pending)
Пример #12
0
    def test_fetch_result_deleted_stored_object_means_none(self):
        """Expect fetch_result=None when stored_data_version matches no StoredObject."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            # stored_data_version is buggy: it can point at a nonexistent
            # StoredObject. Reproduce that state on purpose.
            stored_data_version=timezone.now(),
        )

        def render(*args, fetch_result, **kwargs):
            self.assertIsNone(fetch_result)
            return RenderResult()

        with self._stub_module(render):
            pending = execute_wfmodule(
                self.chroot_context,
                workflow,
                step,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(pending)
Пример #13
0
    def test_clean_tabs_preserve_ordering(self):
        """Expect cleaned tabs to follow the context's tab order, not the param's."""
        tab2 = Tab("tab-2", "Tab 2")
        tab2_output = arrow_table({"B": [1]})
        tab3 = Tab("tab-3", "Tab 3")
        tab3_output = arrow_table({"C": [1]})

        # RenderContext's dict ordering determines desired tab order.
        # (Python 3.7 spec: dict is ordered in insertion order. CPython 3.6
        # and PyPy 7 do this, too.) tab-3 is inserted first on purpose.
        ordered_results = {
            tab3: RenderResult(tab3_output),
            tab2: RenderResult(tab2_output),
        }
        context = self._render_context(tab_results=ordered_results)

        # Supply wrongly-ordered tabs; renderprep should reorder them.
        cleaned = clean_value(
            ParamDType.Multitab(), ["tab-2", "tab-3"], context
        )
        cleaned_slugs = [tab_output.tab.slug for tab_output in cleaned]
        self.assertEqual(cleaned_slugs, ["tab-3", "tab-2"])
Пример #14
0
    def render(
        self,
        compiled_module: CompiledModule,
        chroot_context: ChrootContext,
        basedir: Path,
        input_table: ArrowTable,
        params: Params,
        tab: Tab,
        fetch_result: Optional[FetchResult],
        output_filename: str,
    ) -> RenderResult:
        """
        Run the module's `render_thrift()` function and return its result.

        The module is given `basedir` as a path relative to the chroot root
        (re-anchored at "/"). `basedir / output_filename` is made writable
        for the duration of the child call, and
        `chroot_context.clear_unowned_edits()` always runs afterwards.

        Raise ModuleError if the module has a bug. In particular, raise
        ModuleExitedError if the module reports a table filename other than
        `output_filename`, or if its result fails validation.
        """
        chroot_dir = chroot_context.chroot.root
        # Path to `basedir` as the chrooted module will see it.
        basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
        request = ttypes.RenderRequest(
            str(basedir_seen_by_module),
            input_table.to_thrift(),
            params.to_thrift(),
            tab.to_thrift(),
            None if fetch_result is None else fetch_result.to_thrift(),
            output_filename,
        )
        try:
            with chroot_context.writable_file(basedir / output_filename):
                result = self._run_in_child(
                    chroot_dir=chroot_dir,
                    network_config=pyspawner.NetworkConfig(),  # TODO disallow networking
                    compiled_module=compiled_module,
                    timeout=self.render_timeout,
                    result=ttypes.RenderResult(),
                    function="render_thrift",
                    args=[request],
                )
        finally:
            # Runs whether or not the child call succeeded.
            chroot_context.clear_unowned_edits()

        # An empty filename is allowed; any other value must be the one we
        # asked for.
        if result.table.filename and result.table.filename != output_filename:
            raise ModuleExitedError(0, "Module wrote to wrong output file")

        try:
            # RenderResult.from_thrift() verifies all filenames passed by the
            # module are in the directory the module has access to. It assumes
            # the Arrow file (if there is one) is untrusted, so it can raise
            # ValidateError
            render_result = RenderResult.from_thrift(result, basedir)
        except ValidateError as err:
            # Chain explicitly so the validation failure is recorded as the
            # cause of the ModuleExitedError.
            raise ModuleExitedError(
                0, "Module produced invalid data: %s" % str(err)
            ) from err
        return render_result
Пример #15
0
    def test_fetch_result_happy_path(self):
        """Run a module whose render() checks the fetch errors and stored table."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        stored_errors = [
            RenderError(I18nMessage("foo", {}, "module")),
            RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
        ]
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            fetch_errors=stored_errors,
        )
        with parquet_file({"A": [1]}) as parquet_path:
            stored_object = create_stored_object(
                workflow.id, step.id, parquet_path
            )
        step.stored_data_version = stored_object.stored_at
        step.save(update_fields=["stored_data_version"])

        # The checks on fetch_result live inside the module's render() code.
        # NOTE(review): the value returned by execute_step() is not inspected
        # here -- confirm a failing assert in the module would fail this test.
        module_code = textwrap.dedent("""
                import pyarrow as pa
                import pandas as pd
                from pandas.testing import assert_frame_equal
                from cjwkernel.types import RenderError, I18nMessage

                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result.errors == [
                        RenderError(I18nMessage("foo", {}, "module")),
                        RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
                    ]
                    fetch_dataframe = pa.parquet.read_table(str(fetch_result.path))
                    assert_frame_equal(fetch_dataframe, pd.DataFrame({"A": [1]}))
                    return pd.DataFrame()
                """)
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code=module_code,
        )

        with self.assertLogs(level=logging.INFO):
            pending = execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(pending)
Пример #16
0
    def test_email_delta_when_errors_change(self, email_delta):
        """Expect email_delta to be called when the render errors differ from the cached ones."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        previous_delta_id = workflow.last_delta_id - 1
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=previous_delta_id,
            notifications=True,
        )
        # We need to actually populate the cache to set up the test. The code
        # under test will only try to open the render result if the database
        # says there's something there.
        cached_error = RenderError(
            I18nMessage("py.renderer.execute.step.noModule", {}, None)
        )
        rendercache.cache_render_result(
            workflow,
            step,
            workflow.last_delta_id - 1,
            RenderResult(errors=[cached_error]),
        )
        # Move the step forward so the cached result is stale.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        # The module returns a different error than the cached one.
        module_code = (
            'import pandas as pd\ndef render(table, params): return [{"id": "err"}]'
        )
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code=module_code,
        )

        with self.assertLogs(level=logging.INFO):
            pending = execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(pending)

        email_delta.assert_called()  # there's new data
Пример #17
0
    def test_email_delta_when_stale_crr_is_unreachable(self, email_delta,
                                                       read_cache):
        """Expect an email, and no cache read, when the stale cached result has no file."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        previous_delta_id = workflow.last_delta_id - 1
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=previous_delta_id,
            notifications=True,
        )
        # We need to actually populate the cache to set up the test. The code
        # under test will only try to open the render result if the database
        # says there's something there.
        empty_result = RenderResult(arrow_table({}))  # does not write a Parquet file
        rendercache.cache_render_result(
            workflow,
            step,
            workflow.last_delta_id - 1,
            empty_result,
        )
        # Move the step forward so the cached result is stale.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        # The module returns different data than what was cached.
        module_code = (
            'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        )
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code=module_code,
        )

        with self.assertLogs(level=logging.INFO):
            pending = execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(pending)

        read_cache.assert_not_called()  # it would give CorruptCacheError
        email_delta.assert_called()  # there's new data
Пример #18
0
    def test_email_delta(self, email_delta):
        """Expect email_delta to get a delta holding the old and new render results."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # Cache a result for the previous delta ...
        rendercache.cache_render_result(
            workflow,
            step,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({"A": [1]})),
        )
        # ... then move the step forward so that cached result is stale.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        module_code = (
            'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        )
        module_zipfile = create_module_zipfile("x", python_code=module_code)
        with self.assertLogs(level=logging.INFO):
            pending = execute_wfmodule(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(pending)
        email_delta.assert_called()
        sent_delta = email_delta.call_args[0][0]

        self.assertEqual(sent_delta.user, workflow.owner)
        self.assertEqual(sent_delta.workflow, workflow)
        self.assertEqual(sent_delta.wf_module, step)
        self.assertEqual(
            sent_delta.old_result, RenderResult(arrow_table({"A": [1]}))
        )
        self.assertEqual(
            sent_delta.new_result, RenderResult(arrow_table({"A": [2]}))
        )
Пример #19
0
    def test_email_delta_ignore_corrupt_cache_error(self, email_delta,
                                                    read_cache):
        """Expect no email, and an ERROR log, when reading the cache raises CorruptCacheError."""
        read_cache.side_effect = rendercache.CorruptCacheError
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # We need to actually populate the cache to set up the test. The code
        # under test will only try to open the render result if the database
        # says there's something there.
        rendercache.cache_render_result(
            workflow,
            step,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({"A": [1]})),
        )
        # Move the step forward so the cached result is stale.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        # returns different data -- but CorruptCacheError means we won't care.
        module_code = (
            'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        )
        module_zipfile = create_module_zipfile("x", python_code=module_code)

        with self.assertLogs(level=logging.ERROR):
            pending = execute_wfmodule(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(pending)

        email_delta.assert_not_called()
Пример #20
0
    def test_email_delta(self, email_delta):
        """Expect email_delta to get a delta holding the old and new render results."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # Cache a result for the previous delta ...
        rendercache.cache_render_result(
            workflow,
            step,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({"A": [1]})),
        )
        # ... then move the step forward so that cached result is stale.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        with arrow_table_context({"A": [2]}) as table2:

            def render(*args, **kwargs):
                # The stubbed module always renders the new table.
                return RenderResult(table2)

            with self._stub_module(render):
                pending = execute_wfmodule(
                    self.chroot_context,
                    workflow,
                    step,
                    {},
                    Tab(first_tab.slug, first_tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                )
                self.run_with_async_db(pending)

        email_delta.assert_called()
        sent_delta = email_delta.call_args[0][0]
        self.assertEqual(sent_delta.user, workflow.owner)
        self.assertEqual(sent_delta.workflow, workflow)
        self.assertEqual(sent_delta.wf_module, step)
        self.assertEqual(
            sent_delta.old_result, RenderResult(arrow_table({"A": [1]}))
        )
        self.assertEqual(
            sent_delta.new_result, RenderResult(arrow_table({"A": [2]}))
        )
Пример #21
0
    def test_fetch_result_deleted_file_means_none(self):
        """Run a module that asserts fetch_result is None after the stored file is removed."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        with parquet_file({"A": [1]}) as path:
            so = create_stored_object(workflow.id, step.id, path)
        step.stored_data_version = so.stored_at
        step.save(update_fields=["stored_data_version"])
        # Now delete the file on S3 -- but leave the DB pointing to it.
        s3.remove(s3.StoredObjectsBucket, so.key)

        # The assertion lives in the module's own render() code below. (A
        # local `render` stub used to be defined here but was never passed
        # to anything, so it has been dropped.)
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code=textwrap.dedent("""
                import pandas as pd
                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result is None
                    return pd.DataFrame()
                """),
        )

        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                execute_step(
                    self.chroot_context,
                    workflow,
                    step,
                    module_zipfile,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                ))
Пример #22
0
    def test_email_delta_ignore_corrupt_cache_error(self, email_delta,
                                                    read_cache):
        """Expect no email, and an ERROR log, when reading the cache raises CorruptCacheError."""
        read_cache.side_effect = rendercache.CorruptCacheError
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # We need to actually populate the cache to set up the test. The code
        # under test will only try to open the render result if the database
        # says there's something there.
        rendercache.cache_render_result(
            workflow,
            step,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({"A": [1]})),
        )
        # Move the step forward so the cached result is stale.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        with arrow_table_context({"A": [2]}) as table2:

            def render(*args, **kwargs):
                # The stubbed module always renders the new table.
                return RenderResult(table2)

            with self._stub_module(render), self.assertLogs(level=logging.ERROR):
                pending = execute_wfmodule(
                    self.chroot_context,
                    workflow,
                    step,
                    {},
                    Tab(first_tab.slug, first_tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                )
                self.run_with_async_db(pending)

        email_delta.assert_not_called()
Пример #23
0
    def test_report_module_error(self):
        """Expect a NameError inside the module's render() to become a user-visible error."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )

        # render() calls a name that does not exist.
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code="def render(table, params):\n  undefined()",
        )

        with self.assertLogs(level=logging.INFO):
            pending = execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            result = self.run_with_async_db(pending)

        expected_message = I18nMessage(
            "py.renderer.execute.step.user_visible_bug_during_render",
            {
                "message":
                "exit code 1: NameError: name 'undefined' is not defined"
            },
            None,
        )
        assert_render_result_equals(
            result, RenderResult(errors=[RenderError(expected_message)])
        )
Пример #24
0
    def test_clean_multicolumn_from_other_tab(self):
        """Expect a Multicolumn tied to a tab param to keep only that tab's columns."""
        tab2 = Tab("tab-2", "Tab 2")
        tab2_output_table = arrow_table({"A-from-tab-2": [1, 2]})

        columns_dtype = ParamDType.Multicolumn(tab_parameter="tab")
        schema = ParamDType.Dict({
            "tab": ParamDType.Tab(),
            "columns": columns_dtype,
        })
        # One column name from the input table, one from tab-2's output.
        params = {"tab": "tab-2", "columns": ["A-from-tab-1", "A-from-tab-2"]}
        context = self._render_context(
            input_table=arrow_table({"A-from-tab-1": [1]}),
            tab_results={tab2: RenderResult(tab2_output_table)},
            params=params,
        )

        cleaned = clean_value(schema, params, context)

        # cleaned['tab'] is not what we're testing here
        self.assertEqual(cleaned["columns"], ["A-from-tab-2"])
Пример #25
0
def _load_tab_flows(workflow: Workflow, delta_id: int) -> List[TabFlow]:
    """Build one `TabFlow` per live tab of `workflow`.

    Flows are returned in the order `workflow.live_tabs.all()` yields tabs
    (documented as tab position). All reads happen while holding the
    workflow's cooperative lock, which reloads `workflow`.

    Raise `UnneededExecution` if, once the lock is held,
    `workflow.last_delta_id` differs from `delta_id`.

    Raise `ModuleError` or `ValueError` if migrate_params() fails (presumably
    while building a step -- confirm against `_build_execute_step()`). Failed
    migration means the whole execute can't happen.
    """
    with workflow.cooperative_lock():  # reloads workflow
        if workflow.last_delta_id != delta_id:
            raise UnneededExecution

        latest_zipfiles = MODULE_REGISTRY.all_latest()

        def _tab_flow(tab_model) -> TabFlow:
            # Build the steps before the Tab value, one tab at a time.
            execute_steps = [
                _build_execute_step(step_model, module_zipfiles=latest_zipfiles)
                for step_model in tab_model.live_steps.all()
            ]
            return TabFlow(Tab(tab_model.slug, tab_model.name), execute_steps)

        tab_flows = [_tab_flow(tab_model) for tab_model in workflow.live_tabs.all()]
    return tab_flows
Пример #26
0
    def render(
        self,
        compiled_module: CompiledModule,
        basedir: Path,
        input_table: ArrowTable,
        params: Params,
        tab: Tab,
        fetch_result: Optional[FetchResult],
        output_filename: str,
    ) -> RenderResult:
        """Call the module's "render_thrift" function in a child; return its result.

        The arguments are converted to a Thrift `RenderRequest`. The child is
        run inside a chroot context that provides `basedir` and extracts
        `basedir / output_filename` on exit.

        Raise `ModuleExitedError` if the module reports a non-empty table
        filename other than `output_filename`. Anything `_run_in_child()`,
        `RenderResult.from_thrift()` or `validate()` raise propagates.
        """
        thrift_request = ttypes.RenderRequest(
            str(basedir),
            input_table.to_thrift(),
            params.to_thrift(),
            tab.to_thrift(),
            fetch_result.to_thrift() if fetch_result is not None else None,
            output_filename,
        )
        chroot_context = _chroot_dir_context(
            provide_paths=[basedir],
            extract_paths=[basedir / output_filename],
        )
        with chroot_context as chroot:
            # TODO nix networking
            chroot_paths = [basedir] + DATA_PATHS + PARQUET_PATHS + NETWORKING_PATHS
            thrift_result = self._run_in_child(
                chroot=chroot,
                chroot_paths=chroot_paths,
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[thrift_request],
            )
            written_filename = thrift_result.table.filename
            if written_filename and written_filename != output_filename:
                raise ModuleExitedError(0, "Module wrote to wrong output file")

        # RenderResult.from_thrift() verifies all filenames passed by the
        # module are in the directory the module has access to.
        render_result = RenderResult.from_thrift(thrift_result, basedir)
        output_table = render_result.table
        if output_table.table is not None:
            # Only validate when the module actually produced a table.
            validate(output_table.table, output_table.metadata)
        return render_result
Пример #27
0
    def test_fetch_result_no_bucket_or_key_stored_object_means_none(self):
        """A stored object with an empty key yields `fetch_result=None`.

        The step points at a stored object created with `key=""` and `size=0`.
        The module's render() asserts that it receives `fetch_result is None`.
        """
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            stored_data_version=datetime.datetime.now(),
        )
        # The selected stored object exists in the DB but has no key.
        step.stored_objects.create(
            stored_at=step.stored_data_version,
            key="",
            size=0,
            hash="whatever",
        )

        module_code = textwrap.dedent("""
                import pandas as pd
                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result is None
                    return pd.DataFrame()
                """)
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code=module_code,
        )

        # Positional arguments mirror the original call exactly.
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                execute_step(
                    self.chroot_context,
                    workflow,
                    step,
                    module_zipfile,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                )
            )
Пример #28
0
    def test_email_no_delta_when_errors_stay_the_same(self, email_delta):
        """No email is sent when a re-render produces the same error.

        `email_delta` is supplied from outside this method (presumably a mock
        injected by a patch decorator -- confirm at the decorator site).
        """
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        previous_delta_id = workflow.last_delta_id - 1
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=previous_delta_id,
            notifications=True,
        )

        # We need to actually populate the cache to set up the test. The code
        # under test will only try to open the render result if the database
        # says there's something there.
        cached_result = RenderResult(
            errors=[
                RenderError(
                    I18nMessage("py.renderer.execute.step.noModule", {}, None)
                )
            ]
        )
        rendercache.cache_render_result(
            workflow, step, previous_delta_id, cached_result
        )

        # Move the step to the current delta so it counts as needing a render.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        no_module_zipfile = None  # module_zipfile
        self.run_with_async_db(
            execute_step(
                self.chroot_context,
                workflow,
                step,
                no_module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )

        email_delta.assert_not_called()  # error is the same error
Пример #29
0
    def test_load_dynamic(self):
        """Load a module from the external-modules bucket and render with it.

        The module source is uploaded to minio, loaded through
        `LoadedModule.for_module_version()`, and then rendered against a
        one-column table; the expected output doubles the value.
        """
        source = b"def render(table, params):\n    return table * 2"
        minio.client.put_object(
            Bucket=minio.ExternalModulesBucket,
            Key="imported/abcdef/imported.py",
            Body=source,
            ContentLength=len(source),
        )

        with self.assertLogs("cjwstate.modules.loaded_module"):
            module_version = MockModuleVersion(
                "imported", "abcdef", ParamDType.Dict({}), "now"
            )
            loaded_module = LoadedModule.for_module_version(module_version)

        self.assertEqual(loaded_module.name, "imported:abcdef")

        # This ends up being kinda an integration test.
        with ExitStack() as stack:
            tempdir = stack.enter_context(tempdir_context(prefix="test-basedir-"))
            basedir = Path(tempdir)
            basedir.chmod(0o755)

            input_table = stack.enter_context(
                arrow_table_context({"A": [1]}, dir=basedir)
            )
            input_table.path.chmod(0o644)

            output_file = stack.enter_context(
                tempfile.NamedTemporaryFile(dir=basedir)
            )

            # Entered last, so it is the first context to exit.
            stack.enter_context(self.assertLogs("cjwstate.modules.loaded_module"))

            result = loaded_module.render(
                basedir=basedir,
                input_table=input_table,
                params=Params({"col": "A"}),
                tab=Tab("tab-1", "Tab 1"),
                fetch_result=None,
                output_filename=Path(output_file.name).name,
            )

        expected = RenderResult(arrow_table({"A": [2]}))
        assert_render_result_equals(result, expected)
Пример #30
0
    def test_render_with_tab_name(self):
        """A render() declaring keyword-only `tab_name` receives the tab's name."""
        named_tab = Tab("tab-1", "Tab X")

        def render(table, params, *, tab_name):
            # Called through self._test_render(); checks the name passed in.
            self.assertEqual(tab_name, "Tab X")

        self._test_render(render, tab=named_tab)