Пример #1
0
 def test_default_render_returns_fetch_result(self):
     """Check that the default render hands back the fetched table and warning.

     Functionality used by libraryofcongress.
     """
     with ExitStack() as stack:
         table_in = stack.enter_context(
             arrow_table_context({"A": [1]}, dir=self.basedir)
         )
         fetched_file = stack.enter_context(
             parquet_file({"A": [2]}, dir=self.basedir)
         )
         # Only the basename of the fetched file goes into the request.
         fetched_name = Path(fetched_file.name).name
         output_name = stack.enter_context(tempfile_context(dir=self.basedir)).name

         fetch_errors = [RenderError(I18nMessage.TODO_i18n("A warning")).to_thrift()]
         request = ttypes.RenderRequest(
             str(self.basedir),
             table_in.to_thrift(),
             Params({}).to_thrift(),
             ttypes.Tab("tab-1", "Tab 1"),
             ttypes.FetchResult(fetched_name, fetch_errors),
             output_name,
         )
         response = module.render_thrift(request)

         actual = RenderResult.from_thrift(response, self.basedir)
         expected = RenderResult(
             arrow_table({"A": [2]}),
             [RenderError(I18nMessage.TODO_i18n("A warning"))],
         )
         assert_render_result_equals(actual, expected)
Пример #2
0
    def test_execute_new_revision(self):
        """Re-execute a step whose cached result was stored for an older delta.

        The cache is written for the workflow's first delta, then the step is
        pointed at a newly created delta. After execution the step's cache
        must hold the module's output, ``{"B": [2]}``.
        """
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        old_delta = workflow.last_delta
        module_code = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"B": [2]})'
        create_module_zipfile("mod", python_code=module_code)
        step = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-1",
            order=0,
            last_relevant_delta_id=old_delta.id,
        )

        # Seed the cache with a result that belongs to the old delta.
        stale_result = RenderResult(arrow_table({"A": [1]}))
        cache_render_result(workflow, step, old_delta.id, stale_result)

        # Move the step on to a new delta so the cached result no longer matches.
        new_delta = InitWorkflowCommand.create(workflow)
        step.last_relevant_delta_id = new_delta.id
        step.save(update_fields=["last_relevant_delta_id"])

        self._execute(workflow)

        step.refresh_from_db()

        with open_cached_render_result(step.cached_render_result) as actual:
            expected = RenderResult(arrow_table({"B": [2]}))
            assert_render_result_equals(actual, expected)
Пример #3
0
 def test_deleted_module(self):
     """Run a step with ``None`` in place of its module and expect ``noModule``.

     The error must be returned by ``execute_step`` and must also be stored
     on the step's cached render result.
     """
     workflow = Workflow.create_and_init()
     first_tab = workflow.tabs.first()
     step = first_tab.steps.create(
         module_id_name="deleted_module",
         slug="step-1",
         order=0,
         last_relevant_delta_id=workflow.last_delta_id,
     )

     coroutine = execute_step(
         self.chroot_context,
         workflow,
         step,
         None,
         {},
         first_tab.to_arrow(),
         RenderResult(),
         {},
         self.output_path,
     )
     actual = self.run_with_async_db(coroutine)

     no_module_error = RenderError(
         I18nMessage("py.renderer.execute.step.noModule", {}, None)
     )
     expected = RenderResult(errors=[no_module_error])
     assert_render_result_equals(actual, expected)

     # The same error list must have been written to the step's cache.
     step.refresh_from_db()
     self.assertEqual(step.cached_render_result.errors, expected.errors)
Пример #4
0
    def test_execute_new_revision(self):
        """Re-execute a step after its relevant delta id moves from 1 to 2.

        A result is cached for delta 1; once the step points at delta 2,
        execution must replace the cache with the module's ``{"B": [2]}``.
        """
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        module_code = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"B": [2]})'
        create_module_zipfile(
            "mod",
            python_code=module_code,
            spec_kwargs={"loads_data": True},
        )
        step = first_tab.steps.create(
            module_id_name="mod",
            slug="step-1",
            order=0,
            last_relevant_delta_id=1,
        )

        # Seed the cache for delta 1, then point the step at delta 2.
        stale_result = RenderResult(arrow_table({"A": [1]}))
        cache_render_result(workflow, step, 1, stale_result)
        step.last_relevant_delta_id = 2
        step.save(update_fields=["last_relevant_delta_id"])

        self._execute(workflow)

        step.refresh_from_db()

        with open_cached_render_result(step.cached_render_result) as actual:
            expected = RenderResult(arrow_table({"B": [2]}))
            assert_render_result_equals(actual, expected)
Пример #5
0
    def test_render_empty_file_fetch_result_is_parquet(self):
        """Render from a never-written temp file and expect an empty table."""

        def render(*args, fetch_result):
            return fetch_result.dataframe

        with tempfile_context(dir=self.basedir) as empty_path:
            fetched = FetchResult(empty_path)
            actual = self._test_render(render, fetch_result=fetched)
            expected = RenderResult(arrow_table({}))
            assert_render_result_equals(actual, expected)
Пример #6
0
    def test_render_without_input_or_loads_data_raises_no_loaded_data(self):
        """Expect ``NoLoadedDataError`` when the module spec has ``loads_data: False``.

        The step is executed with an empty ``RenderResult()`` as its input.
        """
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        module_zipfile = create_module_zipfile(
            "x",
            python_code="def render(table, params): return None",
            spec_kwargs={"loads_data": False},
        )

        coroutine = execute_step(
            self.chroot_context,
            workflow,
            step,
            module_zipfile,
            {},
            Tab(first_tab.slug, first_tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        actual = self.run_with_async_db(coroutine)

        no_data_error = RenderError(
            I18nMessage("py.renderer.execute.step.NoLoadedDataError", {}, None)
        )
        assert_render_result_equals(actual, RenderResult(errors=[no_data_error]))
Пример #7
0
    def test_render_with_parquet_fetch_result(self):
        """Return the fetch result itself from ``render`` and expect its table."""

        def render(*args, fetch_result):
            return fetch_result

        with parquet_file({"A": ["fetched"]}, dir=self.basedir) as parquet_path:
            fetched = FetchResult(parquet_path)
            actual = self._test_render(render, fetch_result=fetched)
            expected = RenderResult(arrow_table({"A": ["fetched"]}))
            assert_render_result_equals(actual, expected)
Пример #8
0
    def test_render_with_non_parquet_fetch_result(self):
        """Let ``render`` read a raw fetched file as text via ``fetch_result.path``."""

        def render(*args, fetch_result):
            return pd.DataFrame({"A": [fetch_result.path.read_text()]})

        with tempfile_context(dir=self.basedir) as raw_path:
            # Plain bytes, not Parquet: render must read them itself.
            raw_path.write_bytes(b"abcd")
            fetched = FetchResult(raw_path)
            actual = self._test_render(render, fetch_result=fetched)
            expected = RenderResult(arrow_table({"A": ["abcd"]}))
            assert_render_result_equals(actual, expected)
Пример #9
0
    def test_resume_backtrack_on_corrupt_cache_error(self):
        """Render both steps when step 1's fresh cache turns out to be corrupt.

        Expects two ``Kernel.render`` calls, an ERROR-level log during
        execution, and a final output filename matching the tab-output
        pattern.
        """
        module_zipfile = create_module_zipfile("mod")
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()

        # step1: cached result is fresh -- but CORRUPT
        step1 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-1",
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        good_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, step1, workflow.last_delta_id, good_result
        )
        # Write corrupted data -- will lead to CorruptCacheError
        corrupt_key = rendercache.io.crr_parquet_key(step1.cached_render_result)
        minio.put_bytes(rendercache.io.BUCKET, corrupt_key, b"CORRUPT")

        # step2: no cached result -- must re-render
        step2 = first_tab.wf_modules.create(
            module_id_name="mod", slug="step-2", order=1
        )

        flow_steps = [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ]
        tab_flow = TabFlow(first_tab.to_arrow(), flow_steps)

        with patch.object(Kernel, "render", side_effect=mock_render({"B": [2]})):
            with self._execute(
                workflow, tab_flow, {}, expect_log_level=logging.ERROR
            ) as actual:
                assert_render_result_equals(
                    actual, RenderResult(arrow_table({"B": [2]}))
                )

            # called with step1, then step2
            self.assertEqual(Kernel.render.call_count, 2)

            # Output is to the correct file
            last_output_filename = Kernel.render.call_args[1]["output_filename"]
            self.assertRegex(last_output_filename, r"execute-tab-output.*\.arrow")
Пример #10
0
    def test_execute_partial_cache_hit(self):
        """Render only step 2 when step 1 is fresh and step 2 is stale.

        Expects exactly one ``Kernel.render`` call and a final output
        filename matching the tab-output pattern.
        """
        module_zipfile = create_module_zipfile("mod")
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()

        # step1: cached result is fresh. Should not render.
        step1 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-1",
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        fresh_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, step1, workflow.last_delta_id, fresh_result
        )

        # step2: cached result is stale, so must be re-rendered
        step2 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-2",
            order=1,
            last_relevant_delta_id=workflow.last_delta_id - 1,
        )
        stale_result = RenderResult(arrow_table({"B": [2]}))
        rendercache.cache_render_result(
            workflow, step2, workflow.last_delta_id - 1, stale_result
        )
        # Advance step2 past the delta its cache was written for.
        step2.last_relevant_delta_id = workflow.last_delta_id
        step2.save(update_fields=["last_relevant_delta_id"])

        flow_steps = [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ]
        tab_flow = TabFlow(first_tab.to_arrow(), flow_steps)

        with patch.object(Kernel, "render", side_effect=mock_render({"B": [3]})):
            with self._execute(workflow, tab_flow, {}) as actual:
                assert_render_result_equals(
                    actual, RenderResult(arrow_table({"B": [3]}))
                )

            Kernel.render.assert_called_once()  # step2, not step1

            # Output is to the correct file
            last_output_filename = Kernel.render.call_args[1]["output_filename"]
            self.assertRegex(last_output_filename, r"execute-tab-output.*\.arrow")
Пример #11
0
    def test_execute_partial_cache_hit(self, fake_load_module):
        """Render only step 2 when step 1 is fresh and step 2 is stale.

        ``fake_load_module`` is the injected mock whose return value's
        ``render`` is expected to be called exactly once.
        """
        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        ModuleVersion.create_or_replace_from_spec(module_spec)
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()

        # step1: cached result is fresh. Should not render.
        step1 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-1",
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        fresh_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, step1, workflow.last_delta_id, fresh_result
        )

        # step2: cached result is stale, so must be re-rendered
        step2 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-2",
            order=1,
            last_relevant_delta_id=workflow.last_delta_id - 1,
        )
        stale_result = RenderResult(arrow_table({"B": [2]}))
        rendercache.cache_render_result(
            workflow, step2, workflow.last_delta_id - 1, stale_result
        )
        # Advance step2 past the delta its cache was written for.
        step2.last_relevant_delta_id = workflow.last_delta_id
        step2.save(update_fields=["last_relevant_delta_id"])

        flow_steps = [
            ExecuteStep(step1, ParamDType.Dict({}), {}),
            ExecuteStep(step2, ParamDType.Dict({}), {}),
        ]
        tab_flow = TabFlow(first_tab.to_arrow(), flow_steps)

        expected = RenderResult(arrow_table({"B": [3]}))
        fake_render = fake_load_module.return_value.render
        fake_render.return_value = expected
        with self._execute(workflow, tab_flow, {}) as actual:
            assert_render_result_equals(actual, expected)

        fake_render.assert_called_once()  # step2, not step1

        # Output is to the correct file
        last_output_filename = fake_render.call_args[1]["output_filename"]
        self.assertRegex(last_output_filename, r"execute-tab-output.*\.arrow")
Пример #12
0
    def test_resume_backtrack_on_corrupt_cache_error(self, fake_load_module):
        """Render both steps when step 1's fresh cache turns out to be corrupt.

        ``fake_load_module`` is the injected mock; its return value's
        ``render`` must be called twice, and an ERROR-level log is expected.
        """
        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        ModuleVersion.create_or_replace_from_spec(module_spec)
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()

        # step1: cached result is fresh -- but CORRUPT
        step1 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-1",
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        good_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, step1, workflow.last_delta_id, good_result
        )
        # Write corrupted data -- will lead to CorruptCacheError
        corrupt_key = rendercache.io.crr_parquet_key(step1.cached_render_result)
        minio.put_bytes(rendercache.io.BUCKET, corrupt_key, b"CORRUPT")

        # step2: no cached result -- must re-render
        step2 = first_tab.wf_modules.create(
            module_id_name="mod", slug="step-2", order=1
        )

        flow_steps = [
            ExecuteStep(step1, ParamDType.Dict({}), {}),
            ExecuteStep(step2, ParamDType.Dict({}), {}),
        ]
        tab_flow = TabFlow(first_tab.to_arrow(), flow_steps)

        expected = RenderResult(arrow_table({"B": [2]}))
        fake_render = fake_load_module.return_value.render
        fake_render.return_value = expected
        with self._execute(
            workflow, tab_flow, {}, expect_log_level=logging.ERROR
        ) as actual:
            assert_render_result_equals(actual, expected)

        # called with step1, then step2
        self.assertEqual(fake_render.call_count, 2)

        # Output is to the correct file
        last_output_filename = fake_render.call_args[1]["output_filename"]
        self.assertRegex(last_output_filename, r"execute-tab-output.*\.arrow")
Пример #13
0
    def test_report_module_error(self):
        """Surface a crashing module as ``user_visible_bug_during_render``.

        The module's ``render`` calls an undefined name. The resulting
        ``NameError`` text must appear in the error's ``message`` argument,
        and at least one record must be logged at INFO level or above.
        """
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )

        module_zipfile = create_module_zipfile(
            "x",
            python_code="def render(table, params):\n  undefined()",
            spec_kwargs={"loads_data": True},
        )

        with self.assertLogs(level=logging.INFO):
            coroutine = execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            actual = self.run_with_async_db(coroutine)

        bug_message = I18nMessage(
            "py.renderer.execute.step.user_visible_bug_during_render",
            {"message": "exit code 1: NameError: name 'undefined' is not defined"},
            None,
        )
        expected = RenderResult(errors=[RenderError(bug_message)])
        assert_render_result_equals(actual, expected)
Пример #14
0
 def test_default_render_returns_fetch_result(self):
     """Check that the default render hands back the fetched table and warning.

     Functionality used by libraryofcongress. The request is built from raw
     thrift types; the response is converted back with
     ``thrift_render_result_to_arrow``.
     """
     with ExitStack() as stack:
         table_in = stack.enter_context(
             arrow_table_context({"A": [1]}, dir=self.basedir)
         )
         fetched_file = stack.enter_context(
             parquet_file({"A": [2]}, dir=self.basedir)
         )
         # Only the basename of the fetched file goes into the request.
         fetched_name = Path(fetched_file.name).name
         output_name = stack.enter_context(tempfile_context(dir=self.basedir)).name

         warning_message = ttypes.I18nMessage(
             "TODO_i18n",
             {"text": ttypes.I18nArgument(string_value="A warning")},
         )
         fetch_result = ttypes.FetchResult(
             fetched_name,
             [ttypes.RenderError(warning_message, [])],
         )
         request = ttypes.RenderRequest(
             str(self.basedir),
             arrow_arrow_table_to_thrift(table_in),
             {},  # params
             ttypes.Tab("tab-1", "Tab 1"),
             fetch_result,
             output_name,
         )
         response = module.render_thrift(request)

         actual = thrift_render_result_to_arrow(response, self.basedir)
         expected = RenderResult(
             arrow_table({"A": [2]}),
             [RenderError(I18nMessage.TODO_i18n("A warning"))],
         )
         assert_render_result_equals(actual, expected)
Пример #15
0
    def test_resume_without_rerunning_unneeded_renders(self, fake_load_module):
        """Render only the step that has no cached result.

        Step 1 already has a cached result for the current delta; step 2 has
        none. The mocked module's ``render`` must be called exactly once and
        its return value must end up in step 2's cache.
        """
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        delta_id = workflow.last_delta_id
        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        ModuleVersion.create_or_replace_from_spec(module_spec)

        # step1: has a valid, cached result
        step1 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-1",
            order=0,
            last_relevant_delta_id=delta_id,
        )
        cached_result = RenderResult(arrow_table({"A": [1]}))
        cache_render_result(workflow, step1, delta_id, cached_result)

        # step2: has no cached result (must be rendered)
        step2 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-2",
            order=1,
            last_relevant_delta_id=delta_id,
        )

        loaded_module = Mock(LoadedModule)
        loaded_module.migrate_params.return_value = {}
        fake_load_module.return_value = loaded_module
        rendered_result = RenderResult(arrow_table({"A": [2]}))

        loaded_module.render.return_value = rendered_result
        self._execute(workflow)
        loaded_module.render.assert_called_once()  # only with step2

        step2.refresh_from_db()
        with open_cached_render_result(step2.cached_render_result) as actual:
            assert_render_result_equals(actual, rendered_result)
Пример #16
0
    def test_execute_cache_miss(self, fake_load_module):
        """Render every step when neither step has a cached result.

        ``fake_load_module`` is the injected mock; its return value's
        ``render`` must be called twice for the two steps in the flow.
        """
        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        ModuleVersion.create_or_replace_from_spec(module_spec)
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step1 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-1",
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        step2 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-2",
            order=1,
            last_relevant_delta_id=workflow.last_delta_id,
        )

        flow_steps = [
            ExecuteStep(step1, ParamDType.Dict({}), {}),
            ExecuteStep(step2, ParamDType.Dict({}), {}),
        ]
        tab_flow = TabFlow(first_tab.to_arrow(), flow_steps)

        expected = RenderResult(arrow_table({"B": [2]}))
        fake_render = fake_load_module.return_value.render
        fake_render.return_value = expected
        with self._execute(workflow, tab_flow, {}) as actual:
            assert_render_result_equals(actual, expected)

        # Two steps in the flow, two render calls.
        self.assertEqual(fake_render.call_count, 2)

        # Output is to the correct file
        last_output_filename = fake_render.call_args[1]["output_filename"]
        self.assertRegex(last_output_filename, r"execute-tab-output.*\.arrow")
Пример #17
0
    def test_execute_cache_hit(self):
        """Return step 2's cached result when both steps have fresh caches.

        ``Kernel.render`` is patched to produce a recognizably wrong table
        (``{"No": ["bad"]}``); the result must be the cached ``{"B": [2]}``.
        """
        module_zipfile = create_module_zipfile("mod")
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()

        step1 = first_tab.wf_modules.create(
            slug="step-1",
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        step1_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, step1, workflow.last_delta_id, step1_result
        )

        step2 = first_tab.wf_modules.create(
            slug="step-2",
            order=1,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        step2_result = RenderResult(arrow_table({"B": [2]}))
        rendercache.cache_render_result(
            workflow, step2, workflow.last_delta_id, step2_result
        )

        flow_steps = [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ]
        tab_flow = TabFlow(first_tab.to_arrow(), flow_steps)

        with patch.object(Kernel, "render", side_effect=mock_render({"No": ["bad"]})):
            with self._execute(workflow, tab_flow, {}) as actual:
                expected = RenderResult(arrow_table({"B": [2]}), [])
                assert_render_result_equals(actual, expected)
Пример #18
0
    def test_execute_cache_miss(self):
        """Render every step when neither step has a cached result.

        Expects two ``Kernel.render`` calls for the two steps in the flow and
        a final output filename matching the tab-output pattern.
        """
        module_zipfile = create_module_zipfile("mod")
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step1 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-1",
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        step2 = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-2",
            order=1,
            last_relevant_delta_id=workflow.last_delta_id,
        )

        flow_steps = [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ]
        tab_flow = TabFlow(first_tab.to_arrow(), flow_steps)

        with patch.object(Kernel, "render", side_effect=mock_render({"B": [2]})):
            with self._execute(workflow, tab_flow, {}) as actual:
                assert_render_result_equals(
                    actual, RenderResult(arrow_table({"B": [2]}))
                )

            # Two steps in the flow, two render calls.
            self.assertEqual(Kernel.render.call_count, 2)

            # Output is to the correct file
            last_output_filename = Kernel.render.call_args[1]["output_filename"]
            self.assertRegex(last_output_filename, r"execute-tab-output.*\.arrow")
Пример #19
0
    def test_load_dynamic(self):
        """Load a module's code from minio and render with it.

        The uploaded ``render`` doubles its input table, so input
        ``{"A": [1]}`` must come back as ``{"A": [2]}``. Both loading and
        rendering are expected to log to ``cjwstate.modules.loaded_module``.
        """
        module_source = b"def render(table, params):\n    return table * 2"
        minio.client.put_object(
            Bucket=minio.ExternalModulesBucket,
            Key="imported/abcdef/imported.py",
            Body=module_source,
            ContentLength=len(module_source),
        )

        module_version = MockModuleVersion(
            "imported", "abcdef", ParamDType.Dict({}), "now"
        )
        with self.assertLogs("cjwstate.modules.loaded_module"):
            loaded = LoadedModule.for_module_version(module_version)

        self.assertEqual(loaded.name, "imported:abcdef")

        # This ends up being kinda an integration test.
        with ExitStack() as stack:
            tempdir = stack.enter_context(tempdir_context(prefix="test-basedir-"))
            basedir = Path(tempdir)
            # NOTE(review): permissions are presumably widened so the module's
            # process can read the directory and input file -- confirm.
            basedir.chmod(0o755)
            table_in = stack.enter_context(
                arrow_table_context({"A": [1]}, dir=basedir)
            )
            table_in.path.chmod(0o644)
            output_file = stack.enter_context(
                tempfile.NamedTemporaryFile(dir=basedir)
            )

            stack.enter_context(self.assertLogs("cjwstate.modules.loaded_module"))

            actual = loaded.render(
                basedir=basedir,
                input_table=table_in,
                params=Params({"col": "A"}),
                tab=Tab("tab-1", "Tab 1"),
                fetch_result=None,
                output_filename=Path(output_file.name).name,
            )

        expected = RenderResult(arrow_table({"A": [2]}))
        assert_render_result_equals(actual, expected)
Пример #20
0
    def test_resume_without_rerunning_unneeded_renders(self):
        """Render only the step that has no cached result.

        Step 1 has a cached ``{"A": [1]}`` for delta 1; step 2 has no cache.
        The module doubles its input, so step 2's cached output is
        ``{"A": [2]}`` only if step 1 was served from cache, not re-rendered.
        """
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        create_module_zipfile(
            # If this runs on step1, it'll return pd.DataFrame().
            # If this runs on step2, it'll return step1-output * 2.
            # ... step2's output depends on whether we run this on
            # step1.
            "mod",
            spec_kwargs={"loads_data": True},
            python_code="def render(table, params): return table * 2",
        )

        # step1: has a valid, cached result
        step1 = tab.steps.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=1,
            module_id_name="mod",
        )
        cache_render_result(workflow, step1, 1,
                            RenderResult(arrow_table({"A": [1]})))

        # step2: has no cached result (must be rendered)
        step2 = tab.steps.create(
            order=1,
            slug="step-2",
            last_relevant_delta_id=1,
            module_id_name="mod",
        )

        self._execute(workflow)

        step2.refresh_from_db()
        with open_cached_render_result(step2.cached_render_result) as actual:
            assert_render_result_equals(actual,
                                        RenderResult(arrow_table({"A": [2]})))
Пример #21
0
    def test_execute_new_revision(self, fake_load_module):
        """Re-execute a step whose cached result was stored for an older delta.

        The cache is written for the first delta, then the step is pointed at
        a newly created delta. After execution the cache must hold the mocked
        module's render result.
        """
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        old_delta = workflow.last_delta
        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        ModuleVersion.create_or_replace_from_spec(module_spec)
        step = first_tab.wf_modules.create(
            module_id_name="mod",
            slug="step-1",
            order=0,
            last_relevant_delta_id=old_delta.id,
        )

        # Seed the cache with a result that belongs to the old delta.
        stale_result = RenderResult(arrow_table({"A": [1]}))
        cache_render_result(workflow, step, old_delta.id, stale_result)

        # Move the step on to a new delta so the cached result no longer matches.
        new_delta = InitWorkflowCommand.create(workflow)
        step.last_relevant_delta_id = new_delta.id
        step.save(update_fields=["last_relevant_delta_id"])

        rendered_result = RenderResult(arrow_table({"B": [2]}))
        loaded_module = Mock(LoadedModule)
        loaded_module.migrate_params.return_value = {}
        fake_load_module.return_value = loaded_module
        loaded_module.render.return_value = rendered_result

        self._execute(workflow)

        step.refresh_from_db()

        with open_cached_render_result(step.cached_render_result) as actual:
            assert_render_result_equals(actual, rendered_result)
Пример #22
0
    def test_cache_render_result(self):
        """Cache a result with a table, two errors and a dict, then read it back.

        Checks the cached metadata (step id, delta id), the storage key, that
        a freshly loaded model sees the same cache entry, and that the stored
        result equals the original.
        """
        error_with_quick_fix = RenderError(
            I18nMessage("e1", [1, "x"]),
            [
                QuickFix(
                    I18nMessage("q1", []),
                    QuickFixAction.PrependStep("filter", {"a": "x"}),
                )
            ],
        )
        plain_error = RenderError(I18nMessage("e2", []), [])
        result = RenderResult(
            arrow_table({"A": [1]}),
            [error_with_quick_fix, plain_error],
            {"foo": "bar"},
        )
        cache_render_result(self.workflow, self.wf_module, self.delta.id, result)

        cached = self.wf_module.cached_render_result
        self.assertEqual(cached.wf_module_id, self.wf_module.id)
        self.assertEqual(cached.delta_id, self.delta.id)

        expected_key = f"wf-{self.workflow.id}/wfm-{self.wf_module.id}/delta-{self.delta.id}.dat"
        self.assertEqual(crr_parquet_key(cached), expected_key)

        # Reading completely freshly from the DB should give the same thing
        reloaded_step = WfModule.objects.get(id=self.wf_module.id)
        from_db = reloaded_step.cached_render_result
        self.assertEqual(from_db, cached)

        with open_cached_render_result(from_db) as reopened:
            assert_render_result_equals(reopened, result)
    def test_duplicate_copies_fresh_cache(self):
        """Duplicate a step into a new workflow and expect its cache to follow.

        The duplicate's cached result must equal the original's with
        ``workflow_id``, ``step_id`` and ``delta_id`` (0) replaced, and must
        open to the same render result.
        """
        # The cache's filename depends on workflow_id and step_id.
        # Duplicating it would need more complex code :).
        original_error = RenderError(I18nMessage("X", {}, None), [])
        result = RenderResult(arrow_table({"A": [1]}), [original_error], {})
        cache_render_result(self.workflow, self.step, 1, result)

        target_workflow = Workflow.objects.create()
        target_tab = target_workflow.tabs.create(position=0)
        dup = self.step.duplicate_into_new_workflow(target_tab)

        dup_cached_result = dup.cached_render_result
        expected_cached_result = replace(
            self.step.cached_render_result,
            workflow_id=target_workflow.id,
            step_id=dup.id,
            delta_id=0,
        )
        self.assertEqual(dup_cached_result, expected_cached_result)

        with open_cached_render_result(dup_cached_result) as reopened:
            assert_render_result_equals(reopened, result)
Пример #24
0
    def test_execute_cache_hit(self, fake_module):
        """Return step 2's cached result when both steps have fresh caches.

        ``fake_module`` is the injected mock; it must not be called at all.
        """
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()

        step1 = first_tab.wf_modules.create(
            slug="step-1",
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        step1_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, step1, workflow.last_delta_id, step1_result
        )

        step2 = first_tab.wf_modules.create(
            slug="step-2",
            order=1,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        step2_result = RenderResult(arrow_table({"B": [2]}))
        rendercache.cache_render_result(
            workflow, step2, workflow.last_delta_id, step2_result
        )

        flow_steps = [
            ExecuteStep(step1, ParamDType.Dict({}), {}),
            ExecuteStep(step2, ParamDType.Dict({}), {}),
        ]
        tab_flow = TabFlow(first_tab.to_arrow(), flow_steps)

        with self._execute(workflow, tab_flow, {}) as actual:
            expected = RenderResult(arrow_table({"B": [2]}), [])
            assert_render_result_equals(actual, expected)

        fake_module.assert_not_called()
Пример #25
0
    def test_execute_mark_unreachable(self, send_update):
        """Mark the steps after an erroring step as "unreachable".

        The module returns a string, which the test expects to be stored as
        an error on step 1. Steps 2 and 3 must then be cached with status
        "unreachable" and an empty ``RenderResult``. The most recent
        ``send_update`` call must carry step 3's cached result.
        """
        # The mock must return an already-resolved future.
        future_none = asyncio.Future()
        future_none.set_result(None)
        send_update.return_value = future_none

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        create_module_zipfile(
            "mod",
            spec_kwargs={"loads_data": True},
            python_code=
            'def render(table, params): return "error, not warning"',
        )
        step1 = tab.steps.create(order=0, slug="step-1", module_id_name="mod")
        step2 = tab.steps.create(order=1, slug="step-2", module_id_name="mod")
        step3 = tab.steps.create(order=2, slug="step-3", module_id_name="mod")

        error_result = RenderResult(
            errors=[RenderError(TODO_i18n("error, not warning"))])

        self._execute(workflow)

        step1.refresh_from_db()
        self.assertEqual(step1.cached_render_result.status, "error")
        with open_cached_render_result(step1.cached_render_result) as result:
            assert_render_result_equals(result, error_result)

        step2.refresh_from_db()
        self.assertEqual(step2.cached_render_result.status, "unreachable")
        with open_cached_render_result(step2.cached_render_result) as result:
            assert_render_result_equals(result, RenderResult())

        step3.refresh_from_db()
        self.assertEqual(step3.cached_render_result.status, "unreachable")
        with open_cached_render_result(step3.cached_render_result) as result:
            assert_render_result_equals(result, RenderResult())

        # assert_called_with checks only the most recent call: step 3's update.
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(
                steps={
                    step3.id:
                    clientside.StepUpdate(
                        render_result=step3.cached_render_result,
                        module_slug="mod")
                }),
        )
Пример #26
0
    def test_execute_mark_unreachable(self, send_delta_async,
                                      fake_load_module):
        """Steps that follow a failing step are cached as "unreachable".

        The loaded module is replaced by a mock whose ``render`` returns an
        error result. After executing a three-step workflow, step 1 must be
        cached with status "error" and steps 2 and 3 with status
        "unreachable"; the last ``send_delta_async`` call must describe
        step 3.
        """
        send_delta_async.return_value = future_none

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta_id = workflow.last_delta_id
        ModuleVersion.create_or_replace_from_spec({
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": []
        })

        # Three consecutive steps, all using the same module.
        created = []
        for position, slug in enumerate(("step-1", "step-2", "step-3")):
            created.append(
                tab.wf_modules.create(
                    order=position,
                    slug=slug,
                    last_relevant_delta_id=delta_id,
                    module_id_name="mod",
                ))
        wf_module1, wf_module2, wf_module3 = created

        # Every render goes through this mock and yields the same error.
        error_result = RenderResult(
            errors=[RenderError(I18nMessage.TODO_i18n("error, not warning"))])
        fake_module = Mock(LoadedModule)
        fake_module.migrate_params.return_value = {}
        fake_module.render.return_value = error_result
        fake_load_module.return_value = fake_module

        self._execute(workflow)

        wf_module1.refresh_from_db()
        failed_cache = wf_module1.cached_render_result
        self.assertEqual(failed_cache.status, "error")
        with open_cached_render_result(failed_cache) as result:
            assert_render_result_equals(result, error_result)

        # Steps after the failure are unreachable and hold an empty result.
        for later_step in (wf_module2, wf_module3):
            later_step.refresh_from_db()
            later_cache = later_step.cached_render_result
            self.assertEqual(later_cache.status, "unreachable")
            with open_cached_render_result(later_cache) as result:
                assert_render_result_equals(result, RenderResult())

        expected_step3_payload = {
            "output_status": "unreachable",
            "quick_fixes": [],
            "output_error": "",
            "output_columns": [],
            "output_n_rows": 0,
            "cached_render_result_delta_id": delta_id,
        }
        send_delta_async.assert_called_with(
            workflow.id,
            {"updateWfModules": {str(wf_module3.id): expected_step3_payload}},
        )
Пример #27
0
    def test_execute_mark_unreachable(self, send_update, fake_load_module):
        """Steps that follow a failing step are cached as "unreachable".

        The loaded module is replaced by a mock whose ``render`` returns an
        error result. After executing a three-step workflow, step 1 must be
        cached with status "error" and steps 2 and 3 with status
        "unreachable"; the last ``send_update`` call must carry step 3's
        cached render result.
        """
        # Give send_update an already-resolved future as its return value.
        resolved = asyncio.Future()
        resolved.set_result(None)
        send_update.return_value = resolved

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta_id = workflow.last_delta_id
        ModuleVersion.create_or_replace_from_spec({
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": []
        })

        # Three consecutive steps, all using the same module.
        created = []
        for position, slug in enumerate(("step-1", "step-2", "step-3")):
            created.append(
                tab.wf_modules.create(
                    order=position,
                    slug=slug,
                    last_relevant_delta_id=delta_id,
                    module_id_name="mod",
                ))
        wf_module1, wf_module2, wf_module3 = created

        # Every render goes through this mock and yields the same error.
        error_result = RenderResult(
            errors=[RenderError(I18nMessage.TODO_i18n("error, not warning"))])
        fake_module = Mock(LoadedModule)
        fake_module.migrate_params.return_value = {}
        fake_module.render.return_value = error_result
        fake_load_module.return_value = fake_module

        self._execute(workflow)

        wf_module1.refresh_from_db()
        failed_cache = wf_module1.cached_render_result
        self.assertEqual(failed_cache.status, "error")
        with open_cached_render_result(failed_cache) as result:
            assert_render_result_equals(result, error_result)

        # Steps after the failure are unreachable and hold an empty result.
        for later_step in (wf_module2, wf_module3):
            later_step.refresh_from_db()
            later_cache = later_step.cached_render_result
            self.assertEqual(later_cache.status, "unreachable")
            with open_cached_render_result(later_cache) as result:
                assert_render_result_equals(result, RenderResult())

        step3_update = clientside.StepUpdate(
            render_result=wf_module3.cached_render_result)
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={wf_module3.id: step3_update}),
        )
Пример #28
0
 def test_execute_empty_tab(self):
     """Executing a tab flow with no steps yields an empty RenderResult."""
     wf = Workflow.create_and_init()
     # Build the flow from the workflow's first tab with an empty step list.
     empty_flow = TabFlow(wf.tabs.first().to_arrow(), [])
     expected = RenderResult()
     with self._execute(wf, empty_flow, {}) as actual:
         assert_render_result_equals(actual, expected)