示例#1
0
    def test_duplicate_copies_fresh_cache(self):
        # Scenario: self.step has a cached result (table, one error, JSON)
        # written at delta 1. After duplicating the step into a brand-new
        # workflow, the copy must expose an equal cached result, re-keyed to
        # the new workflow/step IDs with delta_id 0.
        table = make_table(make_column("A", [1], format="${:,.2f}"))
        write_to_rendercache(
            self.workflow,
            self.step,
            1,
            table=table,
            errors=[RenderError(I18nMessage("X", {}, None))],
            json={"foo": "bar"},
        )

        target_workflow = Workflow.objects.create()
        target_tab = target_workflow.tabs.create(position=0)
        clone = self.step.duplicate_into_new_workflow(target_tab)

        clone_crr = clone.cached_render_result
        expected_crr = replace(
            self.step.cached_render_result,
            workflow_id=target_workflow.id,
            step_id=clone.id,
            delta_id=0,
        )
        self.assertEqual(clone_crr, expected_crr)

        # The duplicate's result must open and hold the same table, errors
        # and JSON as the original.
        with open_cached_render_result(clone_crr) as copied:
            assert_arrow_table_equals(copied.table, table)
            self.assertEqual(
                copied.errors, [RenderError(I18nMessage("X", {}, None))]
            )
            self.assertEqual(copied.json, {"foo": "bar"})
示例#2
0
    def test_execute_new_revision(self):
        # The step sits at delta 2 but its only cached result was written for
        # delta 1 (stale). Executing the workflow must leave the step with a
        # cached result holding module "mod"'s output: column B == [2].
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        render_code = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"B": [2]})'
        create_module_zipfile(
            "mod", spec_kwargs={"loads_data": True}, python_code=render_code
        )
        step = tab.steps.create(
            order=0, slug="step-1", last_relevant_delta_id=2, module_id_name="mod"
        )
        # Stale entry: written for delta 1, not 2.
        stale_table = make_table(make_column("A", ["a"]))
        write_to_rendercache(workflow, step, 1, stale_table)

        self._execute(workflow)

        step.refresh_from_db()

        expected_table = make_table(make_column("B", [2]))
        with open_cached_render_result(step.cached_render_result) as rendered:
            assert_arrow_table_equals(rendered.table, expected_table)
示例#3
0
    def test_deprecated_current_table_json(self):
        # A two-row text column "A" is cached for self.step2 at delta 2; the
        # /public/moduledata/live/<step id>.json URL must stream it back as one
        # JSON record per row.
        cached_table = make_table(make_column("A", ["a", "b"]))
        write_to_rendercache(self.workflow, self.step2, 2, cached_table)

        url = f"/public/moduledata/live/{self.step2.id}.json"
        response = self.client.get(url)

        self.assertEqual(response.status_code, 200)
        records = read_streaming_json(response)
        self.assertEqual(records, [{"A": "a"}, {"A": "b"}])
示例#4
0
    def test_deprecated_current_table_csv(self):
        # Same cached table as the JSON variant, fetched through the
        # /public/moduledata/live/<step id>.csv URL: the streamed body must be
        # a header line followed by one line per row, no trailing newline.
        cached_table = make_table(make_column("A", ["a", "b"]))
        write_to_rendercache(self.workflow, self.step2, 2, cached_table)

        url = f"/public/moduledata/live/{self.step2.id}.csv"
        response = self.client.get(url)

        self.assertEqual(response.status_code, 200)
        body = b"".join(response.streaming_content)
        self.assertEqual(body, b"A\na\nb")
示例#5
0
    def test_tile_row_out_of_bounds(self):
        # Only one row is cached for self.step2 at delta 2, yet the request
        # asks for tile "1,0": the endpoint must answer NOT_FOUND with a
        # "tile out of bounds" error body.
        one_row_table = make_table(make_column("A", [1]))
        write_to_rendercache(self.workflow, self.step2, 2, one_row_table)

        url = f"/workflows/{self.workflow.id}/tiles/step-2/delta-2/1,0.json"
        response = self.client.get(url)

        self.assertEqual(response.status_code, status.NOT_FOUND)
        payload = json.loads(response.content)
        self.assertEqual(payload, {"error": "tile out of bounds"})
示例#6
0
    def test_json(self):
        # Tile "0,0" of a cached two-row text column must come back as JSON
        # with one single-cell list per row under the "rows" key.
        cached_table = make_table(make_column("A", ["a", "b"]))
        write_to_rendercache(self.workflow, self.step2, 2, cached_table)

        url = f"/workflows/{self.workflow.id}/tiles/step-2/delta-2/0,0.json"
        response = self.client.get(url)

        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.content)
        self.assertEqual(payload, {"rows": [["a"], ["b"]]})
示例#7
0
    def test_current_table_json(self):
        # The current-result-table.json endpoint must stream the cached table
        # as a list of {column: value} records, one per row.
        cached_table = make_table(make_column("A", ["a", "b"]))
        write_to_rendercache(self.workflow, self.step2, 2, cached_table)

        url = f"/workflows/{self.workflow.id}/steps/step-2/current-result-table.json"
        response = self.client.get(url)

        self.assertEqual(response.status_code, 200)
        records = read_streaming_json(response)
        self.assertEqual(records, [{"A": "a"}, {"A": "b"}])
示例#8
0
    def test_current_table_csv(self):
        # The current-result-table.csv endpoint must stream the cached table
        # as CSV: header line, then one line per row, no trailing newline.
        cached_table = make_table(make_column("A", ["a", "b"]))
        write_to_rendercache(self.workflow, self.step2, 2, cached_table)

        url = f"/workflows/{self.workflow.id}/steps/step-2/current-result-table.csv"
        response = self.client.get(url)

        self.assertEqual(response.status_code, 200)
        body = b"".join(response.streaming_content)
        self.assertEqual(body, b"A\na\nb")
示例#9
0
    def test_empty_json(self):
        # The cached result carries an empty JSON object; the request must be
        # answered with NOT_FOUND and a "render result has no JSON" error.
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(self.workflow, self.step, 1, cached_table, json={})

        response = self._request()

        self.assertEqual(response.status_code, status.NOT_FOUND)
        payload = json.loads(response.content)
        self.assertEqual(payload, {"error": "render result has no JSON"})
示例#10
0
    def test_disallow_non_text(self):
        # Column "A" holds numbers, not text: the request for "A" must still
        # succeed (200) but report an empty "values" mapping.
        numeric_table = make_table(make_column("A", [1, 2, 3, 2, 1]))
        write_to_rendercache(
            self.workflow, self.step1, self.step1.last_relevant_delta_id, numeric_table
        )

        response = self._request("A")

        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.content)
        self.assertEqual(payload, {"values": {}})
示例#11
0
    def test_resume_backtrack_on_corrupt_cache_error(self):
        # Two-step tab: step1's cache entry is fresh in the database but its
        # stored Parquet bytes are junk; step2 has no cached result at all.
        # Expected: an ERROR-level log, two Kernel.render calls (step1, then
        # step2), and the last render writing to the tab-output Arrow file.
        module_zipfile = create_module_zipfile(
            "mod", spec_kwargs={"loads_data": True}
        )
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()

        # step1: cached result is fresh ...
        step1 = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(workflow, step1, workflow.last_delta_id, cached_table)
        step1.refresh_from_db()
        # ... but CORRUPT: overwrite its file so reading it will lead to
        # CorruptCacheError.
        s3.put_bytes(
            rendercache.io.BUCKET,
            rendercache.io.crr_parquet_key(step1.cached_render_result),
            b"CORRUPT",
        )

        # step2: no cached result -- must re-render
        step2 = tab.steps.create(order=1, slug="step-2", module_id_name="mod")

        execute_steps = [
            ExecuteStep(step, module_zipfile, {}) for step in (step1, step2)
        ]
        tab_flow = TabFlow(Tab(tab.slug, tab.name), execute_steps)

        new_table = make_table(make_column("B", ["b"]))
        expected_columns = [Column("B", ColumnType.Text())]

        with patch.object(
            Kernel, "render", side_effect=mock_render(new_table)
        ) as render_mock:
            with self._execute(
                workflow, tab_flow, {}, expect_log_level=logging.ERROR
            ) as (result, path):
                self.assertEqual(result, StepResult(path, expected_columns))

            # called with step1, then step2
            self.assertEqual(render_mock.call_count, 2)
            # Output is to the correct file
            last_output_filename = render_mock.call_args[1]["output_filename"]
            self.assertRegex(last_output_filename, r"execute-tab-output.*\.arrow")
示例#12
0
    def test_execute_partial_cache_hit(self):
        # Two-step tab: step1 has a fresh cached result, step2 only a stale
        # one (written one delta behind). Expected: exactly one Kernel.render
        # call, whose output is the tab result (column C) written to the
        # tab-output Arrow file.
        module_zipfile = create_module_zipfile(
            "mod", spec_kwargs={"loads_data": True}
        )
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()

        # step1: cached result is fresh. Should not render.
        step1 = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        fresh_table = make_table(make_column("A", ["a"]))
        write_to_rendercache(workflow, step1, workflow.last_delta_id, fresh_table)

        # step2: cached result is stale, so must be re-rendered
        step2 = tab.steps.create(
            order=1,
            slug="step-2",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        stale_table = make_table(make_column("B", ["b"]))
        write_to_rendercache(workflow, step2, workflow.last_delta_id - 1, stale_table)

        execute_steps = [
            ExecuteStep(step, module_zipfile, {}) for step in (step1, step2)
        ]
        tab_flow = TabFlow(Tab(tab.slug, tab.name), execute_steps)

        new_table = make_table(make_column("C", ["c"]))
        expected_columns = [Column("C", ColumnType.Text())]

        with patch.object(
            Kernel, "render", side_effect=mock_render(new_table)
        ) as render_mock:
            with self._execute(workflow, tab_flow, {}) as (result, path):
                self.assertEqual(result, StepResult(path, expected_columns))
                written_table = load_trusted_arrow_file(path)
                assert_arrow_table_equals(written_table, new_table)

            render_mock.assert_called_once()  # step2, not step1

            # Output is to the correct file
            output_filename = render_mock.call_args[1]["output_filename"]
            self.assertRegex(output_filename, r"execute-tab-output.*\.arrow")
示例#13
0
    def test_duplicate_ignores_stale_cache(self):
        # A cached result written under the wrong delta ID ("stale") must not
        # be carried over when the step is duplicated into another workflow:
        # the duplicate ends up with no cached result and no cached status.
        write_to_rendercache(
            self.workflow, self.step, 5, make_table(make_column("A", [1]))
        )

        workflow2 = Workflow.objects.create()
        tab2 = workflow2.tabs.create(position=0)
        dup = self.step.duplicate_into_new_workflow(tab2)

        self.assertIsNone(dup.cached_render_result)
        # Identity check against None, consistent with the assertion above
        # (was assertEqual(..., None)).
        self.assertIsNone(dup.cached_render_result_status)
示例#14
0
    def test_delete_step(self):
        # Deleting a step must also remove its cached Parquet file from
        # storage. The key is captured before deletion so it can be checked
        # afterwards.
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(
            self.workflow,
            self.step,
            1,
            table=cached_table,
            errors=[RenderError(I18nMessage("X", {}, None), [])],
            json={"foo": "bar"},
        )

        parquet_key = crr_parquet_key(self.step.cached_render_result)
        self.step.delete()

        still_exists = s3.exists(BUCKET, parquet_key)
        self.assertFalse(still_exists)
示例#15
0
    def test_json(self):
        # The JSON stored alongside the cached render result must be returned
        # verbatim with an OK status.
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(
            self.workflow, self.step, 1, cached_table, json={"hello": "world!"}
        )

        response = self._request()

        self.assertEqual(response.status_code, status.OK)
        payload = json.loads(response.content)
        self.assertEqual(payload, {"hello": "world!"})
示例#16
0
    def test_str(self):
        # For a text column, the response must map each distinct value to its
        # number of occurrences; the None cell does not appear in the counts.
        text_table = make_table(make_column("A", ["a", "b", "b", "a", "c", None]))
        write_to_rendercache(
            self.workflow, self.step1, self.step1.last_relevant_delta_id, text_table
        )

        response = self._request("A")

        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.content)
        self.assertEqual(payload, {"values": {"a": 2, "b": 2, "c": 1}})
示例#17
0
    def test_corrupt_cache_error(self):
        # The database still records a cached result for self.step2, but its
        # Parquet files have been deleted. The tile request must answer
        # NOT_FOUND and ask the caller to retry with another delta_id.
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(self.workflow, self.step2, 2, cached_table)
        delete_parquet_files_for_step(self.workflow.id, self.step2.id)

        url = f"/workflows/{self.workflow.id}/tiles/step-2/delta-2/0,0.json"
        response = self.client.get(url)

        self.assertEqual(response.status_code, status.NOT_FOUND)
        expected_payload = {
            "error": "result went away; please try again with another delta_id"
        }
        self.assertEqual(json.loads(response.content), expected_payload)
示例#18
0
    def test_wrong_column(self):
        # Only column "A" is cached; asking for "B" must yield NOT_FOUND with
        # an error naming the missing column.
        cached_table = make_table(make_column("A", ["a"]))
        write_to_rendercache(
            self.workflow, self.step1, self.step1.last_relevant_delta_id, cached_table
        )

        response = self._request("B")

        self.assertEqual(response.status_code, status.NOT_FOUND)
        payload = json.loads(response.content)
        self.assertEqual(payload, {"error": 'column "B" not found'})
示例#19
0
    def test_init_state(self):
        # The response must embed a `window.initState = <JSON>` line whose
        # JSON names this workflow and the step (module "chart", slug
        # "step-1"). The request is expected to log to "cjwstate.params" at
        # INFO level or above.
        create_module_zipfile("chart", spec_kwargs={"html_output": True}, html="hi")
        write_to_rendercache(
            self.workflow, self.step, 1, make_table(make_column("A", [1])), json={}
        )

        with self.assertLogs("cjwstate.params", level="INFO"):
            response = self._request()

        self.assertEqual(response.status_code, status.OK)
        # The dot is escaped so only the literal `window.initState` matches.
        init_state_match = re.search(br"window\.initState =([^\n]*)", response.content)
        # re.search() returns None on no match; fail with a readable message
        # instead of an AttributeError on `.group`.
        self.assertIsNotNone(
            init_state_match, "window.initState assignment not found in response"
        )
        init_state = json.loads(init_state_match.group(1))
        self.assertEqual(init_state["workflow"]["id"], self.workflow.id)
        self.assertEqual(init_state["step"]["module"], "chart")
        self.assertEqual(init_state["step"]["slug"], "step-1")
示例#20
0
 def test_load_input_cached_render_result(self):
     # step1 has a cached result at the workflow's latest delta. Loading the
     # database objects for step2 (order=1 in the same tab) must hand back
     # step1's cached result as the input -- both positionally (index 4) and
     # via the `input_cached_render_result` attribute.
     workflow = Workflow.create_and_init()
     input_table = make_table(make_column("A", [1]))
     step1 = workflow.tabs.first().steps.create(
         order=0, slug="step-1", last_relevant_delta_id=workflow.last_delta_id
     )
     write_to_rendercache(workflow, step1, workflow.last_delta_id, input_table)
     step2 = workflow.tabs.first().steps.create(order=1, slug="step-2")

     loaded = self.run_with_async_db(
         fetch.load_database_objects(workflow.id, step2.id)
     )

     self.assertEqual(loaded[4], step1.cached_render_result)
     self.assertEqual(
         loaded.input_cached_render_result, step1.cached_render_result
     )
示例#21
0
    def test_email_delta_when_errors_change(self, email_delta):
        # The stale cached result carries one error; the module now returns a
        # different error. With notifications enabled on the step, executing
        # it must trigger email_delta.
        user = create_test_user()
        workflow = Workflow.create_and_init(owner_id=user.id)
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            notifications=True,
        )
        # The cache must really be populated for this setup: the code under
        # test only tries to open the render result when the database says
        # there is one.
        old_errors = [
            RenderError(I18nMessage("py.renderer.execute.step.noModule", {}, None))
        ]
        write_to_rendercache(
            workflow,
            step,
            workflow.last_delta_id - 1,  # stale
            table=make_table(),
            errors=old_errors,
        )

        # returns different error
        render_code = 'import pandas as pd\ndef render(table, params): return [{"id": "err"}]'
        module_zipfile = create_module_zipfile(
            "x", spec_kwargs={"loads_data": True}, python_code=render_code
        )

        execute_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=workflow,
            step=step,
            module_zipfile=module_zipfile,
            params={},
            tab_name=tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**execute_kwargs))

        email_delta.assert_called()  # there's new data
示例#22
0
 def test_auth_report_viewer_allowed_auto_report_chart(self):
     # A user holding only the REPORT_VIEWER role on the workflow must get a
     # 200 when self.step uses the "chart" module (created with
     # html_output=True).
     cached_table = make_table(make_column("A", [1]))
     write_to_rendercache(
         self.workflow, self.step, 1, cached_table, json={"hello": "world!"}
     )

     viewer = create_test_user("alice", "*****@*****.**")
     self.workflow.acl.create(email="*****@*****.**", role=Role.REPORT_VIEWER)
     self.client.force_login(viewer)

     create_module_zipfile("chart", spec_kwargs={"html_output": True})
     self.step.module_id_name = "chart"
     self.step.save(update_fields=["module_id_name"])

     response = self._request()

     self.assertEqual(
         response.status_code, 200, "Should have access to Chart step"
     )
示例#23
0
    def test_current_table_zero_columns(self):
        # With a zero-column table cached, both result-table endpoints must
        # answer OK: the CSV stream yields no chunks and the JSON stream
        # decodes to an empty list.
        write_to_rendercache(self.workflow, self.step2, 2, make_table())

        # CSV
        csv_url = f"/workflows/{self.workflow.id}/steps/step-2/current-result-table.csv"
        csv_response = self.client.get(csv_url)
        self.assertEqual(csv_response.status_code, status.OK)
        self.assertEqual(list(csv_response.streaming_content), [])

        # JSON
        json_url = f"/workflows/{self.workflow.id}/steps/step-2/current-result-table.json"
        json_response = self.client.get(json_url)
        self.assertEqual(json_response.status_code, status.OK)
        self.assertEqual(read_streaming_json(json_response), [])
示例#24
0
 def test_workflow_view_triggers_render_if_stale_cache(self):
     # The step's cache is written for delta 1, then the step is moved on to
     # delta 2. Fetching the workflow page as self.user must queue a render
     # for the workflow at its last delta.
     step = self.tab1.steps.create(
         order=0,
         slug="step-1",
         last_relevant_delta_id=1,
         cached_render_result_delta_id=1,
     )
     # Cache a result that is about to become stale
     stale_table = make_table(make_column("A", ["a"]))
     write_to_rendercache(self.workflow1, step, 1, stale_table)
     step.last_relevant_delta_id = 2
     step.save(update_fields=["last_relevant_delta_id"])

     self.client.force_login(self.user)
     workflow_url = "/workflows/%d/" % self.workflow1.id
     self.client.get(workflow_url)

     self.queue_render.assert_called_with(
         self.workflow1.id, self.workflow1.last_delta_id
     )
示例#25
0
    def test_cached_result_has_wrong_delta_id(self):
        # The result is written for delta 2, then the step's database fields
        # are bumped to delta 3. A tile request for delta-2 must answer
        # NOT_FOUND with "delta_id result not cached".
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(self.workflow, self.step2, 2, cached_table)

        self.step2.cached_render_result_delta_id = 3
        self.step2.last_relevant_delta_id = 3
        changed_fields = ["cached_render_result_delta_id", "last_relevant_delta_id"]
        self.step2.save(update_fields=changed_fields)

        url = f"/workflows/{self.workflow.id}/tiles/step-2/delta-2/0,0.json"
        response = self.client.get(url)

        self.assertEqual(response.status_code, status.NOT_FOUND)
        payload = json.loads(response.content)
        self.assertEqual(payload, {"error": "delta_id result not cached"})
示例#26
0
    def test_cached_result_has_wrong_delta_id(self):
        # The result is cached at delta 1, then the step's
        # last_relevant_delta_id is set to 3. The request must answer
        # NOT_FOUND with "render result not in cache".
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(
            self.workflow, self.step, 1, cached_table, json={"hello": "world"}
        )

        self.step.last_relevant_delta_id = 3
        self.step.save(update_fields=["last_relevant_delta_id"])

        response = self._request()

        self.assertEqual(response.status_code, status.NOT_FOUND)
        payload = json.loads(response.content)
        self.assertEqual(payload, {"error": "render result not in cache"})
示例#27
0
    def test_email_delta_ignore_corrupt_cache_error(self, email_delta, read_cache):
        # The read_cache mock raises CorruptCacheError. Even though the module
        # returns different data, the test expects an ERROR-level log and no
        # email_delta call.
        user = create_test_user()
        workflow = Workflow.create_and_init(owner_id=user.id)
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            notifications=True,
        )
        # The cache must really be populated for this setup: the code under
        # test only tries to open the render result when the database says
        # there is one.
        old_table = make_table(make_column("A", [1]))
        write_to_rendercache(
            workflow,
            step,
            workflow.last_delta_id - 1,  # stale
            old_table,
        )
        read_cache.side_effect = rendercache.CorruptCacheError

        # returns different data -- but CorruptCacheError means we won't care.
        render_code = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        module_zipfile = create_module_zipfile(
            "x", spec_kwargs={"loads_data": True}, python_code=render_code
        )

        execute_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=workflow,
            step=step,
            module_zipfile=module_zipfile,
            params={},
            tab_name=tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.ERROR):
            self.run_with_async_db(execute_step(**execute_kwargs))

        email_delta.assert_not_called()
示例#28
0
 def test_auth_report_viewer_allowed_custom_report_chart(self):
     # A user holding only the REPORT_VIEWER role must get a 200 when the
     # workflow has a custom report containing a Chart block that points at
     # self.step.
     cached_table = make_table(make_column("A", [1]))
     write_to_rendercache(
         self.workflow, self.step, 1, cached_table, json={"hello": "world!"}
     )

     viewer = create_test_user("alice", "*****@*****.**")
     self.workflow.acl.create(email="*****@*****.**", role=Role.REPORT_VIEWER)

     self.workflow.has_custom_report = True
     self.workflow.save(update_fields=["has_custom_report"])
     self.workflow.blocks.create(
         position=0,
         slug="block-1",
         block_type="Chart",
         step_id=self.step.id,
     )

     self.client.force_login(viewer)
     response = self._request()

     self.assertEqual(
         response.status_code, 200, "Should have access to Chart step"
     )
示例#29
0
    def test_email_delta(self, email_delta):
        # The stale cached table holds A == [1]; the module now renders
        # A == [2]. With notifications enabled, executing the step must call
        # email_delta with a delta naming the workflow owner, the workflow and
        # the step.
        user = create_test_user()
        workflow = Workflow.create_and_init(owner_id=user.id)
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            notifications=True,
        )
        old_table = make_table(make_column("A", [1]))
        write_to_rendercache(
            workflow,
            step,
            workflow.last_delta_id - 1,  # stale
            old_table,
        )

        render_code = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        module_zipfile = create_module_zipfile(
            "x", spec_kwargs={"loads_data": True}, python_code=render_code
        )

        execute_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=workflow,
            step=step,
            module_zipfile=module_zipfile,
            params={},
            tab_name=tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**execute_kwargs))

        email_delta.assert_called()
        # First positional argument of the most recent call.
        sent_delta = email_delta.call_args[0][0]

        self.assertEqual(sent_delta.user, workflow.owner)
        self.assertEqual(sent_delta.workflow, workflow)
        self.assertEqual(sent_delta.step, step)
示例#30
0
    def test_email_delta_when_fresh_crr_is_unreachable(self, email_delta):
        # The old cached result has data; the module now returns an empty
        # DataFrame (treated here as "unreachable"). That still counts as a
        # change, so email_delta must be called.
        user = create_test_user()
        workflow = Workflow.create_and_init(owner_id=user.id)
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            notifications=True,
        )
        old_table = make_table(make_column("A", [1]))
        write_to_rendercache(workflow, step, workflow.last_delta_id - 1, old_table)

        # returns empty result -- meaning, "unreachable"
        render_code = "import pandas as pd\ndef render(table, params): return pd.DataFrame({})"
        module_zipfile = create_module_zipfile(
            "x", spec_kwargs={"loads_data": True}, python_code=render_code
        )

        execute_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=workflow,
            step=step,
            module_zipfile=module_zipfile,
            params={},
            tab_name=tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**execute_kwargs))

        email_delta.assert_called()  # there's new data -- or, well, non-data