def test_find_workflow_pick_max_plan(self):
    # When one owner holds several subscriptions, the plan with the longest
    # max_delta_age_in_days decides the stale-delta cutoff.
    longest_retention = datetime.timedelta(days=30)
    user = create_user()
    short_price = create_price(
        product=create_product(max_delta_age_in_days=1),
        stripe_price_id="price_plan1",
    )
    long_price = create_price(
        product=create_product(max_delta_age_in_days=30),
        stripe_price_id="price_plan2",
    )
    create_subscription(user, short_price, stripe_subscription_id="sub_1")
    create_subscription(user, long_price, stripe_subscription_id="sub_2")
    with freeze_time("2020-01-01"):
        workflow1 = Workflow.create_and_init(owner=user)
        do(SetWorkflowTitle, workflow1.id, new_value="1")
    with freeze_time("2020-01-15"):
        workflow2 = Workflow.create_and_init(owner=user)
        do(SetWorkflowTitle, workflow2.id, new_value="1")
    now = datetime.datetime(2020, 2, 2)
    # Only workflow1's delta is older than (now - 30 days); workflow2's is not.
    result = find_workflows_with_stale_deltas(now)
    self.assertEqual(result, [(workflow1.id, now - longest_retention)])
def test_set_block_markdown_happy_path(self, send_update):
    # send_update is a mock for an async function: give it a pre-resolved Future.
    resolved = asyncio.Future()
    resolved.set_result(None)
    send_update.return_value = resolved
    workflow = Workflow.create_and_init(has_custom_report=True)
    workflow.blocks.create(
        position=0, slug="block-1", block_type="Text", text_markdown="foo"
    )
    # do: markdown becomes "bar" and clients are told about it
    self.run_with_async_db(
        commands.do(
            SetBlockMarkdown,
            workflow_id=workflow.id,
            slug="block-1",
            markdown="bar",
        )
    )
    self.assertEqual(
        list(workflow.blocks.values_list("text_markdown", flat=True)), ["bar"]
    )
    update_after_do = send_update.call_args[0][1]
    self.assertEqual(update_after_do.blocks, {"block-1": clientside.TextBlock("bar")})
    # undo: back to "foo", with another client update
    self.run_with_async_db(commands.undo(workflow.id))
    self.assertEqual(
        list(workflow.blocks.values_list("text_markdown", flat=True)), ["foo"]
    )
    update_after_undo = send_update.call_args[0][1]
    self.assertEqual(
        update_after_undo.blocks, {"block-1": clientside.TextBlock("foo")}
    )
def test_no_hard_or_soft_delete_when_deleting_applied_delta(self):
    """Deleting an applied AddTab delta must leave its tab alive.

    Fixed: ``assertEquals`` is a deprecated alias (removed in Python 3.12's
    unittest); use ``assertEqual``.
    """
    workflow = Workflow.create_and_init()
    cmd = self.run_with_async_db(
        commands.do(AddTab, workflow_id=workflow.id, slug="tab-2", name="A")
    )
    cmd.delete()
    # tab-1 (from create_and_init) plus tab-2 (applied, so it stays live).
    self.assertEqual(workflow.live_tabs.count(), 2)
def test_execute_new_revision(self):
    # A step whose cached render result is older than last_relevant_delta_id
    # must be re-rendered from the module.
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    create_module_zipfile(
        "mod",
        spec_kwargs={"loads_data": True},
        python_code=(
            "import pandas as pd\n"
            'def render(table, params): return pd.DataFrame({"B": [2]})'
        ),
    )
    step = tab.steps.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=2,
        module_id_name="mod",
    )
    # Seed the rendercache at delta 1 -- stale relative to delta 2.
    write_to_rendercache(workflow, step, 1, make_table(make_column("A", ["a"])))
    self._execute(workflow)
    step.refresh_from_db()
    with open_cached_render_result(step.cached_render_result) as result:
        assert_arrow_table_equals(result.table, make_table(make_column("B", [2])))
def test_change_parameters_deny_invalid_params(self):
    # Submitting a value that fails the module's param schema must raise.
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        params={"x": 1},
    )
    create_module_zipfile(
        "x",
        spec_kwargs={"parameters": [{"id_name": "x", "type": "integer"}]},
    )
    self.kernel.migrate_params.side_effect = lambda m, p: p
    # The user requests a param change with a non-integer for "x".
    with self.assertRaises(ValueError), self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            commands.do(
                SetStepParams,
                workflow_id=workflow.id,
                step=step,
                new_values={"x": "Threeve"},
            )
        )
def test_execute_migrate_params_module_error_gives_default_params(self):
    # If the module's migrate_params() raises, the renderer falls back to the
    # spec defaults ({"x": "def"}) instead of the stored params ({"x": "good"}).
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    create_module_zipfile(
        "mod",
        spec_kwargs={
            "loads_data": True,
            "parameters": [{"id_name": "x", "type": "string", "default": "def"}],
        },
        python_code=textwrap.dedent(
            """
            import json
            def render(table, params):
                return "params: " + json.dumps(params)
            def migrate_params(params):
                cause_module_error()  # NameError
            """
        ),
    )
    step = tab.steps.create(
        order=0, slug="step-1", module_id_name="mod", params={"x": "good"}
    )
    self._execute(workflow)
    step.refresh_from_db()
    # render() returned a string, which becomes the step's render error --
    # proving it saw the default params.
    self.assertEqual(
        step.cached_render_result_errors,
        [RenderError(TODO_i18n('params: {"x": "def"}'))],
    )
def test_clean_file_no_s3_file(self):
    """clean_value(File) returns None when the S3 object is missing.

    Fixed: the key was built with JS-style ``${...}`` placeholders inside a
    Python f-string, leaving literal ``$`` characters in the key; use plain
    ``{...}``. Also renamed local ``id`` to avoid shadowing the builtin.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(module_id_name="uploadfile", order=0, slug="step-1")
    step2 = tab.steps.create(module_id_name="uploadfile", order=1, slug="step-2")
    file_uuid = str(uuid.uuid4())
    key = f"wf-{workflow.id}/wfm-{step.id}/{file_uuid}"
    # Oops -- let's _not_ put the file!
    # s3.put_bytes(s3.UserFilesBucket, key, b'1234')
    UploadedFileModel.objects.create(
        step=step2, name="x.csv.gz", size=4, uuid=file_uuid, key=key
    )
    result = self._call_clean_value(ParamSchema.File(), file_uuid, step_id=step.id)
    self.assertIsNone(result)
    # Assert that if a temporary file was created to house the download, it
    # no longer exists.
    self.assertListEqual(list(self.basedir.iterdir()), [])
def test_clean_file_safe_filename(self):
    """A hostile uploaded-file name must be sanitized in the on-disk filename.

    Fixed: the key was built with JS-style ``${...}`` placeholders inside a
    Python f-string, leaving literal ``$`` characters in the key; use plain
    ``{...}``. (The key is written and read with the same value, so the test's
    semantics are unchanged.)
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(module_id_name="uploadfile", order=0, slug="step-1")
    key = f"wf-{workflow.id}/wfm-{step.id}/6e00511a-8ac4-4b72-9acc-9d069992b5cf"
    s3.put_bytes(s3.UserFilesBucket, key, b"1234")
    model = UploadedFileModel.objects.create(
        step=step,
        name="/etc/passwd.$/etc/passwd",
        size=4,
        uuid="6e00511a-8ac4-4b72-9acc-9d069992b5cf",
        key=key,
    )
    with ExitStack() as inner_stack:
        result = self._call_prep_params(
            ParamSchema.Dict({"file": ParamSchema.File()}),
            {"file": "6e00511a-8ac4-4b72-9acc-9d069992b5cf"},
            step_id=step.id,
            exit_stack=inner_stack,
        )
        # The "/" and "$" characters are replaced with "-"/"." in the filename.
        self.assertEqual(
            result.uploaded_files["6e00511a-8ac4-4b72-9acc-9d069992b5cf"],
            UploadedFile(
                "/etc/passwd.$/etc/passwd",
                "6e00511a-8ac4-4b72-9acc-9d069992b5cf_-etc-passwd.--etc-passwd",
                model.created_at,
            ),
        )
def test_delete_remove_leaked_stored_objects_and_uploaded_files(self):
    # Deleting a Workflow must delete _all_ its data. Postgres cascades the
    # DB rows; S3 objects for StoredObjects/UploadedFiles need explicit
    # cleanup -- including "leaked" objects that have no DB row at all.
    # (This test isn't about minutae; TODO fix all other bugs that leak data.)
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    # "Leak" a StoredObject: write the S3 file but no StoredObject record.
    stored_object_key = f"{workflow.id}/{wf_module.id}/1234.dat"
    minio.put_bytes(minio.StoredObjectsBucket, stored_object_key, b"1234")
    # "Leak" an UploadedFile the same way. (Even once every delete-path bug
    # is fixed, legacy code already left such orphan objects behind.)
    uploaded_file_key = f"{wf_module.uploaded_file_prefix}{uuid.uuid4()}.csv"
    minio.put_bytes(minio.UserFilesBucket, uploaded_file_key, b"A\nb")
    workflow.delete()
    self.assertFalse(minio.exists(minio.StoredObjectsBucket, stored_object_key))
    self.assertFalse(minio.exists(minio.UserFilesBucket, uploaded_file_key))
def test_delete_orphans(self):
    # do x3, undo x2, then do again: the two undone deltas become orphans
    # and must be hard-deleted.
    workflow = Workflow.create_and_init()
    self.run_with_async_db(
        commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="1")
    )
    delta2 = self.run_with_async_db(
        commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="2")
    )
    delta3 = self.run_with_async_db(
        commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="3")
    )
    self.run_with_async_db(commands.undo(workflow.id))
    self.run_with_async_db(commands.undo(workflow.id))
    # Create a new delta ... making delta2 and delta3 obsolete
    self.run_with_async_db(
        commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="4")
    )
    for orphan in (delta2, delta3):
        with self.assertRaises(Delta.DoesNotExist):
            orphan.refresh_from_db()
def test_change_notes(self):
    # SetStepNote round-trips through do/undo/redo.
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().steps.create(
        order=0,
        slug="step-1",
        notes="text1",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    # do
    self.run_with_async_db(
        commands.do(
            SetStepNote,
            workflow_id=workflow.id,
            step=step,
            new_value="text2",
        )
    )
    self.assertEqual(step.notes, "text2")  # in-memory object was updated, too
    step.refresh_from_db()
    self.assertEqual(step.notes, "text2")
    # undo
    self.run_with_async_db(commands.undo(workflow.id))
    step.refresh_from_db()
    self.assertEqual(step.notes, "text1")
    # redo
    self.run_with_async_db(commands.redo(workflow.id))
    step.refresh_from_db()
    self.assertEqual(step.notes, "text2")
def test_add_module_default_params(self):
    # AddStep with empty param_values must fill in each spec default.
    workflow = Workflow.create_and_init()
    create_module_zipfile(
        "blah",
        spec_kwargs={
            "parameters": [
                {"id_name": "a", "type": "string", "default": "x"},
                {"id_name": "c", "type": "checkbox", "name": "C", "default": True},
            ]
        },
    )
    cmd = self.run_with_async_db(
        commands.do(
            AddStep,
            workflow_id=workflow.id,
            tab=workflow.tabs.first(),
            slug="step-1",
            module_id_name="blah",
            position=0,
            param_values={},
        )
    )
    self.assertEqual(cmd.step.params, {"a": "x", "c": True})
def setUp(self):
    """Give every test one workflow holding a single step."""
    super().setUp()
    self.workflow = Workflow.create_and_init()
    self.step = self.workflow.tabs.first().steps.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=self.workflow.last_delta_id,
    )
def test_delete_restore_table_block(self, send_update):
    # send_update is a mock for an async function: give it a resolved Future.
    resolved = asyncio.Future()
    resolved.set_result(None)
    send_update.return_value = resolved
    workflow = Workflow.create_and_init(has_custom_report=True)
    tab = workflow.tabs.first()
    workflow.blocks.create(
        position=0, slug="block-1", block_type="Text", text_markdown="1"
    )
    workflow.blocks.create(
        position=1, slug="block-2", block_type="Table", tab_id=tab.id
    )
    # Delete the Table block, then undo: it must reappear with its tab_id.
    self.run_with_async_db(
        commands.do(DeleteBlock, workflow_id=workflow.id, slug="block-2")
    )
    self.run_with_async_db(commands.undo(workflow.id))
    self.assertEqual(
        list(
            workflow.blocks.values_list("slug", "position", "block_type", "tab_id")
        ),
        [("block-1", 0, "Text", None), ("block-2", 1, "Table", tab.id)],
    )
def test_delete_tab(self):
    # DeleteTab soft-deletes; undo restores position; redo soft-deletes again.
    workflow = Workflow.create_and_init()  # creates tab-1
    tab2 = workflow.tabs.create(position=1, slug="tab-2")
    workflow.tabs.create(position=2, slug="tab-3")

    def live_tabs():
        return list(workflow.live_tabs.values_list("slug", "position"))

    # do: tab-2 is only _soft_-deleted; remaining tabs close the gap
    self.run_with_async_db(
        commands.do(DeleteTab, workflow_id=workflow.id, tab=tab2)
    )
    tab2.refresh_from_db()
    self.assertEqual(tab2.is_deleted, True)
    self.assertEqual(live_tabs(), [("tab-1", 0), ("tab-3", 1)])
    # undo: tab-2 is back at its old position
    self.run_with_async_db(commands.undo(workflow.id))
    tab2.refresh_from_db()
    self.assertEqual(tab2.is_deleted, False)
    self.assertEqual(live_tabs(), [("tab-1", 0), ("tab-2", 1), ("tab-3", 2)])
    # redo: soft-deleted once more
    self.run_with_async_db(commands.redo(workflow.id))
    tab2.refresh_from_db()
    self.assertEqual(tab2.is_deleted, True)
    self.assertEqual(live_tabs(), [("tab-1", 0), ("tab-3", 1)])
def test_deleted_module(self):
    # A step whose module no longer exists renders no columns, writes an
    # empty output file, and records a "noModule" error.
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="deleted_module",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    result = self.run_with_async_db(
        execute_step(
            chroot_context=self.chroot_context,
            workflow=workflow,
            step=step,
            module_zipfile=None,  # the module is gone
            params={},
            tab_name=tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
    )
    self.assertEqual(result.columns, [])
    self.assertEqual(self.output_path.read_bytes(), b"")
    step.refresh_from_db()
    self.assertEqual(
        step.cached_render_result.errors,
        [RenderError(I18nMessage("py.renderer.execute.step.noModule", {}, None))],
    )
def test_change_last_relevant_delta_ids_of_dependent_steps(self):
    # Renaming tab1 must bump last_relevant_delta_id on a step in _another_
    # tab whose "tab"-type param points at tab1.
    workflow = Workflow.create_and_init()
    delta_id = workflow.last_delta_id
    tab1 = workflow.tabs.first()
    tab2 = workflow.tabs.create(position=1, slug="tab-2", name="Tab 2")
    # A Step on tab2 that depends on tab1 via its "tab" param
    module_zipfile = create_module_zipfile(
        "x", spec_kwargs={"parameters": [{"id_name": "tab", "type": "tab"}]}
    )
    step = tab2.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=delta_id,
        params={"tab": tab1.slug},
        cached_migrated_params={"tab": tab1.slug},
        cached_migrated_params_module_version=module_zipfile.version,
    )
    cmd = self.run_with_async_db(
        commands.do(
            SetTabName,
            workflow_id=workflow.id,
            tab=tab1,
            new_name=tab1.name + "X",
        )
    )
    step.refresh_from_db()
    self.assertEqual(step.last_relevant_delta_id, cmd.id)
def test_delete_orphans_does_not_delete_new_tab(self):
    # Creating a Delta prunes orphan Deltas along with their Tabs/Steps. A
    # _new_ Tab created by the very AddTab being applied must survive, so
    # creation and deletion have to be ordered carefully.
    workflow = Workflow.create_and_init()
    # An undone AddTab leaves an orphan Delta owning a soft-deleted Tab.
    delta1 = self.run_with_async_db(
        commands.do(AddTab, workflow_id=workflow.id, slug="tab-2", name="name-2")
    )
    self.run_with_async_db(commands.undo(workflow.id))
    # A fresh AddTab prunes delta1 -- and _should_ delete `tab-2` with it.
    self.run_with_async_db(
        commands.do(AddTab, workflow_id=workflow.id, slug="tab-3", name="name-3")
    )
    with self.assertRaises(Tab.DoesNotExist):
        delta1.tab.refresh_from_db()  # orphan tab was deleted
    with self.assertRaises(Delta.DoesNotExist):
        delta1.refresh_from_db()
def test_do_delete_partial_delta_chain(self, send_update):
    # do, do, undo, do: the undone middle delta disappears and the chain
    # becomes [delta1, delta3].
    send_update.side_effect = async_noop
    workflow = Workflow.create_and_init(name="hello")
    delta1 = self.run_with_async_db(
        commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="1")
    )
    self.run_with_async_db(
        commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="2")
    )
    self.run_with_async_db(commands.undo(workflow.id))
    send_update.reset_mock()
    delta3 = self.run_with_async_db(
        commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="3")
    )
    send_update.assert_called()
    workflow.refresh_from_db()
    self.assertEqual(workflow.name, "3")
    self.assertEqual(workflow.last_delta_id, delta3.id)
    self.assertEqual(
        list(workflow.deltas.values_list("id", flat=True)),
        [delta1.id, delta3.id],
    )
def test_delete_protects_soft_deleted_step_with_reference(self):
    # A soft-deleted step that a surviving delta references must not be
    # garbage-collected.
    workflow = Workflow.create_and_init()
    # Here's a soft-deleted module
    step = workflow.tabs.first().steps.create(
        order=0, slug="step-1", module_id_name="foo", is_deleted=True
    )
    # "protect" it: here's a delta we _aren't_ deleting
    self.run_with_async_db(
        commands.do(
            SetStepNote,
            workflow_id=workflow.id,
            step=step,
            new_value="1",
        )
    )
    # now delete a delta
    delta2 = self.run_with_async_db(
        commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="1")
    )
    self.run_with_async_db(commands.undo(workflow.id))  # fix workflow.last_delta_id
    delta2.delete()
    workflow.delete_orphan_soft_deleted_models()
    step.refresh_from_db()  # no DoesNotExist -- a delta depends on it
def test_delete_lone_step(self):
    # DeleteStep soft-deletes; undo restores; redo soft-deletes again.
    workflow = Workflow.create_and_init()  # creates tab-1
    tab1 = workflow.tabs.first()
    step = tab1.steps.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=workflow.last_delta_id,
        params={"url": ""},
    )

    def live_steps():
        return list(tab1.live_steps.values_list("slug", "order"))

    # do: it is only _soft_-deleted
    self.run_with_async_db(
        commands.do(DeleteStep, workflow_id=workflow.id, step=step)
    )
    step.refresh_from_db()
    self.assertEqual(step.is_deleted, True)
    self.assertEqual(live_steps(), [])
    # undo
    self.run_with_async_db(commands.undo(workflow.id))
    step.refresh_from_db()
    self.assertEqual(step.is_deleted, False)
    self.assertEqual(live_steps(), [("step-1", 0)])
    # redo: soft-deleted once more
    self.run_with_async_db(commands.redo(workflow.id))
    step.refresh_from_db()
    self.assertEqual(step.is_deleted, True)
    self.assertEqual(live_steps(), [])
def test_delete_tab_0(self):
    # Deleting the selected tab (position 0) keeps the selection at 0.
    workflow = Workflow.create_and_init(selected_tab_position=0)
    first_tab = workflow.tabs.first()
    workflow.tabs.create(position=1)
    self.run_with_async_db(
        commands.do(DeleteTab, workflow_id=workflow.id, tab=first_tab)
    )
    workflow.refresh_from_db()
    self.assertEqual(workflow.selected_tab_position, 0)
def test_do_set_last_applied_at(self):
    # A freshly-applied delta records "now" as its last_applied_at.
    frozen_date = datetime.datetime.now()
    workflow = Workflow.create_and_init()
    with freeze_time(frozen_date):
        delta = self.run_with_async_db(
            commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="1")
        )
    self.assertEqual(delta.last_applied_at, frozen_date)
def test_delete_scopes_tab_delete_by_workflow(self):
    # Cleaning up workflow1's orphans must never touch workflow2's tabs.
    workflow = Workflow.create_and_init()
    workflow2 = Workflow.create_and_init()
    # An unreferenced extra tab on workflow2 that "shouldn't" exist.
    tab = workflow2.tabs.create(position=1)
    # Delete every delta on workflow1, then collect its orphans.
    self.run_with_async_db(
        commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="1")
    )
    self.run_with_async_db(commands.undo(workflow.id))  # fix workflow.last_delta_id
    workflow.deltas.all().delete()
    workflow.delete_orphan_soft_deleted_models()
    tab.refresh_from_db()  # no DoesNotExist: leave workflow2 alone
def test_delete_last_tab_noop(self):
    # Deleting a workflow's only tab does nothing: no delta, no soft-delete.
    workflow = Workflow.create_and_init(selected_tab_position=1)
    only_tab = workflow.tabs.first()
    self.run_with_async_db(
        commands.do(DeleteTab, workflow_id=workflow.id, tab=only_tab)
    )
    self.assertEqual(workflow.deltas.count(), 0)
    only_tab.refresh_from_db()
    self.assertEqual(only_tab.is_deleted, False)
def test_redo_after_final_delta(self, send_update):
    # redo at the tip of the delta chain is a no-op: no client update sent.
    send_update.side_effect = async_noop
    workflow = Workflow.create_and_init(name="hello")
    self.run_with_async_db(
        commands.do(SetWorkflowTitle, workflow_id=workflow.id, new_value="1")
    )
    send_update.reset_mock()
    self.run_with_async_db(commands.redo(workflow.id))
    send_update.assert_not_called()
def test_keep_recent_done_deltas(self):
    # Deltas newer than the cutoff must survive stale-delta cleanup.
    workflow = Workflow.create_and_init()
    with freeze_time("2020-02-02"):
        do(SetWorkflowTitle, workflow.id, new_value="foo")
        do(SetWorkflowTitle, workflow.id, new_value="bar")
    # Cutoff (2020-01-01) is older than both deltas (2020-02-02): keep both.
    delete_workflow_stale_deltas(workflow.id, datetime.datetime(2020, 1, 1))
    self.assertEqual(workflow.deltas.count(), 2)
    be_paranoid_and_assert_commands_apply(workflow)
def test_no_op(self):
    # Setting a tab's name to its current value creates no delta.
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    tab.name = "foo"
    tab.save(update_fields=["name"])
    cmd = self.run_with_async_db(
        commands.do(SetTabName, workflow_id=workflow.id, tab=tab, new_name="foo")
    )
    self.assertIsNone(cmd)
def test_fetch_result_happy_path(self):
    # render() must receive the step's stored fetch result -- both its
    # fetch_errors and its Parquet data -- exactly as saved. The embedded
    # module code asserts on the values itself; a failing assert inside the
    # module would surface as a render error (hence assertLogs below).
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        fetch_errors=[
            RenderError(I18nMessage("foo", {}, "module")),
            RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
        ],
    )
    # Store a fetched Parquet table and point the step at that version.
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, step.id, path)
    step.stored_data_version = so.stored_at
    step.save(update_fields=["stored_data_version"])
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        python_code=textwrap.dedent(
            """
            import pyarrow as pa
            import pandas as pd
            from pandas.testing import assert_frame_equal
            from cjwkernel.types import RenderError, I18nMessage
            def render(table, params, *, fetch_result, **kwargs):
                assert fetch_result.errors == [
                    RenderError(I18nMessage("foo", {}, "module")),
                    RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
                ]
                fetch_dataframe = pa.parquet.read_table(str(fetch_result.path))
                assert_frame_equal(fetch_dataframe, pd.DataFrame({"A": [1]}))
                return pd.DataFrame()
            """
        ),
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                chroot_context=self.chroot_context,
                workflow=workflow,
                step=step,
                module_zipfile=module_zipfile,
                params={},
                tab_name=tab.name,
                input_path=self.empty_table_path,
                input_table_columns=[],
                tab_results={},
                output_path=self.output_path,
            )
        )
def test_reorder_blocks_on_automatically_generated_report(self, send_update):
    # send_update is a mock for an async function: give it a resolved Future.
    resolved = asyncio.Future()
    resolved.set_result(None)
    send_update.return_value = resolved
    create_module_zipfile("chart", spec_kwargs={"html_output": True})
    # No custom report: the report is auto-generated, one block per chart step.
    workflow = Workflow.create_and_init(has_custom_report=False)
    tab = workflow.tabs.first()
    tab.steps.create(order=0, slug="step-1", module_id_name="nochart")
    step2 = tab.steps.create(order=1, slug="step-2", module_id_name="chart")
    step3 = tab.steps.create(order=2, slug="step-3", module_id_name="chart")
    # Reordering must materialize the auto-report as a custom report.
    self.run_with_async_db(
        commands.do(
            ReorderBlocks,
            workflow_id=workflow.id,
            slugs=["block-auto-step-3", "block-auto-step-2"],
        )
    )
    self.assertEqual(
        list(
            workflow.blocks.values_list(
                "position", "slug", "block_type", "text_markdown", "step_id"
            )
        ),
        [
            (0, "block-auto-step-3", "Chart", "", step3.id),
            (1, "block-auto-step-2", "Chart", "", step2.id),
        ],
    )
    update_after_do = send_update.call_args[0][1]
    self.assertEqual(update_after_do.workflow.has_custom_report, True)
    self.assertEqual(
        update_after_do.workflow.block_slugs,
        ["block-auto-step-3", "block-auto-step-2"],
    )
    self.assertEqual(
        update_after_do.blocks,
        {
            "block-auto-step-2": clientside.ChartBlock("step-2"),
            "block-auto-step-3": clientside.ChartBlock("step-3"),
        },
    )
    # undo: back to the auto-generated report (no stored blocks at all)
    self.run_with_async_db(commands.undo(workflow.id))
    self.assertEqual(list(workflow.blocks.values_list("slug", "position")), [])
    update_after_undo = send_update.call_args[0][1]
    self.assertEqual(update_after_undo.workflow.has_custom_report, False)
    self.assertEqual(update_after_undo.workflow.block_slugs, [])
    self.assertEqual(
        update_after_undo.clear_block_slugs,
        frozenset(["block-auto-step-2", "block-auto-step-3"]),
    )
    self.assertEqual(update_after_undo.blocks, {})