def test_render_using_tab_output(self):
    """A Tab param reaches legacy render() fully hydrated: name, columns, dataframe."""

    def check_tab_param(table, params):
        tab = params["tabparam"]
        self.assertEqual(tab.name, "Tab 1")
        expected_columns = {
            "X": ptypes.RenderColumn("X", "number", "{:,d}"),
            "Y": ptypes.RenderColumn("Y", "text", None),
        }
        self.assertEqual(tab.columns, expected_columns)
        assert_frame_equal(tab.dataframe, pd.DataFrame({"X": [1], "Y": ["y"]}))

    schema = ParamSchema.Dict({"tabparam": ParamSchema.Tab()})
    with ModuleTestEnv(param_schema=schema, render=check_tab_param) as env:
        # Write the referenced tab's output where the module sandbox can see it.
        with arrow_table_context(
            make_column("X", [1], format="{:,d}"),
            make_column("Y", ["y"]),
            dir=env.basedir,
        ) as (path, _):
            env.call_render(
                make_table(),
                params={"tabparam": "tab-1"},
                tab_outputs={
                    "tab-1": TabOutput(tab_name="Tab 1", table_filename=path.name)
                },
            )
def test_clean_file_safe_filename(self):
    """An uploaded file with hostile characters in its name gets a sanitized on-disk filename."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(module_id_name="uploadfile", order=0, slug="step-1")
    # BUGFIX: the key was built with `${workflow.id}`/`${step.id}` -- JavaScript
    # template-literal syntax. In a Python f-string that leaves a literal `$`
    # before each interpolated value, producing an unrealistic S3 key.
    key = f"wf-{workflow.id}/wfm-{step.id}/6e00511a-8ac4-4b72-9acc-9d069992b5cf"
    s3.put_bytes(s3.UserFilesBucket, key, b"1234")
    model = UploadedFileModel.objects.create(
        step=step,
        name="/etc/passwd.$/etc/passwd",
        size=4,
        uuid="6e00511a-8ac4-4b72-9acc-9d069992b5cf",
        key=key,
    )
    with ExitStack() as inner_stack:
        result = self._call_prep_params(
            ParamSchema.Dict({"file": ParamSchema.File()}),
            {"file": "6e00511a-8ac4-4b72-9acc-9d069992b5cf"},
            step_id=step.id,
            exit_stack=inner_stack,
        )
        # `/` and `$` must not survive into the local filename.
        self.assertEqual(
            result.uploaded_files["6e00511a-8ac4-4b72-9acc-9d069992b5cf"],
            UploadedFile(
                "/etc/passwd.$/etc/passwd",
                "6e00511a-8ac4-4b72-9acc-9d069992b5cf_-etc-passwd.--etc-passwd",
                model.created_at,
            ),
        )
def test_render_uploaded_files(self):
    """render_arrow_v1 receives uploaded_files keyed by UUID, exposing name and uploaded_at."""
    file_uuid = "406b5e37-f217-4e87-b6b2-eede3bec6492"

    def check_uploaded_files(table, params, *, uploaded_files, **kwargs):
        self.assertEqual(params["file"], file_uuid)
        uploaded = uploaded_files[params["file"]]
        self.assertEqual(uploaded.name, "x.data")
        self.assertEqual(uploaded.uploaded_at, datetime(2021, 4, 21, 12, 4, 5))
        return ArrowRenderResult(make_table())

    schema = ParamSchema.Dict({"file": ParamSchema.File()})
    with ModuleTestEnv(
        param_schema=schema, render_arrow_v1=check_uploaded_files
    ) as env:
        # The sandbox finds the file on disk by its "<uuid>_<name>" filename.
        local_path = env.basedir / (file_uuid + "_x.data")
        local_path.write_bytes(b"hello, world!")
        env.call_render(
            make_table(),
            params={"file": file_uuid},
            uploaded_files={
                file_uuid: UploadedFile(
                    name="x.data",
                    filename=local_path.name,
                    uploaded_at=datetime(2021, 4, 21, 12, 4, 5),
                ),
            },
        )
def test_clean_condition_empty_column_is_none(self):
    """A condition with column="" cleans to None -- alone, and nested in and/or."""
    leaf = {
        "operation": "text_is",
        "column": "",
        "value": "",
        "isCaseSensitive": False,
        "isRegex": False,
    }
    self.assertIsNone(
        self._call_clean_value(
            ParamSchema.Condition(), dict(leaf), input_table_columns=[NUMBER("A")]
        )
    )
    # And test it in the context of a broader and/or
    nested = {
        "operation": "and",
        "conditions": [{"operation": "or", "conditions": [dict(leaf)]}],
    }
    self.assertIsNone(
        self._call_clean_value(
            ParamSchema.Condition(), nested, input_table_columns=[NUMBER("A")]
        )
    )
def test_render_empty_file_param(self):
    """A File param set to None arrives in render() as None."""

    def check_none_file(arrow_table, params, output_path, *args, **kwargs):
        self.assertIsNone(params["file"])

    schema = ParamSchema.Dict({"file": ParamSchema.File()})
    with ModuleTestEnv(param_schema=schema, render=check_none_file) as env:
        env.call_render(make_table(), {"file": None})
def test_param_schema_explicit():
    """An explicit `param_schema` in the spec overrides the implicit one."""
    spec = load_spec(
        {
            "id_name": "x",
            "name": "x",
            "category": "Clean",
            "parameters": [{"id_name": "whee", "type": "custom"}],
            "param_schema": {
                "id_name": {
                    "type": "dict",
                    "properties": {
                        "x": {"type": "integer"},
                        "y": {"type": "string", "default": "X"},
                    },
                }
            },
        }
    )
    expected = ParamSchema.Dict(
        {
            "id_name": ParamSchema.Dict(
                {"x": ParamSchema.Integer(), "y": ParamSchema.String(default="X")}
            )
        }
    )
    assert spec.param_schema == expected
def test_param_schema_implicit():
    """Without `param_schema`, the schema is derived from `parameters` (secrets omitted)."""
    spec = load_spec(
        {
            "id_name": "googlesheets",
            "name": "x",
            "category": "Clean",
            "parameters": [
                {"id_name": "foo", "type": "string", "default": "X"},
                {
                    "id_name": "bar",
                    "type": "secret",
                    "secret_logic": {"provider": "oauth2", "service": "google"},
                },
                {
                    "id_name": "baz",
                    "type": "menu",
                    "options": [
                        {"value": "a", "label": "A"},
                        "separator",
                        {"value": "c", "label": "C"},
                    ],
                    "default": "c",
                },
            ],
        }
    )
    expected = ParamSchema.Dict(
        {
            "foo": ParamSchema.String(default="X"),
            # secret is not in param_schema
            "baz": ParamSchema.Enum(choices=frozenset({"a", "c"}), default="c"),
        }
    )
    assert spec.param_schema == expected
def test_list_recurse():
    """A `list` dtype parses its `inner_dtype` recursively."""
    dtype = {"type": "list", "inner_dtype": {"type": "string"}}
    assert parse(dtype) == ParamSchema.List(ParamSchema.String())
def test_dict_prompting_error_concatenate_different_types(self):
    """Two wrong-typed columns under a Dict yield one WrongColumnType error each."""
    schema = ParamSchema.Dict(
        {
            "x": ParamSchema.Column(column_types=frozenset({"number"})),
            "y": ParamSchema.Column(column_types=frozenset({"number"})),
        }
    )
    with self.assertRaises(PromptingError) as cm:
        self._call_clean_value(
            schema,
            {"x": "A", "y": "B"},
            input_table_columns=[TEXT("A"), TIMESTAMP("B")],
        )
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "timestamp", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_map_recurse():
    """A `map` dtype parses its `value_dtype` recursively."""
    dtype = {"type": "map", "value_dtype": {"type": "string"}}
    assert parse(dtype) == ParamSchema.Map(value_schema=ParamSchema.String())
def test_render_tab_outputs(self):
    """render_arrow_v1 receives tab_outputs keyed by slug, with name and Arrow table."""

    def check_tab_outputs(table, params, *, tab_outputs, **kwargs):
        self.assertEqual(params["tab"], "tab-x")
        tab = tab_outputs["tab-x"]
        self.assertEqual(tab.tab_name, "Tab X")
        expected = make_table(
            make_column("X", [1], format="{:,d}"),
            make_column("Y", ["y"]),
        )
        assert_arrow_table_equals(tab.table, expected)
        return ArrowRenderResult(make_table())

    schema = ParamSchema.Dict({"tab": ParamSchema.Tab()})
    with ModuleTestEnv(
        param_schema=schema, render_arrow_v1=check_tab_outputs
    ) as env:
        with arrow_table_context(
            make_column("X", [1], format="{:,d}"),
            make_column("Y", ["y"]),
            dir=env.basedir,
        ) as (path, _):
            env.call_render(
                make_table(),
                params={"tab": "tab-x"},
                tab_outputs={
                    "tab-x": TabOutput(tab_name="Tab X", table_filename=path.name)
                },
            )
def test_dict_recurse():
    """A `dict` dtype parses each property schema recursively."""
    dtype = {"type": "dict", "properties": {"x": {"type": "string"}}}
    assert parse(dtype) == ParamSchema.Dict(properties={"x": ParamSchema.String()})
def test_clean_normal_dict(self):
    """Cleaning a Dict of already-valid String+Integer values is a no-op."""
    schema = ParamSchema.Dict(
        {"str": ParamSchema.String(), "int": ParamSchema.Integer()}
    )
    cleaned = self._call_clean_value(schema, {"str": "foo", "int": 3})
    self.assertEqual(cleaned, {"str": "foo", "int": 3})
def _(self, schema: ParamSchema.Multitab, value: List[str]) -> List[str]:
    """Clean a Multitab value: drop unresolvable slugs, then order by workflow tab order."""
    # Clean each slug exactly the way a List[Tab] is cleaned; a slug that
    # doesn't resolve to a usable tab comes back as None and is dropped.
    cleaned = self.clean_value_list(
        ParamSchema.List(inner_schema=ParamSchema.Tab()), value
    )
    wanted = frozenset(slug for slug in cleaned if slug is not None)
    # `self.tabs` preserves workflow tab order; emit surviving slugs in that order.
    return [slug for slug in self.tabs.keys() if slug in wanted]
def test_clean_normal_dict(self):
    """clean_value() on a Dict of valid String+Integer values is a no-op."""
    input_shape = TableMetadata(3, [Column("A", ColumnType.Number())])
    schema = ParamSchema.Dict(
        {"str": ParamSchema.String(), "int": ParamSchema.Integer()}
    )
    cleaned = clean_value(schema, {"str": "foo", "int": 3}, input_shape)
    self.assertEqual(cleaned, {"str": "foo", "int": 3})
def test_render_with_no_kwargs(self):
    """A render() that declares no **kwargs still gets called with (table, params)."""

    def multiply(table, params):
        return table * params["n"]

    schema = ParamSchema.Dict({"n": ParamSchema.Float()})
    with ModuleTestEnv(param_schema=schema, render=multiply) as env:
        outcome = env.call_render(make_table(make_column("A", [1])), {"n": 2})
        assert_arrow_table_equals(
            outcome.read_table(), make_table(make_column("A", [2]))
        )
def test_clean_tab_omit_unused_tabs_from_tab_outputs(self):
    """Only the tab a param actually references appears in tab_outputs."""
    # Three rendered tabs exist...
    tab_results = {
        Tab(f"tab-{i}", f"Tab {i}"): StepResult(Path(f"tab-{i}.arrow"), [TEXT("A")])
        for i in (1, 2, 3)
    }
    # ...but the param only points at tab-1.
    result = self._call_prep_params(
        ParamSchema.Dict({"x": ParamSchema.Tab()}),
        {"x": "tab-1"},
        tab_results=tab_results,
    )
    self.assertEqual(
        result.tab_outputs, {"tab-1": TabOutput("Tab 1", "tab-1.arrow")}
    )
def test_list_prompting_error_concatenate_different_type_to_text(self):
    """Mixed wrong types against a text-only column list merge into one error with found-type None."""
    schema = ParamSchema.List(
        inner_schema=ParamSchema.Column(column_types=frozenset({"text"}))
    )
    with self.assertRaises(PromptingError) as cm:
        self._call_clean_value(
            schema, ["A", "B"], input_table_columns=[NUMBER("A"), TIMESTAMP("B")]
        )
    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_list_prompting_error_concatenate_same_type(self):
    """Two columns of the same wrong type merge into a single WrongColumnType error."""
    schema = ParamSchema.List(
        inner_schema=ParamSchema.Column(column_types=frozenset({"number"}))
    )
    with self.assertRaises(PromptingError) as cm:
        self._call_clean_value(
            schema, ["A", "B"], input_table_columns=[TEXT("A"), TEXT("B")]
        )
    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], "text", frozenset({"number"}))
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_condition_and_or_simplify(self):
    """A one-legged and/or tree collapses to its single leaf condition."""
    leaf = {
        "operation": "cell_is_blank",
        "column": "A",
        "value": "",
        "isCaseSensitive": False,
        "isRegex": False,
    }
    nested = {
        "operation": "and",
        "conditions": [{"operation": "or", "conditions": [leaf]}],
    }
    cleaned = self._call_clean_value(
        ParamSchema.Condition(), nested, input_table_columns=[NUMBER("A")]
    )
    # Irrelevant leaf keys (value/isCaseSensitive/isRegex) are dropped, too.
    self.assertEqual(cleaned, {"operation": "cell_is_blank", "column": "A"})
def test_clean_file_no_s3_file(self):
    """When the S3 object is missing, a File param cleans to None and leaves no temp file."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(module_id_name="uploadfile", order=0, slug="step-1")
    step2 = tab.steps.create(module_id_name="uploadfile", order=1, slug="step-2")
    id = str(uuid.uuid4())
    # BUGFIX: the key was built with `${...}` -- JavaScript template-literal
    # syntax. In a Python f-string that leaves a literal `$` before each
    # interpolated value, producing an unrealistic S3 key.
    key = f"wf-{workflow.id}/wfm-{step.id}/{id}"
    # Oops -- let's _not_ put the file!
    # s3.put_bytes(s3.UserFilesBucket, key, b'1234')
    UploadedFileModel.objects.create(
        step=step2, name="x.csv.gz", size=4, uuid=id, key=key
    )
    result = self._call_clean_value(ParamSchema.File(), id, step_id=step.id)
    self.assertIsNone(result)
    # Assert that if a temporary file was created to house the download, it
    # no longer exists.
    self.assertListEqual(list(self.basedir.iterdir()), [])
def test_clean_float_with_int_value(self):
    # ParamSchema.Float can have `int` values (because values come from
    # json.parse(), which only gives Numbers so can give "3" instead of
    # "3.0". We want to pass that as `float` in the `params` dict.
    cleaned = clean_value(ParamSchema.Float(), 3, None)
    self.assertIsInstance(cleaned, float)
    self.assertEqual(cleaned, 3.0)
def test_validate_ok(self):
    """A well-formed multichartseries list passes validation without raising."""
    series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "B", "color": "#bbbbbb"},
    ]
    S.Multichartseries().validate(series)
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """Non-number series columns raise PromptingError with one WrongColumnType each."""
    series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "B", "color": "#cccccc"},
    ]
    with self.assertRaises(PromptingError) as cm:
        self._call_clean_value(
            ParamSchema.Multichartseries(),
            series,
            input_table_columns=[TEXT("A"), TIMESTAMP("B")],
        )
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "timestamp", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_tabs_tab_cycle(self):
    """A referenced tab whose result is None (unrendered/cyclic) raises TabCycleError."""
    tab_results = {Tab("tab-1", "Tab 1"): None}
    with self.assertRaises(TabCycleError):
        self._call_clean_value(
            ParamSchema.Multitab(), ["tab-1"], tab_results=tab_results
        )
def test_clean_column_happy_path(self):
    """A Column param naming an existing, correctly-typed column passes through unchanged."""
    input_shape = TableMetadata(3, [Column("A", ColumnType.Number())])
    schema = ParamSchema.Column(column_types=frozenset({"number"}))
    self.assertEqual(clean_value(schema, "A", input_shape), "A")
def test_clean_file_happy_path(self):
    """A File param downloads the S3 object to basedir and deletes it on exit_stack close."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(module_id_name="uploadfile", order=0, slug="step-1")
    # BUGFIX: the key was built with `${workflow.id}`/`${step.id}` -- JavaScript
    # template-literal syntax. In a Python f-string that leaves a literal `$`
    # before each interpolated value, producing an unrealistic S3 key.
    key = f"wf-{workflow.id}/wfm-{step.id}/6e00511a-8ac4-4b72-9acc-9d069992b5cf"
    s3.put_bytes(s3.UserFilesBucket, key, b"1234")
    model = UploadedFileModel.objects.create(
        step=step,
        name="x.csv.gz",
        size=4,
        uuid="6e00511a-8ac4-4b72-9acc-9d069992b5cf",
        key=key,
    )
    with ExitStack() as inner_stack:
        result = self._call_prep_params(
            ParamSchema.Dict({"file": ParamSchema.File()}),
            {"file": "6e00511a-8ac4-4b72-9acc-9d069992b5cf"},
            step_id=step.id,
            exit_stack=inner_stack,
        )
        self.assertEqual(
            result,
            PrepParamsResult(
                {"file": "6e00511a-8ac4-4b72-9acc-9d069992b5cf"},
                tab_outputs={},
                uploaded_files={
                    "6e00511a-8ac4-4b72-9acc-9d069992b5cf": UploadedFile(
                        "x.csv.gz",
                        "6e00511a-8ac4-4b72-9acc-9d069992b5cf_x.csv.gz",
                        model.created_at,
                    )
                },
            ),
        )
        self.assertEqual(
            (self.basedir / "6e00511a-8ac4-4b72-9acc-9d069992b5cf_x.csv.gz").read_bytes(),
            b"1234",
        )
    # Assert that once `exit_stack` goes out of scope, file is deleted
    self.assertFalse(
        (self.basedir / "6e00511a-8ac4-4b72-9acc-9d069992b5cf_x.csv.gz").exists()
    )
def test_render_file_param(self):
    """A File param arrives in render() as a readable local Path."""

    def check_file_param(arrow_table, params, output_path, *args, **kwargs):
        self.assertEqual(params["file"].read_bytes(), b"hi")

    schema = ParamSchema.Dict({"file": ParamSchema.File()})
    with ModuleTestEnv(param_schema=schema, render=check_file_param) as env:
        # Place the file where the sandbox expects it: "<uuid>_<name>".
        filename = "839526fa-1adb-4eec-9d29-f5b4d2fbba30_x.tar.gz"
        (env.basedir / filename).write_bytes(b"hi")
        uploaded = UploadedFile("x.tar.gz", filename, datetime.now())
        env.call_render(
            make_table(),
            {"file": "839526fa-1adb-4eec-9d29-f5b4d2fbba30"},
            uploaded_files={"839526fa-1adb-4eec-9d29-f5b4d2fbba30": uploaded},
        )
def test_clean_multicolumn_sort_in_table_order(self):
    """Multicolumn values are reordered to match the table's column order."""
    table_shape = TableMetadata(
        3, [Column("B", ColumnType.Number()), Column("A", ColumnType.Number())]
    )
    cleaned = clean_value(ParamSchema.Multicolumn(), ["A", "B"], table_shape)
    self.assertEqual(cleaned, ["B", "A"])
def test_clean_multicolumn_missing_is_removed(self):
    """Multicolumn entries naming absent columns are silently dropped."""
    table_shape = TableMetadata(
        3, [Column("A", ColumnType.Number()), Column("B", ColumnType.Number())]
    )
    cleaned = clean_value(ParamSchema.Multicolumn(), ["A", "X", "B"], table_shape)
    self.assertEqual(cleaned, ["A", "B"])