def test_param_schema_explicit(self):
    # The module declares its own `param_schema`; the schema returned by
    # get_param_schema() is expected to mirror it (keyed by "id_name"), and
    # the custom "whee" parameter does not appear in the expectation.
    declared_schema = {
        "id_name": {
            "type": "dict",
            "properties": {
                "x": {"type": "integer"},
                "y": {"type": "string", "default": "X"},
            },
        }
    }
    module_spec = ModuleSpec(
        id_name="x",
        name="x",
        category="Clean",
        parameters=[{"id_name": "whee", "type": "custom"}],
        param_schema=declared_schema,
    )

    actual_schema = module_spec.get_param_schema()

    nested_dict = ParamDType.Dict(
        {"x": ParamDType.Integer(), "y": ParamDType.String(default="X")}
    )
    expected_schema = ParamDType.Dict({"id_name": nested_dict})
    self.assertEqual(actual_schema, expected_schema)
def dtype(self) -> Optional[ParamDType]:
    """Return an ``Option`` dtype wrapping a ``Dict`` of four ``String`` fields.

    The dict's keys are ``id``, ``name``, ``url`` and ``mimeType``; each maps
    to its own ``ParamDType.String()`` instance.
    """
    field_names = ("id", "name", "url", "mimeType")
    fields = {field_name: ParamDType.String() for field_name in field_names}
    return ParamDType.Option(ParamDType.Dict(fields))
def test_clean_normal_dict(self):
    # A dict of plain string/integer values should come out of clean_value()
    # equal to what went in.
    render_context = self._render_context()
    dict_schema = ParamDType.Dict(
        {
            "str": ParamDType.String(),
            "int": ParamDType.Integer(),
        }
    )
    raw_params = {"str": "foo", "int": 3}
    # Snapshot the input before cleaning: the expectation is a no-op.
    untouched_copy = dict(raw_params)

    cleaned = clean_value(dict_schema, raw_params, render_context)

    self.assertEqual(cleaned, untouched_copy)
def test_clean_normal_dict(self):
    # A dict of plain string/integer values should come out of clean_value()
    # equal to what went in.
    number_column = Column("A", ColumnType.Number())
    input_shape = TableMetadata(3, [number_column])
    dict_schema = ParamDType.Dict(
        {
            "str": ParamDType.String(),
            "int": ParamDType.Integer(),
        }
    )
    raw_params = {"str": "foo", "int": 3}
    # Snapshot the input before cleaning: the expectation is a no-op.
    untouched_copy = dict(raw_params)

    cleaned = clean_value(dict_schema, raw_params, input_shape)

    self.assertEqual(cleaned, untouched_copy)
def test_param_schema_implicit(self):
    # No `param_schema` is passed here, so the expected schema is described
    # purely in terms of the declared parameters: a string, a secret and a
    # menu with a separator between its two options.
    string_param = {"id_name": "foo", "type": "string", "default": "X"}
    secret_param = {
        "id_name": "bar",
        "type": "secret",
        "secret_logic": {"provider": "oauth2", "service": "google"},
    }
    menu_param = {
        "id_name": "baz",
        "type": "menu",
        "options": [
            {"value": "a", "label": "A"},
            "separator",
            {"value": "c", "label": "C"},
        ],
        "default": "c",
    }
    module_spec = ModuleSpec(
        id_name="googlesheets",
        name="x",
        category="Clean",
        parameters=[string_param, secret_param, menu_param],
    )

    actual_schema = module_spec.get_param_schema()

    expected_schema = ParamDType.Dict(
        {
            "foo": ParamDType.String(default="X"),
            # secret is not in param_schema
            "baz": ParamDType.Enum(choices=frozenset({"a", "c"}), default="c"),
        }
    )
    self.assertEqual(actual_schema, expected_schema)
def get_param_schema(self) -> ParamDType.Dict:
    """Return the dict dtype describing this module's parameters.

    When ``self.param_schema`` is set, it is used as the ``properties`` of a
    ``{"type": "dict"}`` definition and handed to ``ParamDType.parse``.
    Otherwise the schema is built from ``self.param_fields``, keyed by each
    field's ``id_name`` and skipping fields whose ``dtype`` is ``None``.
    """
    if self.param_schema is not None:
        # Module author wrote a schema in the YAML, to define storage of
        # 'custom' parameters
        return ParamDType.parse(
            {"type": "dict", "properties": self.param_schema}
        )

    # Usual case: infer schema from module parameter types.
    # Use of dict here means schema is not sensitive to parameter ordering,
    # which is good
    return ParamDType.Dict(
        {
            field.id_name: field.dtype
            for field in self.param_fields
            if field.dtype is not None
        }
    )
def test_dict_prompting_error_concatenate_same_type(self):
    # Both params demand a number column but are given text columns; the
    # expectation is a single WrongColumnType error listing both columns.
    render_context = self._render_context(
        input_table=arrow_table({"A": ["1"], "B": ["2"]})
    )
    two_number_columns = ParamDType.Dict(
        {
            "x": ParamDType.Column(column_types=frozenset({"number"})),
            "y": ParamDType.Column(column_types=frozenset({"number"})),
        }
    )
    raw_params = {"x": "A", "y": "B"}

    with self.assertRaises(PromptingError) as raised:
        clean_value(two_number_columns, raw_params, render_context)

    # Inspect the exception only after the `with` block has captured it.
    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], "text", frozenset({"number"}))
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_map_parse(self):
    # Parse a "map" definition and compare it, by repr, with the equivalent
    # hand-built ParamDType.Map.
    map_definition = {
        "type": "map",
        "value_dtype": {
            "type": "dict",  # test nesting
            "properties": {"foo": {"type": "string"}},
        },
    }

    parsed = ParamDType.parse(map_definition)

    expected = ParamDType.Map(
        value_dtype=ParamDType.Dict(properties={"foo": ParamDType.String()})
    )
    self.assertEqual(repr(parsed), repr(expected))
def test_clean_multicolumn_from_other_tab_that_does_not_exist(self):
    # The other tab would not exist if the user selected and then deleted
    # it.
    tab_and_columns = ParamDType.Dict(
        {
            "tab": ParamDType.Tab(),
            "columns": ParamDType.Multicolumn(tab_parameter="tab"),
        }
    )
    raw_params = {"tab": "tab-missing", "columns": ["A-from-tab-1"]}
    # No tab results at all: "tab-missing" cannot be looked up.
    render_context = self._render_context(
        input_table=arrow_table({"A-from-tab-1": [1]}),
        tab_results={},
        params=raw_params,
    )

    cleaned = clean_value(tab_and_columns, raw_params, render_context)

    # result['tab'] is not what we're testing here
    self.assertEqual(cleaned["columns"], [])
def test_clean_multicolumn_from_other_tab(self):
    # The multicolumn param points at the "tab" param, so only the column
    # present in tab 2's output table is expected to remain after cleaning.
    other_tab = Tab("tab-2", "Tab 2")
    other_tab_table = arrow_table({"A-from-tab-2": [1, 2]})
    tab_and_columns = ParamDType.Dict(
        {
            "tab": ParamDType.Tab(),
            "columns": ParamDType.Multicolumn(tab_parameter="tab"),
        }
    )
    raw_params = {"tab": "tab-2", "columns": ["A-from-tab-1", "A-from-tab-2"]}
    render_context = self._render_context(
        input_table=arrow_table({"A-from-tab-1": [1]}),
        tab_results={other_tab: RenderResult(other_tab_table)},
        params=raw_params,
    )

    cleaned = clean_value(tab_and_columns, raw_params, render_context)

    # result['tab'] is not what we're testing here
    self.assertEqual(cleaned["columns"], ["A-from-tab-2"])
def test_list_dtype(self):
    # Check that ParamSpec's with List type produce correct nested DTypes
    list_spec = ParamSpec.from_dict(
        {
            "id_name": "p",
            "type": "list",
            "child_parameters": [
                {"id_name": "intparam", "type": "integer", "name": "my number"},
                {"id_name": "colparam", "type": "column", "name": "my column"},
            ],
        }
    )

    # First, the parsed spec itself must match the hand-built List spec.
    expected_spec = ParamSpec.List(
        id_name="p",
        child_parameters=[
            ParamSpec.Integer(id_name="intparam", name="my number"),
            ParamSpec.Column(id_name="colparam", name="my column"),
        ],
    )
    self.assertEqual(list_spec, expected_spec)

    # Then its dtype must be a List of a Dict keyed by the children's id_names.
    actual_dtype = list_spec.dtype
    expected_dtype = DT.List(
        DT.Dict({"intparam": DT.Integer(), "colparam": DT.Column()})
    )

    # effectively do a deep compare with repr
    self.assertEqual(repr(actual_dtype), repr(expected_dtype))
def test_dict_prompting_error(self):
    # Two text columns are passed where a number and a timestamp column are
    # required; one WrongColumnType error per param is expected, in order.
    text_columns = [
        Column("A", ColumnType.Text()),
        Column("B", ColumnType.Text()),
    ]
    input_shape = TableMetadata(3, text_columns)
    two_typed_columns = ParamDType.Dict(
        {
            "col1": ParamDType.Column(column_types=frozenset({"number"})),
            "col2": ParamDType.Column(column_types=frozenset({"timestamp"})),
        }
    )
    raw_params = {"col1": "A", "col2": "B"}

    with self.assertRaises(PromptingError) as raised:
        clean_value(two_typed_columns, raw_params, input_shape)

    # Inspect the exception only after the `with` block has captured it.
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "text", frozenset({"timestamp"})),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_dict_prompting_error_concatenate_different_types(self):
    # Both params demand a number column; "A" holds strings and "B" holds a
    # nanosecond timestamp. The expectation is two separate WrongColumnType
    # errors ("text" and "datetime"), not one merged error.
    timestamp_column = pa.array([datetime.now()], pa.timestamp("ns"))
    render_context = self._render_context(
        input_table=arrow_table({"A": ["1"], "B": timestamp_column})
    )
    two_number_columns = ParamDType.Dict(
        {
            "x": ParamDType.Column(column_types=frozenset({"number"})),
            "y": ParamDType.Column(column_types=frozenset({"number"})),
        }
    )
    raw_params = {"x": "A", "y": "B"}

    with self.assertRaises(PromptingError) as raised:
        clean_value(two_number_columns, raw_params, render_context)

    # Inspect the exception only after the `with` block has captured it.
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def dtype(self) -> Optional[ParamDType]:
    """Return a ``List`` dtype whose items are a ``Dict`` of the child dtypes.

    The inner dict maps each child parameter's ``id_name`` to its ``dtype``.
    Children whose ``dtype`` is ``None`` are left out.
    """
    # Compare against None explicitly rather than relying on truthiness, so
    # a child is skipped only when it genuinely has no dtype.
    child_dtypes = {
        child.id_name: child.dtype
        for child in self.child_parameters
        if child.dtype is not None
    }
    return ParamDType.List(ParamDType.Dict(child_dtypes))