Пример #1
0
 def test_clean_column_happy_path(self):
     # Column "A" is a number column and the param accepts number columns,
     # so the submitted column name is expected back unchanged.
     number_column = Column("A", ColumnType.Number())
     metadata = TableMetadata(3, [number_column])
     dtype = ParamDType.Column(column_types=frozenset({"number"}))
     cleaned = clean_value(dtype, "A", metadata)
     self.assertEqual(cleaned, "A")
Пример #2
0
    def test_dict_prompting_error_concatenate_same_type(self):
        # Both dict entries require a number column, but "A" and "B" are
        # built from strings. The expected error list holds ONE
        # WrongColumnType that names both columns.
        text_table = arrow_table({"A": ["1"], "B": ["2"]})
        context = self._render_context(input_table=text_table)
        schema = ParamDType.Dict(
            {
                key: ParamDType.Column(column_types=frozenset({"number"}))
                for key in ("x", "y")
            }
        )
        submitted = {"x": "A", "y": "B"}

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, submitted, context)

        merged_error = PromptingError.WrongColumnType(
            ["A", "B"], "text", frozenset({"number"})
        )
        self.assertEqual(cm.exception.errors, [merged_error])
Пример #3
0
 def test_clean_column_prompting_error_convert_to_number(self):
     # Column "A" is built from a string while the param only accepts
     # number columns: clean_value() must raise a PromptingError carrying
     # a single WrongColumnType for "A".
     string_table = arrow_table({"A": ["1"]})
     context = self._render_context(input_table=string_table)
     with self.assertRaises(PromptingError) as cm:
         dtype = ParamDType.Column(column_types=frozenset({"number"}))
         clean_value(dtype, "A", context)
     wrong_type = PromptingError.WrongColumnType(
         ["A"], "text", frozenset({"number"})
     )
     self.assertEqual(cm.exception.errors, [wrong_type])
Пример #4
0
    def test_clean_column_prompting_error_convert_to_number(self):
        # A text column handed to a number-only Column param must raise a
        # PromptingError whose errors list contains one WrongColumnType.
        text_column = Column("A", ColumnType.Text())
        metadata = TableMetadata(3, [text_column])

        with self.assertRaises(PromptingError) as cm:
            dtype = ParamDType.Column(column_types=frozenset({"number"}))
            clean_value(dtype, "A", metadata)

        wrong_type = PromptingError.WrongColumnType(
            ["A"], "text", frozenset({"number"})
        )
        self.assertEqual(cm.exception.errors, [wrong_type])
Пример #5
0
    def test_dict_prompting_error(self):
        # Two text columns are given to params that want "number" and
        # "timestamp" respectively. The wanted types differ, so one
        # WrongColumnType per column is expected, in schema order.
        text_columns = [Column(name, ColumnType.Text()) for name in ("A", "B")]
        metadata = TableMetadata(3, text_columns)
        schema = ParamDType.Dict(
            {
                "col1": ParamDType.Column(column_types=frozenset({"number"})),
                "col2": ParamDType.Column(column_types=frozenset({"timestamp"})),
            }
        )
        submitted = {"col1": "A", "col2": "B"}

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, submitted, metadata)

        expected_errors = [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(["B"], "text", frozenset({"timestamp"})),
        ]
        self.assertEqual(cm.exception.errors, expected_errors)
Пример #6
0
    def test_dict_prompting_error_concatenate_different_types(self):
        # "A" is built from a string and "B" from a timestamp array; both
        # params want numbers. The offending columns have different current
        # types, so the expected errors are two separate WrongColumnType
        # entries rather than one merged entry.
        timestamps = pa.array([datetime.now()], pa.timestamp("ns"))
        mixed_table = arrow_table({"A": ["1"], "B": timestamps})
        context = self._render_context(input_table=mixed_table)
        schema = ParamDType.Dict(
            {
                key: ParamDType.Column(column_types=frozenset({"number"}))
                for key in ("x", "y")
            }
        )
        submitted = {"x": "A", "y": "B"}

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, submitted, context)

        expected_errors = [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
        ]
        self.assertEqual(cm.exception.errors, expected_errors)
Пример #7
0
    def test_clean_column_prompting_error_convert_to_text(self):
        # TODO make this _automatic_ instead of quick-fix?
        # Think of Regex: we would probably want to hand the module a text
        # Series _separately_ from the input DataFrame, so Regex could emit
        # a new Text column while its input column keeps its data type.
        #
        # ... until then: prompt for a Quick Fix.
        number_column = Column("A", ColumnType.Number())
        metadata = TableMetadata(3, [number_column])

        with self.assertRaises(PromptingError) as cm:
            dtype = ParamDType.Column(column_types=frozenset({"text"}))
            clean_value(dtype, "A", metadata)

        # The expected error passes None (not a type name) as its second
        # argument in the convert-to-text case.
        wrong_type = PromptingError.WrongColumnType(
            ["A"], None, frozenset({"text"})
        )
        self.assertEqual(cm.exception.errors, [wrong_type])
Пример #8
0
    def test_list_prompting_error_concatenate_different_type_to_text(self):
        # "A" is built from an int and "B" from a timestamp array, while the
        # list's inner param wants text. The expected outcome is a single
        # WrongColumnType covering both columns, with None as its second
        # argument.
        timestamps = pa.array([datetime.now()], pa.timestamp("ns"))
        non_text_table = arrow_table({"A": [1], "B": timestamps})
        context = self._render_context(input_table=non_text_table)
        text_column_dtype = ParamDType.Column(column_types=frozenset({"text"}))
        schema = ParamDType.List(inner_dtype=text_column_dtype)

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ["A", "B"], context)

        merged_error = PromptingError.WrongColumnType(
            ["A", "B"], None, frozenset({"text"})
        )
        self.assertEqual(cm.exception.errors, [merged_error])
Пример #9
0
    def test_list_dtype(self):
        # A ParamSpec of type "list" should parse into ParamSpec.List and
        # expose a correctly nested DType (a List wrapping a Dict).
        raw_spec = {
            "id_name": "p",
            "type": "list",
            "child_parameters": [
                {"id_name": "intparam", "type": "integer", "name": "my number"},
                {"id_name": "colparam", "type": "column", "name": "my column"},
            ],
        }
        param_spec = ParamSpec.from_dict(raw_spec)

        expected_spec = ParamSpec.List(
            id_name="p",
            child_parameters=[
                ParamSpec.Integer(id_name="intparam", name="my number"),
                ParamSpec.Column(id_name="colparam", name="my column"),
            ],
        )
        self.assertEqual(param_spec, expected_spec)

        dtype = param_spec.dtype
        inner_dict = DT.Dict({"intparam": DT.Integer(), "colparam": DT.Column()})
        expected_dtype = DT.List(inner_dict)

        # comparing the reprs effectively gives a deep comparison
        self.assertEqual(repr(dtype), repr(expected_dtype))
Пример #10
0
 def test_clean_column_missing(self):
     # The table only has column "A"; asking for "B" is expected to be
     # cleaned to the empty string.
     metadata = TableMetadata(3, [Column("A", ColumnType.Number())])
     dtype = ParamDType.Column()
     cleaned = clean_value(dtype, "B", metadata)
     self.assertEqual(cleaned, "")
Пример #11
0
 def test_clean_column_tab_parameter_is_error(self):
     # Cleaning a Column param that carries a tab_parameter is expected to
     # fail with a RuntimeError matching the pattern below.
     metadata = TableMetadata(3, [Column("A", ColumnType.Number())])
     expected_message = "Unsupported: fetch column with tab_parameter"
     with self.assertRaisesRegex(RuntimeError, expected_message):
         dtype = ParamDType.Column(tab_parameter="tab-2")
         clean_value(dtype, "A", metadata)
Пример #12
0
 def test_clean_column_no_input_is_empty(self):
     # With a default-constructed TableMetadata as input, the column
     # value is expected to be cleaned to the empty string.
     dtype = ParamDType.Column()
     no_input = TableMetadata()
     cleaned = clean_value(dtype, "A", no_input)
     self.assertEqual(cleaned, "")
Пример #13
0
 def test_clean_column_missing_becomes_empty_string(self):
     # The input table has only column "A", so the request for "B" is
     # expected to come back as "".
     single_column_table = arrow_table({"A": [1]})
     context = self._render_context(input_table=single_column_table)
     self.assertEqual(clean_value(ParamDType.Column(), "B", context), "")
Пример #14
0
 def test_clean_column_valid(self):
     # Column "A" exists in the input table, so its name is expected to
     # pass through unchanged.
     single_column_table = arrow_table({"A": [1]})
     context = self._render_context(input_table=single_column_table)
     self.assertEqual(clean_value(ParamDType.Column(), "A", context), "A")
Пример #15
0
 def dtype(self) -> Optional[ParamDType]:
     # Build the ParamDType.Column that corresponds to this spec.
     # A falsy column_types (None or empty) is passed on as None;
     # otherwise it is frozen into a frozenset.
     if self.column_types:
         allowed_types = frozenset(self.column_types)
     else:
         allowed_types = None
     return ParamDType.Column(
         column_types=allowed_types,
         tab_parameter=self.tab_parameter,
     )