def test_vector_parse_twice(self) -> None:
        """Check that parsing the ``coordinates`` column twice gives the same vector.

        The parser is configured with only the FloatVector parsing semantic.
        Its output is fed back in as input, and the first row's coordinates
        must match the expected values after each pass.
        """
        dataset = test_utils.load_dataset(self._image_dataset_path)
        df = test_utils.get_dataframe(dataset, "learningData")

        hyperparams_class = ColumnParserPrimitive.metadata.get_hyperparams()
        cpp = ColumnParserPrimitive(
            hyperparams=hyperparams_class.defaults().replace(
                {
                    "parsing_semantics": [
                        "https://metadata.datadrivendiscovery.org/types/FloatVector",
                    ]
                }
            )
        )
        target_coords = [
            20.999598,
            63.488694,
            20.999598,
            63.499462,
            21.023702,
            63.499462,
            21.023702,
            63.488694,
        ]

        def check_first_row(frame) -> None:
            # Length is checked first because zip() would silently truncate
            # a vector that is too long or too short.
            coords = frame["coordinates"][0]
            # assertEqual, not the assertEquals alias: the alias was
            # deprecated and is gone from unittest in Python 3.12.
            self.assertEqual(len(coords), len(target_coords))
            for expected, actual in zip(target_coords, coords):
                self.assertAlmostEqual(expected, actual, 5)

        result_df = cpp.produce(inputs=df).value
        check_first_row(result_df)

        # Second pass: the already-parsed frame goes through the same primitive.
        result_2_df = cpp.produce(inputs=result_df).value
        check_first_row(result_2_df)
 def test_basic(self) -> None:
     """Run the parser with default hyperparams and check the column dtypes.

     Columns 1 and 2 are tagged Integer and Float before parsing; the
     resulting frame's dtypes are then compared column by column.
     """
     frame = test_utils.get_dataframe(
         test_utils.load_dataset(self._tabular_dataset_path), "learningData"
     )

     # Tag columns with the semantic types the parser should act on.
     column_semantics = (
         (1, "http://schema.org/Integer"),
         (2, "http://schema.org/Float"),
     )
     for column_index, semantic_type in column_semantics:
         frame.metadata = frame.metadata.add_semantic_type(
             (metadata_base.ALL_ELEMENTS, column_index), semantic_type
         )

     defaults = ColumnParserPrimitive.metadata.get_hyperparams().defaults()
     parser = ColumnParserPrimitive(hyperparams=defaults)
     parsed = parser.produce(inputs=frame).value

     # Expected dtype name per column, checked in this order.
     expected_dtypes = (
         ("d3mIndex", "int64"),
         ("alpha", "int64"),
         ("bravo", "float64"),
         ("charlie", "int64"),
         ("delta", "object"),
         ("echo", "float64"),
     )
     for column_name, dtype_name in expected_dtypes:
         self.assertEqual(parsed[column_name].dtype, np.dtype(dtype_name))
    def _load_data(self):
        """Load the ``learningData`` frame and return it after column parsing.

        Column 2 is tagged as a FloatVector before parsing. The parser is
        configured to handle Boolean, Integer, Float and FloatVector
        semantic types.

        Returns:
            The ``.value`` of the ``ColumnParserPrimitive.produce`` result.
        """
        # No return annotation on purpose: ``-> None`` would contradict the
        # ``return`` below, and the frame's type name is not visible here.
        # TODO(review): annotate once the container type is confirmed in scope.
        dataset = test_utils.load_dataset(self._dataset_path)
        dataframe = test_utils.get_dataframe(dataset, "learningData")

        dataframe.metadata = dataframe.metadata.add_semantic_type(
            (metadata_base.ALL_ELEMENTS, 2),
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
        )

        # Same hyperparams lookup the other tests in this file use.
        hyperparams_class = ColumnParserPrimitive.metadata.get_hyperparams()
        cpp = ColumnParserPrimitive(
            hyperparams=hyperparams_class.defaults().replace(
                {
                    "parsing_semantics": (
                        "http://schema.org/Boolean",
                        "http://schema.org/Integer",
                        "http://schema.org/Float",
                        "https://metadata.datadrivendiscovery.org/types/FloatVector",
                    )
                }
            )
        )
        return cpp.produce(inputs=dataframe).value
 def test_datetime(self) -> None:
     """Check that the ``sierra`` column is parsed into datetime floats.

     Column 4 is tagged DateTime and the parser is limited to that single
     parsing semantic. Each parsed value must equal what
     ``common_utils.parse_datetime_to_float`` gives for the input value.
     """
     frame = test_utils.get_dataframe(
         test_utils.load_dataset(self._dataset_path), "0"
     )
     frame.metadata = frame.metadata.add_semantic_type(
         (metadata_base.ALL_ELEMENTS, 4), "http://schema.org/DateTime"
     )

     overrides = {"parsing_semantics": ["http://schema.org/DateTime"]}
     defaults = ColumnParserPrimitive.metadata.get_hyperparams().defaults()
     parser = ColumnParserPrimitive(hyperparams=defaults.replace(overrides))
     parsed = parser.produce(inputs=frame).value

     actual_values = list(parsed["sierra"])
     # Reference values come from the input frame's column, converted with
     # the same fuzzy parsing helper.
     expected_values = [
         common_utils.parse_datetime_to_float(raw_date, fuzzy=True)
         for raw_date in frame["sierra"]
     ]
     self.assertListEqual(actual_values, expected_values)
    def test_hyperparams_structural_type(self) -> None:
        """Check dtypes, vector values and structural types after parsing.

        Columns 1-5 of the tabular frame are tagged with semantic types, and
        ``echo`` is overwritten with coordinate strings from the image
        dataset. The parser is configured for Float, Integer and FloatVector
        only.
        """
        vector_rows = 9

        tabular = test_utils.load_dataset(self._tabular_dataset_path)
        df = test_utils.get_dataframe(tabular, "learningData")

        # Semantic type applied to each column index, in this order.
        column_semantics = (
            (1, "http://schema.org/Integer"),
            (2, "http://schema.org/Float"),
            (3, "http://schema.org/Integer"),
            (4, "http://schema.org/Boolean"),
            (5, "https://metadata.datadrivendiscovery.org/types/FloatVector"),
        )
        for column_index, semantic_type in column_semantics:
            df.metadata = df.metadata.add_semantic_type(
                (metadata_base.ALL_ELEMENTS, column_index), semantic_type
            )

        # Borrow coordinate strings from the image dataset as vector input.
        image_dataset = test_utils.load_dataset(self._image_dataset_path)
        images = test_utils.get_dataframe(image_dataset, "learningData")
        df["echo"] = images["coordinates"][0:vector_rows]

        overrides = {
            "parsing_semantics": [
                "http://schema.org/Float",
                "http://schema.org/Integer",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
            ]
        }
        defaults = ColumnParserPrimitive.metadata.get_hyperparams().defaults()
        cpp = ColumnParserPrimitive(hyperparams=defaults.replace(overrides))
        result_df = cpp.produce(inputs=df).value

        expected_dtypes = (
            ("d3mIndex", "int64"),
            ("alpha", "int64"),
            ("bravo", "float64"),
            ("charlie", "int64"),
            ("delta", "object"),
            ("echo", "object"),
        )
        for column_name, dtype_name in expected_dtypes:
            self.assertEqual(result_df[column_name].dtype, np.dtype(dtype_name))

        # Each parsed vector must equal the comma-separated source string
        # converted to a float array.
        for row in range(vector_rows):
            reference = np.fromstring(images["coordinates"][row], dtype=float, sep=",")
            self.assertTrue((result_df["echo"][row] == reference).all())

        # Column 4 is expected to stay ``str``; presumably because Boolean is
        # not among the configured parsing semantics.
        expected_structural_types = (
            (1, int),
            (2, float),
            (3, int),
            (4, str),
            (5, np.ndarray),
        )
        for column_index, structural_type in expected_structural_types:
            column_metadata = result_df.metadata.query(
                (metadata_base.ALL_ELEMENTS, column_index)
            )
            self.assertEqual(column_metadata["structural_type"], structural_type)