示例#1
0
    def test_predict(self):
        """
        Train, assess and predict with the scikit linear regression model
        through the ``train``, ``accuracy`` and ``predict`` helpers.

        The first two predictions are expected to round to 70 and 80.
        """
        self.required_plugins("dffml-model-scikit")
        # Resolve the plugin module at runtime
        scikit_plugin = importlib.import_module("dffml_model_scikit")
        # Model arguments, evaluated in the same order they are passed
        model_directory = self.mktempdir()
        salary = Feature("Salary", int, 1)
        inputs = Features(
            Feature("Years", int, 1),
            Feature("Expertise", int, 1),
            Feature("Trust", float, 1),
        )
        model = scikit_plugin.LinearRegressionModel(
            directory=model_directory, predict=salary, features=inputs
        )

        # One CSV source per phase
        train_source = CSVSource(filename=self.train_filename)
        test_source = CSVSource(filename=self.test_filename)
        predict_source = CSVSource(filename=self.predict_filename)

        train(model, train_source)
        accuracy(model, test_source)
        results = list(predict(model, predict_source))
        # Element 2 of each result is indexed by feature name to reach the
        # predicted value
        for index, expected in enumerate((70, 80)):
            self.assertEqual(
                round(results[index][2]["Salary"]["value"]), expected
            )
示例#2
0
 def test_config_defaults(self):
     """
     Parse a command line that leaves some options out and check the
     resulting config.

     No label, readonly flag or source type is passed, and the assertions
     expect ``"unlabeled"``, a falsy ``readonly`` and a ``JSONSource``.
     """
     argv = [
         "--test-fake-name",
         "feedface",
         "--test-num",
         "-4.2",
         "--test-files",
         "a",
         "b",
         "c",
         "--test-source-filename",
         "file.json",
         "--test-features",
         "Year:int:1",
         "Commits:int:10",
         "--test-fake-nums",
         "100",
     ]
     config = FakeTesting.config(parse_unknown(*argv))
     # Values given explicitly on the command line
     self.assertEqual(config.num, -4.2)
     self.assertEqual(config.files, ["a", "b", "c"])
     self.assertEqual(config.name, "feedface")
     # Values that were not given on the command line
     self.assertEqual(config.label, "unlabeled")
     self.assertFalse(config.readonly)
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)) which only says "False is not true"
     self.assertIsInstance(config.source, JSONSource)
     self.assertEqual(config.source.config.filename,
                      pathlib.Path("file.json"))
     self.assertEqual(
         config.features,
         Features(Feature("Year", int, 1), Feature("Commits", int, 10)),
     )
     self.assertEqual(config.nums, (100, ))
    def setUpClass(cls):
        """
        Create the model under test plus the synthetic training and test
        sources stored on the class.
        """
        # Temporary directory handed to the model as its directory
        cls.model_dir = tempfile.TemporaryDirectory()
        model_config = XGBRegressorModelConfig(
            features=Features(Feature("Feature1", float, 1),
                              Feature("Feature2")),
            predict=Feature("Target", float, 1),
            directory=cls.model_dir.name,
        )
        cls.model = XGBRegressorModel(model_config)

        # Synthetic data following f(x1, x2) = 2*x1 + 3*x2
        total = 2000
        split = 1800
        x1_values, x2_values = np.random.rand(2, total)
        cls.records = [
            Record(
                f"x{random.random()}",
                data={
                    "features": {
                        "Feature1": float(x1),
                        "Feature2": float(x2),
                        "Target": 2 * x1 + 3 * x2,
                    }
                },
            ) for x1, x2 in zip(x1_values, x2_values)
        ]

        # First 1800 records train the model, the remainder test it
        training_memory = MemorySource(
            MemorySourceConfig(records=cls.records[:split]))
        cls.trainingsource = Sources(training_memory)
        test_memory = MemorySource(
            MemorySourceConfig(records=cls.records[split:]))
        cls.testsource = Sources(test_memory)
示例#4
0
 def test_config_set(self):
     """
     Parse a command line that sets the label, the readonly flag and a
     ``csv`` source explicitly, and check each one lands in the config.
     """
     argv = [
         "--test-fake-name",
         "feedface",
         "--test-num",
         "-4.2",
         "--test-fake-label",
         "default-label",
         "--test-fake-readonly",
         "--test-files",
         "a",
         "b",
         "c",
         "--test-fake-source",
         "csv",
         "--test-source-filename",
         "file.csv",
         "--test-features",
         "Year:int:1",
         "Commits:int:10",
     ]
     config = FakeTesting.config(parse_unknown(*argv))
     self.assertEqual(config.num, -4.2)
     self.assertEqual(config.files, ["a", "b", "c"])
     self.assertEqual(config.name, "feedface")
     self.assertEqual(config.label, "default-label")
     self.assertTrue(config.readonly)
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)) which only says "False is not true"
     self.assertIsInstance(config.source, CSVSource)
     self.assertEqual(config.source.config.filename,
                      pathlib.Path("file.csv"))
     self.assertEqual(
         config.features,
         Features(Feature("Year", int, 1), Feature("Commits", int, 10)),
     )
示例#5
0
    def setUpClass(cls):
        """
        Create the anomaly model plus the synthetic training and test
        sources stored on the class.
        """
        # Temporary directory handed to the model as its directory
        cls.model_dir = tempfile.TemporaryDirectory()
        input_features = Features(
            Feature("A", int, 1),
            Feature("B", int, 2),
        )
        cls.model = AnomalyModel(
            features=input_features,
            predict=Feature("Y", int, 1),
            directory=cls.model_dir.name,
        )

        # Two rows of normally distributed samples (mean 2, std 1)
        total = 1800
        split = 1400
        a_values, b_values = np.random.normal(2, 1, size=(2, total))
        cls.records = [
            Record(
                f"x{random.random()}",
                data={
                    "features": {
                        "A": float(a),
                        "B": float(b),
                        # Label is 1 when A > 1 - B, otherwise 0
                        "Y": (a > 1 - b).astype(int),
                    }
                },
            ) for a, b in zip(a_values, b_values)
        ]

        # First 1400 records train the model, the remainder test it
        training_memory = MemorySource(
            MemorySourceConfig(records=cls.records[:split]))
        cls.trainingsource = Sources(training_memory)
        test_memory = MemorySource(
            MemorySourceConfig(records=cls.records[split:]))
        cls.testsource = Sources(test_memory)
示例#6
0
 def _feature_feature_column(self, feature: Feature):
     """
     Creates a feature column for a feature

     Returns a numeric column named ``feature.NAME`` with shape
     ``feature.length()`` when the feature's dtype is a class derived from
     ``int`` or ``float``. For any other dtype a warning is logged and
     ``None`` is returned.
     """
     dtype = feature.dtype()
     # issubclass() raises TypeError for non-classes, so check isclass first.
     # issubclass(X, X) is True, so no separate identity checks are needed.
     if inspect.isclass(dtype) and issubclass(dtype, (int, float)):
         return self.tf.feature_column.numeric_column(
             feature.NAME, shape=feature.length())
     # dtype is passed as a logging argument instead of being %-formatted
     # here: "..." % (dtype) raises TypeError when dtype is a tuple
     self.logger.warning("Unknown dtype %r. Could not create column", dtype)
     return None
示例#7
0
 async def test_01_accuracy(self):
     """
     Score the model against the test source with a
     ``MeanSquaredErrorAccuracy`` scorer.
     """
     mse_scorer = MeanSquaredErrorAccuracy()
     target = Feature("Target", float, 1)
     # Use the test data to assess the model
     score_value = await score(self.model, mse_scorer, target,
                               self.testsource)
     # Only checks that the returned score is truthy; no threshold applied
     self.assertTrue(score_value)
    async def test_02_predict(self):
        """
        Guard against overfitting by comparing the score on the training
        source with the score on the test source.
        """
        # Score the training source first, then the test source
        res_train, res_test = [
            await score(
                self.model,
                self.scorer,
                Feature("Target", float, 1),
                source,
            )
            for source in (self.trainingsource, self.testsource)
        ]
        # Test fails if the training score exceeds the testing score by
        # 0.05 (5%) or more
        self.assertLess(res_train - res_test, 0.05)
 async def test_01_accuracy(self):
     """
     Score the model against the test source with ``self.scorer`` and
     require a result of at least 0.8.
     """
     # Use the test data to assess the model's accuracy
     res = await score(
         self.model,
         self.scorer,
         Feature("Target", float, 1),
         self.testsource,
     )
     # Ensure the accuracy is at least 80%. assertGreaterEqual reports the
     # actual score on failure, unlike assertTrue(0.8 <= res)
     self.assertGreaterEqual(res, 0.8)
示例#10
0
    async def test_predict(self):
        """
        Exercise the async ``train`` / ``score`` / ``predict`` helpers with
        the scikit linear regression model, first with CSV sources and then
        with the input data unpacked as positional arguments.

        In both passes the first two predictions must round to 70 and 80.
        """
        self.required_plugins("dffml-model-scikit")
        # Resolve the plugin module at runtime
        scikit_plugin = importlib.import_module("dffml_model_scikit")
        # Model arguments, evaluated in the same order they are passed
        model_location = self.mktempdir()
        salary = Feature("Salary", int, 1)
        inputs = Features(
            Feature("Years", int, 1),
            Feature("Expertise", int, 1),
            Feature("Trust", float, 1),
        )
        model = scikit_plugin.LinearRegressionModel(
            location=model_location, predict=salary, features=inputs
        )

        # One CSV source per phase
        train_source = CSVSource(filename=self.train_filename)
        test_source = CSVSource(filename=self.test_filename)
        predict_source = CSVSource(filename=self.predict_filename)

        # Pass 1: inputs given as sources
        await train(model, train_source)
        mse_scorer = MeanSquaredErrorAccuracy()
        await score(model, mse_scorer, Feature("Salary", int, 1), test_source)
        results = [
            result async for result in predict(model, predict_source)
        ]
        for index, expected in enumerate((70, 80)):
            self.assertEqual(
                round(results[index][2]["Salary"]["value"]), expected
            )

        # Pass 2: test input data as list, unpacked into positional arguments
        await train(model, *self.train_data)
        await score(
            model, mse_scorer, Feature("Salary", int, 1), *self.test_data
        )
        results = [
            result async for result in predict(model, *self.predict_data)
        ]
        for index, expected in enumerate((70, 80)):
            self.assertEqual(
                round(results[index][2]["Salary"]["value"]), expected
            )
示例#11
0
        **{
            "directory": (
                pathlib.Path,
                field("Directory where state should be saved", ),
            ),
            "features": (Features, field("Features to train on")),
        },
        **config_fields,
    }

    if estimator_type in unsupervised_estimators:
        dffml_config_properties["predict"] = (
            Feature,
            field(
                "Name used as meaning of prediction",
                default=Feature(name="cluster", dtype=str, length=1),
            ),
        )

    dffml_config = make_config_numpy(name + "ModelConfig",
                                     cls,
                                     properties=dffml_config_properties)

    dffml_cls_ctx = type(
        name + "ModelContext",
        (parentContext, ),
        {},
    )

    dffml_cls = type(
        name + "Model",
示例#12
0
 async def test_01_accuracy(self):
     """
     Score the model on ``self.train_sources`` for the ``Tag`` feature and
     require a non-negative result.
     """
     tag = Feature("Tag", str, 1)
     score_value = await score(
         self.model, self.scorer, tag, self.train_sources
     )
     self.assertGreaterEqual(score_value, 0)
示例#13
0
class FakeTestingConfig2:
    # Field declarations for a second fake testing configuration.
    # NOTE(review): presumably wrapped by a config/dataclass decorator that
    # is not visible in this view — confirm before relying on field order.

    # Declared through field() with the description "Name of FakeTesting2"
    name: str = field("Name of FakeTesting2")
    # Float value with no default given here
    num: float
    # Defaults to two int features: "default" (length 1) and "features"
    # (length 10)
    features: Features = Features(Feature("default", int, 1),
                                  Feature("features", int, 10))
    # Defaults to "unlabeled"
    label: str = "unlabeled"