def setUpClass(cls):
    """Prepare the XGBoost regressor model and a synthetic dataset.

    A temporary directory is created for the model, the model is
    instantiated, and 2000 records following
    ``f(x1, x2) = 2*x1 + 3*x2`` are generated. The first 1800 records
    form the training source and the remaining ones the test source.
    """
    # Temporary directory in which the trained model is stored
    cls.model_dir = tempfile.TemporaryDirectory()

    # Model under test
    model_config = XGBRegressorModelConfig(
        features=Features(Feature("Feature1", float, 1), Feature("Feature2")),
        predict=Feature("Target", float, 1),
        directory=cls.model_dir.name,
    )
    cls.model = XGBRegressorModel(model_config)

    # Synthetic data: f(x1, x2) = 2*x1 + 3*x2
    total_records = 2000
    training_records = 1800
    first_inputs, second_inputs = np.random.rand(2, total_records)
    cls.records = [
        Record(
            "x" + str(random.random()),
            data={
                "features": {
                    "Feature1": float(x1),
                    "Feature2": float(x2),
                    "Target": 2 * x1 + 3 * x2,
                }
            },
        )
        for x1, x2 in zip(first_inputs, second_inputs)
    ]

    # Split the records into a training part and a test part
    cls.trainingsource = Sources(
        MemorySource(
            MemorySourceConfig(records=cls.records[:training_records])
        )
    )
    cls.testsource = Sources(
        MemorySource(
            MemorySourceConfig(records=cls.records[training_records:])
        )
    )
def test_predict(self):
    """Train, assess and predict with the scikit linear regression model.

    The model is trained and assessed on CSV sources, then the first two
    predictions are expected to round to salaries of 70 and 80.
    """
    self.required_plugins("dffml-model-scikit")
    # The scikit plugin is imported lazily, after the plugin check above
    scikit_plugin = importlib.import_module("dffml_model_scikit")

    # Model under test
    regression = scikit_plugin.LinearRegressionModel(
        directory=self.mktempdir(),
        predict=Feature("Salary", int, 1),
        features=Features(
            Feature("Years", int, 1),
            Feature("Expertise", int, 1),
            Feature("Trust", float, 1),
        ),
    )

    train_source = CSVSource(filename=self.train_filename)
    test_source = CSVSource(filename=self.test_filename)
    predict_source = CSVSource(filename=self.predict_filename)

    # Train the model
    train(regression, train_source)
    # Assess accuracy
    accuracy(regression, test_source)
    # Make predictions
    results = list(predict(regression, predict_source))

    # Index 2 of each prediction holds the mapping with the predicted value
    for index, expected_salary in enumerate((70, 80)):
        self.assertEqual(
            round(results[index][2]["Salary"]["value"]), expected_salary
        )
def test_config_set(self):
    """Check that explicitly supplied arguments end up on the config.

    Every option is passed on the command line, including the label, the
    readonly flag and the source type, and each resulting config
    attribute is compared against the value that was supplied.
    """
    config = FakeTesting.config(
        parse_unknown(
            "--test-fake-name",
            "feedface",
            "--test-num",
            "-4.2",
            "--test-fake-label",
            "default-label",
            "--test-fake-readonly",
            "--test-files",
            "a",
            "b",
            "c",
            "--test-fake-source",
            "csv",
            "--test-source-filename",
            "file.csv",
            "--test-features",
            "Year:int:1",
            "Commits:int:10",
        )
    )
    self.assertEqual(config.num, -4.2)
    self.assertEqual(config.files, ["a", "b", "c"])
    self.assertEqual(config.name, "feedface")
    self.assertEqual(config.label, "default-label")
    self.assertTrue(config.readonly)
    # assertIsInstance reports the actual type when the check fails,
    # unlike assertTrue(isinstance(...)) which only says "False is not true"
    self.assertIsInstance(config.source, CSVSource)
    self.assertEqual(config.source.config.filename, "file.csv")
    self.assertEqual(
        config.features,
        Features(DefFeature("Year", int, 1), DefFeature("Commits", int, 10)),
    )
async def test_model(self):
    """Configure a fake model over HTTP and create a context for it.

    ``Model.load`` is patched with ``model_load`` for the duration of the
    test. The model configuration is POSTed, the registered model's
    config is compared with the expected ``FakeModelConfig``, and a
    model context named "salaryctx" is then requested.
    """
    load_patch = patch.object(Model, "load", new=model_load)
    with tempfile.TemporaryDirectory() as tempdir, load_patch:
        config = parse_unknown(
            "--model-directory",
            tempdir,
            "--model-features",
            "Years:int:1",
            "Experiance:int:1",
            "--model-predict",
            "Salary:float:1",
        )
        async with self.post(
            "/configure/model/fake/salary", json=config
        ) as response:
            self.assertEqual(await response.json(), OK)
            registered_models = self.cli.app["models"]
            self.assertIn("salary", registered_models)
            self.assertEqual(
                registered_models["salary"].config,
                FakeModelConfig(
                    directory=pathlib.Path(tempdir),
                    features=Features(
                        Feature("Years", int, 1),
                        Feature("Experiance", int, 1),
                    ),
                    predict=Feature("Salary", float, 1),
                ),
            )
            with self.subTest(context="salaryctx"):
                # Create the context
                async with self.get(
                    "/context/model/salary/salaryctx"
                ) as context_response:
                    self.assertEqual(await context_response.json(), OK)
                    self.assertIn(
                        "salaryctx", self.cli.app["model_contexts"]
                    )
class FakeTestingConfig2:
    """Field declarations for a second fake testing configuration.

    NOTE(review): presumably wrapped by a config/dataclass decorator that
    is not visible here, in which case the field order below is
    significant and must not change - confirm.
    """

    # Declared through the ``field`` helper with the text
    # "Name of FakeTesting2" (presumably a description - verify)
    name: str = field("Name of FakeTesting2")
    # Annotated only, no default value
    num: float
    # Defaults to two features: "default" (int, 1) and "features" (int, 10)
    features: Features = Features(
        DefFeature("default", int, 1),
        DefFeature("features", int, 10),
    )
    # Defaults to the string "unlabeled"
    label: str = "unlabeled"
def test_config_defaults(self):
    """Check the values the config takes for options that are omitted.

    The label, readonly flag and source type are not passed on the
    command line; the test expects them to come out as "unlabeled",
    false and a ``JSONSource`` respectively, alongside the explicitly
    supplied values.
    """
    config = FakeTesting.config(
        parse_unknown(
            "--test-fake-name",
            "feedface",
            "--test-num",
            "-4.2",
            "--test-files",
            "a",
            "b",
            "c",
            "--test-source-filename",
            "file.json",
            "--test-features",
            "Year:int:1",
            "Commits:int:10",
            "--test-fake-nums",
            "100",
        )
    )
    self.assertEqual(config.num, -4.2)
    self.assertEqual(config.files, ["a", "b", "c"])
    self.assertEqual(config.name, "feedface")
    self.assertEqual(config.label, "unlabeled")
    self.assertFalse(config.readonly)
    # assertIsInstance reports the actual type when the check fails,
    # unlike assertTrue(isinstance(...)) which only says "False is not true"
    self.assertIsInstance(config.source, JSONSource)
    self.assertEqual(
        config.source.config.filename, pathlib.Path("file.json")
    )
    self.assertEqual(
        config.features,
        Features(Feature("Year", int, 1), Feature("Commits", int, 10)),
    )
    self.assertEqual(config.nums, (100,))
def setUpClass(cls):
    """Prepare the anomaly model and a synthetic labelled dataset.

    A temporary directory is created for the model, the model is
    instantiated, and 1800 records are generated from
    ``np.random.normal(2, 1)`` samples. The label "Y" is 1 when
    ``A > 1 - B`` and 0 otherwise. The first 1400 records form the
    training source and the remaining ones the test source.
    """
    # Temporary directory in which the trained model is stored
    cls.model_dir = tempfile.TemporaryDirectory()

    # Model under test
    cls.model = AnomalyModel(
        features=Features(
            Feature("A", int, 1),
            Feature("B", int, 2),
        ),
        predict=Feature("Y", int, 1),
        directory=cls.model_dir.name,
    )

    # Synthetic data
    total_records = 1800
    training_records = 1400
    a_samples, b_samples = np.random.normal(2, 1, size=(2, total_records))
    cls.records = [
        Record(
            "x" + str(random.random()),
            data={
                "features": {
                    "A": float(a),
                    "B": float(b),
                    # Comparison yields a numpy boolean; cast it to 0/1
                    "Y": (a > 1 - b).astype(int),
                }
            },
        )
        for a, b in zip(a_samples, b_samples)
    ]

    # Split the records into a training part and a test part
    cls.trainingsource = Sources(
        MemorySource(
            MemorySourceConfig(records=cls.records[:training_records])
        )
    )
    cls.testsource = Sources(
        MemorySource(
            MemorySourceConfig(records=cls.records[training_records:])
        )
    )
async def test_predict(self):
    """Train, score and predict with the scikit linear regression model.

    The train / score / predict cycle runs twice: first with CSV sources,
    then with the in-memory data unpacked from ``self.train_data``,
    ``self.test_data`` and ``self.predict_data``. Each time the first two
    predictions are expected to round to salaries of 70 and 80.
    """

    def assert_salaries(results):
        # Index 2 of each prediction holds the mapping with the predicted value
        for index, expected_salary in enumerate((70, 80)):
            self.assertEqual(
                round(results[index][2]["Salary"]["value"]),
                expected_salary,
            )

    self.required_plugins("dffml-model-scikit")
    # The scikit plugin is imported lazily, after the plugin check above
    scikit_plugin = importlib.import_module("dffml_model_scikit")

    # Model under test
    regression = scikit_plugin.LinearRegressionModel(
        location=self.mktempdir(),
        predict=Feature("Salary", int, 1),
        features=Features(
            Feature("Years", int, 1),
            Feature("Expertise", int, 1),
            Feature("Trust", float, 1),
        ),
    )

    train_source = CSVSource(filename=self.train_filename)
    test_source = CSVSource(filename=self.test_filename)
    predict_source = CSVSource(filename=self.predict_filename)

    # Train the model
    await train(regression, train_source)
    # Assess accuracy
    mse_scorer = MeanSquaredErrorAccuracy()
    await score(
        regression, mse_scorer, Feature("Salary", int, 1), test_source
    )
    # Make predictions
    from_sources = [
        item async for item in predict(regression, predict_source)
    ]
    assert_salaries(from_sources)

    # Same cycle again, with the input data given as lists
    await train(regression, *self.train_data)
    await score(
        regression, mse_scorer, Feature("Salary", int, 1), *self.test_data
    )
    from_lists = [
        item async for item in predict(regression, *self.predict_data)
    ]
    assert_salaries(from_lists)
from dffml_model_tensorflow.dnnr import (
    DNNRegressionModel,
    DNNRegressionModelConfig,
)

# One read-only CSV source each for training, testing and prediction.
# A generator expression is used so no helper names leak into the module.
training_data, test_data, predict_data = (
    CSVSource(CSVSourceConfig(filename=csv_name, readonly=True))
    for csv_name in ("training.csv", "test.csv", "predict.csv")
)

# Regression model predicting "Salary" from the three features below
model = DNNRegressionModel(
    DNNRegressionModelConfig(
        features=Features(
            DefFeature("Years", int, 1),
            DefFeature("Expertise", int, 1),
            DefFeature("Trust", float, 1),
        ),
        predict="Salary",
    )
)

# Each operation object is built and then invoked immediately
Train(model=model, sources=[training_data])()

accuracy = Accuracy(model=model, sources=[test_data])()

# Exactly two prediction rows are unpacked from the result
row0, row1 = PredictAll(model=model, sources=[predict_data])()

print("Accuracy", accuracy)
print(row0)
print(row1)