async def test_predict(self): self.required_plugins("dffml-model-scikit") # Import SciKit modules dffml_model_scikit = importlib.import_module("dffml_model_scikit") # Instantiate the model model = dffml_model_scikit.LinearRegressionModel( directory=self.mktempdir(), predict=DefFeature("Salary", int, 1), features=Features( DefFeature("Years", int, 1), DefFeature("Expertise", int, 1), DefFeature("Trust", float, 1), ), ) training_data = CSVSource(filename=self.train_filename) test_data = CSVSource(filename=self.test_filename) predict_data = CSVSource(filename=self.predict_filename) # Train the model await train(model, training_data) # Assess accuracy await accuracy(model, test_data) # Make prediction predictions = [ prediction async for prediction in predict(model, predict_data) ] self.assertEqual(predictions[0][2]["Salary"]["value"], 70) self.assertEqual(predictions[1][2]["Salary"]["value"], 80)
async def test_02_predict(self):
    # Get the prediction for each piece of test data
    async for i, features, prediction in predict(self.model, *self.test_data):
        # Grab the correct value
        correct = self.test_data[i]["Y"]
        # Grab the predicted value
        prediction = prediction["Y"]["value"]
        # Check that the prediction is within 10% error of the correct value
        acceptable = 0.1
        self.assertLess(prediction, correct * (1.0 + acceptable))
        self.assertGreater(prediction, correct * (1.0 - acceptable))
async def test_02_predict(self):
    target_name = self.model.config.predict.name
    predictions = [
        prediction
        async for prediction in predict(self.model, self.sources)
    ]
    self.assertIn(predictions[0][2][target_name]["value"], ["0", "1"])
    self.assertIn(predictions[1][2][target_name]["value"], ["0", "1"])
async def test_02_predict(self): predictions = [ prediction async for prediction in predict(self.model, self.test_sources) ] self.assertIn( isinstance(predictions[0][2]["Answer"]["value"]["0"], str), [True]) self.assertIn( isinstance(predictions[1][2]["Answer"]["value"]["1"], str), [True])
async def test_02_predict(self): # Get the prediction for each piece of test data async for i, features, prediction in predict(self.model, *self.test_data): # Grab the correct value correct = self.test_data[i]["Y"] # Grab the predicted value prediction = prediction["Y"]["value"] # Check that the percent error is less than 10% self.assertLess(prediction, correct * 1.1) self.assertGreater(prediction, correct * (1.0 - 0.1))
async def test_02_predict(self):
    predictions = [
        prediction
        async for prediction in predict(self.model, self.test_sources)
    ]
    self.assertIsInstance(predictions[0][2]["Tag"]["value"][0], tuple)
    self.assertIn(
        predictions[0][2]["Tag"]["value"][0][1], ["ORG", "PERSON", "LOC"]
    )
async def test_predict(self): self.required_plugins("dffml-model-scikit") # Import SciKit modules dffml_model_scikit = importlib.import_module("dffml_model_scikit") # Instantiate the model model = dffml_model_scikit.LinearRegressionModel( location=self.mktempdir(), predict=Feature("Salary", int, 1), features=Features( Feature("Years", int, 1), Feature("Expertise", int, 1), Feature("Trust", float, 1), ), ) training_data = CSVSource(filename=self.train_filename) test_data = CSVSource(filename=self.test_filename) predict_data = CSVSource(filename=self.predict_filename) # Train the model await train(model, training_data) # Assess accuracy scorer = MeanSquaredErrorAccuracy() await score(model, scorer, Feature("Salary", int, 1), test_data) # Make prediction predictions = [ prediction async for prediction in predict(model, predict_data) ] self.assertEqual(round(predictions[0][2]["Salary"]["value"]), 70) self.assertEqual(round(predictions[1][2]["Salary"]["value"]), 80) # Test input data as list await train(model, *self.train_data) await score(model, scorer, Feature("Salary", int, 1), *self.test_data) predictions = [ prediction async for prediction in predict(model, *self.predict_data) ] self.assertEqual(round(predictions[0][2]["Salary"]["value"]), 70) self.assertEqual(round(predictions[1][2]["Salary"]["value"]), 80)
async def test_02_predict(self): # Get the prediction for each piece of test data async for i, features, prediction in predict( self.model, *[{"X": x, "Y": y} for x, y in TEST_DATA] ): # Grab the correct value correct = features["Y"] # Grab the predicted value prediction = prediction["Y"]["value"] # Check that the prediction is within 10% error of the actual value acceptable = 0.1 self.assertLess(prediction, correct * (1.0 + acceptable)) self.assertGreater(prediction, correct * (1.0 - acceptable))
async def test_02_predict(self): # Get the prediction for each piece of test data async for i, features, prediction in predict(self.model, self.testsource): # Grab the correct value correct = features["Target"] # Grab the predicted value prediction = prediction["Target"]["value"] # Check that the prediction is within 30% error of the actual value error = abs((prediction - correct) / correct) acceptable = 0.5 # Sometimes causes an issue when only one data point anomalously has high error self.assertLess(error, acceptable)
async def test_03_example(self):
    # Check for unstable data. If the data you are using changes from giving
    # you 0 to 1 binary information to -1 to 1, that could be detrimental to
    # the output of the model.

    # Unique values in target at training time
    unique_train = set()
    for c in self.records:
        unique_train.add(c.data.features["Target"])
    # Unique values in target after prediction
    unique_predict = set()
    async for i, features, prediction in predict(self.model, self.testsource):
        unique_predict.add(prediction["Target"]["value"])
    # Values in both sets must be equal
    self.assertEqual(unique_predict, unique_train)
async def test_04_example(self):
    # Check that the model also works well on imbalanced data

    # List of correct values of target
    correct = []
    # List of predicted values of target
    predictions = []
    async for i, features, prediction in predict(self.model, self.testsource):
        correct.append(features["Target"])
        predictions.append(prediction["Target"]["value"])
    # Calculate the F1 score
    res = f1_score(correct, predictions, average="micro")
    # Ensure the F1 score is above 90%
    self.assertGreaterEqual(res, 0.9)
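The micro-averaged F1 score used above pools true/false positive counts across all classes before computing the score, which keeps a majority class from dominating the metric on imbalanced data. A minimal, self-contained sketch with illustrative values (f1_score comes from scikit-learn):

    from sklearn.metrics import f1_score

    # For single-label multiclass data, micro-averaged F1 equals plain
    # accuracy: here 3 of the 4 predictions match, giving 0.75
    print(f1_score([0, 1, 1, 0], [0, 1, 0, 0], average="micro"))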
async def main(): # Configure the model model = MySLRModel( feature=Feature("Years", int, 1), predict=Feature("Salary", int, 1), directory="model", ) # Train the model await train(model, "train.csv") # Assess accuracy print("Accuracy:", await accuracy(model, "test.csv")) # Make predictions async for i, features, prediction in predict(model, "predict.csv"): features["Salary"] = prediction["Salary"]["value"] print(features)
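To run a coroutine like main() as a script, the standard asyncio entry point works (assuming Python 3.7+ and that train.csv, test.csv, and predict.csv exist in the working directory):

    import asyncio

    if __name__ == "__main__":
        # Drive the async main() from the synchronous script entry point
        asyncio.run(main())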
async def test_model(self):
    # Insert a zero so that the list is 1-indexable
    test_feature_val = [0, 1.5, 2]
    # Should be the same function used in TestDNN.setUpClass
    test_target = 2 * test_feature_val[1] + 3 * test_feature_val[2]
    a = Record(
        "a",
        data={
            "features": {
                self.feature1.name: test_feature_val[1],
                self.feature2.name: test_feature_val[2],
            }
        },
    )
    target_name = self.model.config.predict.name
    for i in range(0, 7):
        await train(self.model, self.sources)
        res = await accuracy(self.model, self.sources)
        # Retry because of TensorFlow's intermittent low accuracy
        if res <= 0.8 and i < 5:
            print("Retry i:", i, "accuracy:", res)
            self.model_dir.cleanup()
            self.model_dir = tempfile.TemporaryDirectory()
            self.model.config = self.model.config._replace(
                directory=self.model_dir.name
            )
            continue
        self.assertGreater(res, 0.8)
    res = [
        record
        async for record in predict(self.model, a, keep_record=True)
    ]
    self.assertEqual(len(res), 1)
    self.assertEqual(res[0].key, a.key)
    # Small epsilon in the denominator guards against division by zero
    test_error_norm = abs(
        (test_target - res[0].prediction(target_name).value)
        / (test_target + 1e-6)
    )
    error_threshold = 0.3
    self.assertLess(test_error_norm, error_threshold)
async def test_02_predict(self):
    right_prediction = 0
    wrong_prediction = 0
    # Get the prediction for each piece of test data
    async for i, features, prediction in predict(self.model, self.testsource):
        # Grab the correct value
        correct = features["Y"]
        # Grab the predicted value
        prediction = prediction["Y"]["value"]
        if prediction == correct:
            right_prediction += 1
        else:
            wrong_prediction += 1
    # Check that more than 80% of the predictions are correct
    acceptable = 0.8
    self.assertGreater(
        right_prediction,
        (right_prediction + wrong_prediction) * acceptable,
    )
async def test_02_predict(self):
    # A single data point with anomalously high error can sometimes cause
    # an issue, so we count the number of errors and provide a threshold
    # over which the whole test fails
    unacceptable_error = 0
    # Get the prediction for each piece of test data
    async for i, features, prediction in predict(self.model, self.testsource):
        # Grab the correct value
        correct = features["Target"]
        # Grab the predicted value
        prediction = prediction["Target"]["value"]
        # Check that the prediction is within 30% error of the actual value
        error = abs((prediction - correct) / correct)
        acceptable = 0.3
        if error > acceptable:
            unacceptable_error += 1
    # Test fails if more than N data points were out of acceptable error
    self.assertLess(unacceptable_error, 10)
async def main():
    logging.basicConfig(level=logging.DEBUG)

    # Train the model
    await train(model, train_source)

    logging.getLogger().setLevel(logging.CRITICAL)

    # Assess the accuracy
    acc = await accuracy(model, test_source)
    print("\nTesting Accuracy: ", acc)

    # Make predictions
    print(
        "\n{:>40} \t {:>10} \t {:>10}\n".format(
            "Image filename", "Prediction", "Confidence"
        )
    )
    async for key, features, prediction in predict(model, predict_source):
        print(
            "{:>40} \t {:>10} \t {:>10}".format(
                "rps-predict/" + key,
                prediction["label"]["value"],
                prediction["label"]["confidence"],
            )
        )
async def main():
    # Train the model
    await train(model, train_source)

    # Assess the accuracy
    acc = await accuracy(model, test_source)
    print("\nTesting Accuracy: ", acc)

    # Make predictions
    print(
        "\n{:>40} \t {:>10} \t {:>10}\n".format(
            "Image filename", "Prediction", "Confidence"
        )
    )
    async for key, features, prediction in predict(model, predict_source):
        print(
            "{:>40} \t {:>10} \t {:>10}".format(
                "rps-predict/" + key,
                prediction["label"]["value"],
                prediction["label"]["confidence"],
            )
        )
async def test_model(self):
    for i in range(0, 7):
        await train(self.model, self.sources)
        res = await accuracy(self.model, self.sources)
        # Retry because of TensorFlow's intermittent low accuracy
        if res <= 0.9 and i < 5:
            print("Retry i:", i, "accuracy:", res)
            self.model_dir.cleanup()
            self.model_dir = tempfile.TemporaryDirectory()
            self.model.config = self.model.config._replace(
                directory=self.model_dir.name
            )
            continue
        self.assertGreater(res, 0.9)
    a = Record("a", data={"features": {self.feature.name: 1}})
    target_name = self.model.config.predict.name
    res = [
        record
        async for record in predict(self.model, a, keep_record=True)
    ]
    self.assertEqual(len(res), 1)
    self.assertEqual(res[0].key, a.key)
    self.assertTrue(res[0].prediction(target_name).value)
async def main():
    # Configure the model
    model = MySLRModel(
        features=Features(Feature("Years", int, 1)),
        predict=Feature("Salary", int, 1),
        location="model",
    )

    # Train the model
    await train(model, "train.csv")

    # Assess accuracy
    scorer = MeanSquaredErrorAccuracy()
    print(
        "Accuracy:",
        await score(model, scorer, Feature("Salary", int, 1), "test.csv"),
    )

    # Make predictions
    async for i, features, prediction in predict(model, "predict.csv"):
        features["Salary"] = prediction["Salary"]["value"]
        print(features)
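The CSV file names above are assumptions about the working directory. A train.csv needs a header row matching the feature and target names; an illustrative layout, with values mirroring the inline training data in the examples below:

    Years,Salary
    0,10
    1,20
    2,30
    3,40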
async def main():
    model = LinearRegressionModel(
        features=Features(
            DefFeature("Years", int, 1),
            DefFeature("Expertise", int, 1),
            DefFeature("Trust", float, 1),
        ),
        predict=DefFeature("Salary", int, 1),
    )

    # Train the model
    await train(
        model,
        {"Years": 0, "Expertise": 1, "Trust": 0.1, "Salary": 10},
        {"Years": 1, "Expertise": 3, "Trust": 0.2, "Salary": 20},
        {"Years": 2, "Expertise": 5, "Trust": 0.3, "Salary": 30},
        {"Years": 3, "Expertise": 7, "Trust": 0.4, "Salary": 40},
    )

    # Assess accuracy
    print(
        "Accuracy:",
        await accuracy(
            model,
            {"Years": 4, "Expertise": 9, "Trust": 0.5, "Salary": 50},
            {"Years": 5, "Expertise": 11, "Trust": 0.6, "Salary": 60},
        ),
    )

    # Make prediction
    async for i, features, prediction in predict(
        model,
        {"Years": 6, "Expertise": 13, "Trust": 0.7},
        {"Years": 7, "Expertise": 15, "Trust": 0.8},
    ):
        features["Salary"] = prediction["Salary"]["value"]
        print(features)
async def main():
    model = LinearRegressionModel(
        features=Features(
            Feature("Years", int, 1),
            Feature("Expertise", int, 1),
            Feature("Trust", float, 1),
        ),
        predict=Feature("Salary", int, 1),
        location="tempdir",
    )

    # Train the model
    await train(
        model,
        {"Years": 0, "Expertise": 1, "Trust": 0.1, "Salary": 10},
        {"Years": 1, "Expertise": 3, "Trust": 0.2, "Salary": 20},
        {"Years": 2, "Expertise": 5, "Trust": 0.3, "Salary": 30},
        {"Years": 3, "Expertise": 7, "Trust": 0.4, "Salary": 40},
    )

    # Assess accuracy
    scorer = MeanSquaredErrorAccuracy()
    print(
        "Accuracy:",
        await score(
            model,
            scorer,
            Feature("Salary", int, 1),
            {"Years": 4, "Expertise": 9, "Trust": 0.5, "Salary": 50},
            {"Years": 5, "Expertise": 11, "Trust": 0.6, "Salary": 60},
        ),
    )

    # Make prediction
    async for i, features, prediction in predict(
        model,
        {"Years": 6, "Expertise": 13, "Trust": 0.7},
        {"Years": 7, "Expertise": 15, "Trust": 0.8},
    ):
        features["Salary"] = prediction["Salary"]["value"]
        print(features)