Exemplo n.º 1
0
    async def test_predict(self):
        """Train, assess and query a scikit linear regression model.

        The model is trained on the training CSV, assessed on the test CSV,
        and the "Salary" values predicted for the first two items yielded
        for the prediction CSV are checked against 70 and 80.
        """
        self.required_plugins("dffml-model-scikit")
        # Import SciKit modules
        dffml_model_scikit = importlib.import_module("dffml_model_scikit")
        # Instantiate the model
        model = dffml_model_scikit.LinearRegressionModel(
            directory=self.mktempdir(),
            predict=DefFeature("Salary", int, 1),
            features=Features(
                DefFeature("Years", int, 1),
                DefFeature("Expertise", int, 1),
                DefFeature("Trust", float, 1),
            ),
        )

        training_data = CSVSource(filename=self.train_filename)
        test_data = CSVSource(filename=self.test_filename)
        predict_data = CSVSource(filename=self.predict_filename)

        # Train the model
        await train(model, training_data)
        # Assess accuracy
        await accuracy(model, test_data)
        # Make prediction
        predictions = [
            prediction async for prediction in predict(model, predict_data)
        ]
        # Index 2 of each yielded item is the prediction mapping. The value
        # is the output of a fitted regression, so it is rounded before the
        # comparison instead of requiring a bit-exact match.
        self.assertEqual(round(predictions[0][2]["Salary"]["value"]), 70)
        self.assertEqual(round(predictions[1][2]["Salary"]["value"]), 80)
Exemplo n.º 2
0
 async def test_02_predict(self):
     """Read each prediction for the test data next to its known value.

     NOTE(review): the two values read in the loop are never asserted on;
     confirm whether a tolerance check is meant to follow.
     """
     async for index, _features, result in predict(self.model,
                                                   *self.test_data):
         # Known "Y" of the record this prediction belongs to
         expected = self.test_data[index]["Y"]
         # "Y" value produced by the model for the same record
         predicted = result["Y"]["value"]
Exemplo n.º 3
0
 async def test_02_predict(self):
     """Check that the first two predictions carry the label "0" or "1"."""
     # Name of the feature the model is configured to predict
     label = self.model.config.predict.name
     results = []
     async for item in predict(self.model, self.sources):
         results.append(item)
     allowed = ["0", "1"]
     # Index 2 of each yielded item is the prediction mapping
     for position in (0, 1):
         self.assertIn(results[position][2][label]["value"], allowed)
Exemplo n.º 4
0
    async def test_02_predict(self):
        """Check that the first two predicted answers are strings.

        The "Answer" value of each prediction is indexed by a string key
        ("0" for the first item, "1" for the second) and the entry found
        there must be a ``str``.
        """
        predictions = [
            prediction
            async for prediction in predict(self.model, self.test_sources)
        ]

        # assertIsInstance reports the offending value and its type on
        # failure, unlike a membership test on the result of isinstance().
        self.assertIsInstance(predictions[0][2]["Answer"]["value"]["0"], str)
        self.assertIsInstance(predictions[1][2]["Answer"]["value"]["1"], str)
Exemplo n.º 5
0
 async def test_02_predict(self):
     """Check that every prediction is within 10% of the known value.

     The deviation is compared against 10% of the magnitude of the known
     value, so the check holds for negative targets as well as positive
     ones.
     """
     # Maximum relative error tolerated for a single prediction
     tolerance = 0.1
     # Get the prediction for each piece of test data
     async for i, features, prediction in predict(self.model,
                                                  *self.test_data):
         # Grab the correct value
         correct = self.test_data[i]["Y"]
         # Grab the predicted value
         predicted = prediction["Y"]["value"]
         # Scaling a negative target by 1.1 and 0.9 would swap the upper
         # and lower bounds; comparing absolute values avoids that.
         self.assertLess(abs(predicted - correct), abs(correct) * tolerance)
Exemplo n.º 6
0
 async def test_02_predict(self):
     """Check the shape and label of the first predicted tag.

     The first entry of the "Tag" value of the first prediction must be a
     tuple whose second element is one of "ORG", "PERSON" or "LOC".
     """
     predictions = [
         prediction
         async for prediction in predict(self.model, self.test_sources)
     ]
     first_tag = predictions[0][2]["Tag"]["value"][0]
     # assertIsInstance shows the actual value and type when it fails
     self.assertIsInstance(first_tag, tuple)
     self.assertIn(first_tag[1], ["ORG", "PERSON", "LOC"])
Exemplo n.º 7
0
    async def test_predict(self):
        """Run train / score / predict twice: from CSV sources, then records.

        Both passes expect the rounded "Salary" predictions of the first two
        yielded items to be 70 and 80.
        """
        self.required_plugins("dffml-model-scikit")
        # Load the scikit model package by name
        scikit = importlib.import_module("dffml_model_scikit")
        regression = scikit.LinearRegressionModel(
            location=self.mktempdir(),
            predict=Feature("Salary", int, 1),
            features=Features(
                Feature("Years", int, 1),
                Feature("Expertise", int, 1),
                Feature("Trust", float, 1),
            ),
        )

        train_source = CSVSource(filename=self.train_filename)
        test_source = CSVSource(filename=self.test_filename)
        predict_source = CSVSource(filename=self.predict_filename)

        # First pass: data supplied through CSV sources
        await train(regression, train_source)
        scorer = MeanSquaredErrorAccuracy()
        await score(regression, scorer, Feature("Salary", int, 1), test_source)
        from_sources = []
        async for item in predict(regression, predict_source):
            from_sources.append(item)
        for position, salary in enumerate((70, 80)):
            self.assertEqual(
                round(from_sources[position][2]["Salary"]["value"]), salary
            )

        # Second pass: the same data supplied as unpacked sequences
        await train(regression, *self.train_data)
        await score(
            regression, scorer, Feature("Salary", int, 1), *self.test_data
        )
        from_records = []
        async for item in predict(regression, *self.predict_data):
            from_records.append(item)
        for position, salary in enumerate((70, 80)):
            self.assertEqual(
                round(from_records[position][2]["Salary"]["value"]), salary
            )
Exemplo n.º 8
0
 async def test_02_predict(self):
     """Check that every prediction is within 10% of the actual value.

     Each (x, y) pair of TEST_DATA is turned into an {"X", "Y"} record and
     passed to predict(). The absolute deviation is compared against 10% of
     the magnitude of the actual value, so negative targets are handled.
     """
     # Maximum relative error tolerated; constant for the whole loop
     acceptable = 0.1
     # Get the prediction for each piece of test data
     async for i, features, prediction in predict(
         self.model, *[{"X": x, "Y": y} for x, y in TEST_DATA]
     ):
         # Grab the correct value
         correct = features["Y"]
         # Grab the predicted value
         predicted = prediction["Y"]["value"]
         # Multiplying a negative value by (1 + acceptable) and
         # (1 - acceptable) would swap the bounds, so compare magnitudes.
         self.assertLess(abs(predicted - correct), abs(correct) * acceptable)
Exemplo n.º 9
0
    async def test_02_predict(self):
        """Check that each prediction's relative error is below 50%.

        The test fails on the first record whose relative error reaches the
        threshold.
        """
        # Maximum relative error tolerated per record (0.5 == 50%)
        acceptable = 0.5
        # Get the prediction for each piece of test data
        async for i, features, prediction in predict(self.model,
                                                     self.testsource):
            # Grab the correct value
            correct = features["Target"]
            # Grab the predicted value
            predicted = prediction["Target"]["value"]
            # Relative error of the prediction.
            # NOTE(review): divides by the correct value, so a target of
            # zero is not handled.
            error = abs((predicted - correct) / correct)
            # A single data point with anomalously high error is enough to
            # fail the whole test.
            self.assertLess(error, acceptable)
Exemplo n.º 10
0
    async def test_03_example(self):
        """Check that predictions use exactly the target values seen in training.

        Guards against unstable label encoding: if the data switched, for
        example, from 0/1 binary values to -1/1, the model output would no
        longer line up with what it was trained on.
        """
        # Unique target values present at training time
        unique_train = {
            record.data.features["Target"] for record in self.records
        }

        # Unique target values produced by prediction
        unique_predict = set()
        async for i, features, prediction in predict(self.model,
                                                     self.testsource):
            unique_predict.add(prediction["Target"]["value"])

        # Both sets must be equal; assertEqual shows the differing elements
        # on failure, which assertTrue(a == b) does not.
        self.assertEqual(unique_predict, unique_train)
Exemplo n.º 11
0
    async def test_04_example(self):
        """Check that the micro-averaged F1 score of the predictions is >= 0.9.

        F1 is used so the check stays meaningful on imbalanced data.
        """
        # Correct target values, in prediction order
        correct = []
        # Predicted target values, in the same order
        predictions = []
        async for i, features, prediction in predict(self.model,
                                                     self.testsource):
            correct.append(features["Target"])
            predictions.append(prediction["Target"]["value"])

        # Calculate the F1 score
        res = f1_score(correct, predictions, average="micro")

        # Ensure the F1 score is at least 90%; assertGreaterEqual reports
        # the actual score when the check fails.
        self.assertGreaterEqual(res, 0.9)
Exemplo n.º 12
0
async def main():
    """Train the model on train.csv, score it on test.csv, then predict.

    Each record yielded for predict.csv gets its "Salary" entry set to the
    predicted value and is printed.
    """
    # Model predicting "Salary" from "Years", stored under "model"
    slr = MySLRModel(
        feature=Feature("Years", int, 1),
        predict=Feature("Salary", int, 1),
        directory="model",
    )

    await train(slr, "train.csv")

    test_accuracy = await accuracy(slr, "test.csv")
    print("Accuracy:", test_accuracy)

    async for _, row, predicted in predict(slr, "predict.csv"):
        # Store the predicted salary on the record before printing it
        row["Salary"] = predicted["Salary"]["value"]
        print(row)
Exemplo n.º 13
0
 async def test_model(self):
     """Train the model with retries, then check one prediction's error.

     Up to 7 train/accuracy rounds are run. While the accuracy is at or
     below 0.8 and fewer than 5 retries have been used, the model directory
     is replaced by a fresh temporary one and the round is retried. After
     that the accuracy must exceed 0.8 and the prediction for one
     hand-built record must be within the error threshold of the expected
     target.

     NOTE(review): there is no ``break`` after a passing round, so every
     remaining iteration trains and asserts again; confirm this is intended.
     """
     test_feature_val = [
         0,
         1.5,
         2,
     ]  # leading zero is padding so the feature values are 1-indexable
     # Expected target for the two feature values above
     test_target = 2 * test_feature_val[1] + 3 * test_feature_val[2]
     # should be the same function as the one used in TestDNN.setupclass
     a = Record(
         "a",
         data={
             "features": {
                 self.feature1.name: test_feature_val[1],
                 self.feature2.name: test_feature_val[2],
             }
         },
     )
     # Name of the feature the model is configured to predict
     target_name = self.model.config.predict.name
     for i in range(0, 7):
         await train(self.model, self.sources)
         res = await accuracy(self.model, self.sources)
         # Retry because of intermittent low accuracy with tensorflow
         if res <= 0.8 and i < 5:
             print("Retry i:", i, "accuracy:", res)
             # Discard the current model directory and point the model
             # config at a fresh temporary one before the next round
             self.model_dir.cleanup()
             self.model_dir = tempfile.TemporaryDirectory()
             self.model.config = self.model.config._replace(
                 directory=self.model_dir.name
             )
             continue
         self.assertGreater(res, 0.8)
         # ``res`` is reused: from here on it holds the predicted records
         res = [
             record
             async for record in predict(self.model, a, keep_record=True)
         ]
         self.assertEqual(len(res), 1)
         self.assertEqual(res[0].key, a.key)
         # Relative error between expected and predicted target.
         # NOTE(review): by operator precedence 1e-6 is added to the
         # quotient, not to the denominator; confirm that is intended.
         test_error_norm = abs(
             (test_target - res[0].prediction(target_name).value)
             / test_target
             + 1e-6
         )
         error_threshold = 0.3
         self.assertLess(test_error_norm, error_threshold)
Exemplo n.º 14
0
 async def test_02_predict(self):
     """Check that more than 80% of the predictions equal the known "Y"."""
     hits = 0
     total = 0
     async for _, known, result in predict(self.model, self.testsource):
         # Known value first, then the value the model produced
         expected = known["Y"]
         predicted = result["Y"]["value"]
         if predicted == expected:
             hits += 1
         total += 1
     # Share of predictions that has to be exceeded by exact matches
     required_ratio = 0.8
     self.assertGreater(hits, total * required_ratio)
Exemplo n.º 15
0
    async def test_02_predict(self):
        """Check that fewer than 10 predictions miss the target by over 30%.

        A single anomalous data point should not fail the test, so records
        whose relative error exceeds the tolerance are counted and only the
        total is asserted on.
        """
        # Maximum relative error for a prediction to count as acceptable
        tolerance = 0.3
        outliers = 0
        async for _, known, result in predict(self.model, self.testsource):
            expected = known["Target"]
            predicted = result["Target"]["value"]
            relative_error = abs((predicted - expected) / expected)
            if relative_error > tolerance:
                outliers += 1

        # The test fails once 10 or more data points were out of tolerance
        self.assertLess(outliers, 10)
Exemplo n.º 16
0
async def main():
    """Train with debug logging, then print accuracy and a prediction table.

    Logging is raised to CRITICAL after training so only the printed report
    remains. Each table row shows the image path, the predicted label and
    its confidence.
    """
    logging.basicConfig(level=logging.DEBUG)

    await train(model, train_source)

    # Quieten the root logger for the reporting part
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.CRITICAL)

    acc = await accuracy(model, test_source)
    print("\nTesting Accuracy: ", acc)

    header_template = "\n{:>40} \t {:>10} \t {:>10}\n"
    print(header_template.format("Image filename", "Prediction", "Confidence"))

    row_template = "{:>40} \t {:>10} \t {:>10}"
    async for key, features, prediction in predict(model, predict_source):
        image_path = "rps-predict/" + key
        label_value = prediction["label"]["value"]
        label_confidence = prediction["label"]["confidence"]
        print(row_template.format(image_path, label_value, label_confidence))
0
async def main():
    """Train the model, print its test accuracy, then a prediction table.

    Each table row shows the image path, the predicted label and the
    confidence reported for that label.
    """
    await train(model, train_source)

    acc = await accuracy(model, test_source)
    print("\nTesting Accuracy: ", acc)

    # Right-aligned column layout for the header and for every row
    heading = "\n{:>40} \t {:>10} \t {:>10}\n".format(
        "Image filename", "Prediction", "Confidence"
    )
    print(heading)

    async for key, features, prediction in predict(model, predict_source):
        line = "{:>40} \t {:>10} \t {:>10}".format(
            "rps-predict/" + key,
            prediction["label"]["value"],
            prediction["label"]["confidence"],
        )
        print(line)
Exemplo n.º 18
0
 async def test_model(self):
     """Train with retries, then check that one record gets a truthy prediction.

     Up to 7 train/accuracy rounds are run. While the accuracy is at or
     below 0.9 and fewer than 5 retries have been used, the model directory
     is swapped for a fresh temporary one and the round is retried.
     """
     for attempt in range(7):
         await train(self.model, self.sources)
         measured = await accuracy(self.model, self.sources)
         # Retry because of intermittent low accuracy with tensorflow
         needs_retry = measured <= 0.9 and attempt < 5
         if needs_retry:
             print("Retry i:", attempt, "accuracy:", measured)
             # Start over from an empty model directory
             self.model_dir.cleanup()
             self.model_dir = tempfile.TemporaryDirectory()
             self.model.config = self.model.config._replace(
                 directory=self.model_dir.name)
             continue
         self.assertGreater(measured, 0.9)
         sample = Record("a", data={"features": {self.feature.name: 1}})
         target_name = self.model.config.predict.name
         predicted_records = []
         async for record in predict(self.model, sample, keep_record=True):
             predicted_records.append(record)
         self.assertEqual(len(predicted_records), 1)
         first = predicted_records[0]
         self.assertEqual(first.key, sample.key)
         self.assertTrue(first.prediction(target_name).value)
Exemplo n.º 19
0
async def main():
    """Train on train.csv, print the score on test.csv, then predict.

    Each record yielded for predict.csv gets its "Salary" entry set to the
    predicted value and is printed.
    """
    # Model predicting "Salary" from "Years", stored under "model"
    slr = MySLRModel(
        features=Features(Feature("Years", int, 1)),
        predict=Feature("Salary", int, 1),
        location="model",
    )

    await train(slr, "train.csv")

    # Score the "Salary" predictions with the mean squared error scorer
    scorer = MeanSquaredErrorAccuracy()
    test_score = await score(
        slr, scorer, Feature("Salary", int, 1), "test.csv"
    )
    print("Accuracy:", test_score)

    async for _, row, predicted in predict(slr, "predict.csv"):
        row["Salary"] = predicted["Salary"]["value"]
        print(row)
Exemplo n.º 20
0
async def main():
    """Train, assess and query a linear regression model on inline records.

    Four records train the model, two assess its accuracy, and two records
    without "Salary" are predicted; each of those is printed with the
    predicted salary stored under "Salary".
    """
    regression = LinearRegressionModel(
        features=Features(
            DefFeature("Years", int, 1),
            DefFeature("Expertise", int, 1),
            DefFeature("Trust", float, 1),
        ),
        predict=DefFeature("Salary", int, 1),
    )

    training_records = [
        {"Years": 0, "Expertise": 1, "Trust": 0.1, "Salary": 10},
        {"Years": 1, "Expertise": 3, "Trust": 0.2, "Salary": 20},
        {"Years": 2, "Expertise": 5, "Trust": 0.3, "Salary": 30},
        {"Years": 3, "Expertise": 7, "Trust": 0.4, "Salary": 40},
    ]
    await train(regression, *training_records)

    assessment_records = [
        {"Years": 4, "Expertise": 9, "Trust": 0.5, "Salary": 50},
        {"Years": 5, "Expertise": 11, "Trust": 0.6, "Salary": 60},
    ]
    measured = await accuracy(regression, *assessment_records)
    print("Accuracy:", measured)

    unlabeled_records = [
        {"Years": 6, "Expertise": 13, "Trust": 0.7},
        {"Years": 7, "Expertise": 15, "Trust": 0.8},
    ]
    async for _, row, predicted in predict(regression, *unlabeled_records):
        row["Salary"] = predicted["Salary"]["value"]
        print(row)
Exemplo n.º 21
0
async def main():
    """Train, score and query a linear regression model on inline records.

    Four records train the model, two are scored with the mean squared
    error scorer, and two records without "Salary" are predicted; each of
    those is printed with the predicted salary stored under "Salary".
    """
    regression = LinearRegressionModel(
        features=Features(
            Feature("Years", int, 1),
            Feature("Expertise", int, 1),
            Feature("Trust", float, 1),
        ),
        predict=Feature("Salary", int, 1),
        location="tempdir",
    )

    columns = ("Years", "Expertise", "Trust", "Salary")

    def as_records(*rows):
        # Pair each row with the column names; rows with three values
        # produce records without a "Salary" entry.
        return [dict(zip(columns, row)) for row in rows]

    training_records = as_records(
        (0, 1, 0.1, 10),
        (1, 3, 0.2, 20),
        (2, 5, 0.3, 30),
        (3, 7, 0.4, 40),
    )
    await train(regression, *training_records)

    scorer = MeanSquaredErrorAccuracy()
    scoring_records = as_records(
        (4, 9, 0.5, 50),
        (5, 11, 0.6, 60),
    )
    measured = await score(
        regression, scorer, Feature("Salary", int, 1), *scoring_records
    )
    print("Accuracy:", measured)

    unlabeled_records = as_records(
        (6, 13, 0.7),
        (7, 15, 0.8),
    )
    async for _, row, predicted in predict(regression, *unlabeled_records):
        row["Salary"] = predicted["Salary"]["value"]
        print(row)