示例#1
0
def wrap_high_level_accuracy(state):
    """
    Create an SLR model, train it on four in-memory records, then yield once.

    The model maps the integer feature ``Years`` to the integer ``Salary`` and
    is configured with ``location="tempdir"``. ``state`` is accepted but never
    read in this function. The bare ``yield`` makes this a generator function.
    NOTE(review): presumably it is driven as a setup hook or context manager;
    confirm at the call site.
    """
    model = SLRModel(
        features=Features(Feature("Years", int, 1)),
        predict=Feature("Salary", int, 1),
        location="tempdir",
    )

    # (Years, Salary) pairs used as training records, in training order
    samples = ((0, 10), (1, 20), (2, 30), (3, 40))
    records = [{"Years": years, "Salary": salary} for years, salary in samples]
    train(model, *records)

    yield
示例#2
0
    def test_predict(self):
        """
        Train, score and predict with the scikit LinearRegressionModel.

        The model is loaded from the ``dffml_model_scikit`` module by name
        after ``self.required_plugins`` has been called. Training, test and
        prediction data come from the CSV files named by
        ``self.train_filename``, ``self.test_filename`` and
        ``self.predict_filename``. The first two predicted ``Salary`` values
        must round to 70 and 80.
        """
        self.required_plugins("dffml-model-scikit")
        # Resolve the plugin module by name instead of a top-level import
        scikit_plugin = importlib.import_module("dffml_model_scikit")
        regression = scikit_plugin.LinearRegressionModel(
            directory=self.mktempdir(),
            predict=Feature("Salary", int, 1),
            features=Features(
                Feature("Years", int, 1),
                Feature("Expertise", int, 1),
                Feature("Trust", float, 1),
            ),
        )

        train_source = CSVSource(filename=self.train_filename)
        test_source = CSVSource(filename=self.test_filename)
        predict_source = CSVSource(filename=self.predict_filename)

        # Fit, then score (the score itself is not checked here)
        train(regression, train_source)
        accuracy(regression, test_source)

        # Each result is indexed with [2] to reach the prediction mapping
        results = list(predict(regression, predict_source))
        for position, expected_salary in enumerate((70, 80)):
            predicted = results[position][2]["Salary"]["value"]
            self.assertEqual(round(predicted), expected_salary)
示例#3
0
def wrap_noasync_accuracy(state):
    """
    Create an SLR model, train it on four in-memory records, then yield once.

    The model maps the integer feature ``Years`` to the integer ``Salary`` and
    is configured with ``directory="tempdir"``. ``state`` is accepted but never
    read in this function. The bare ``yield`` makes this a generator function.
    NOTE(review): presumably it is driven as a setup hook or context manager;
    confirm at the call site.
    """
    model = SLRModel(
        features=Features(Feature("Years", int, 1)),
        predict=Feature("Salary", int, 1),
        directory="tempdir",
    )

    # (Years, Salary) pairs used as training records, in training order
    samples = ((0, 10), (1, 20), (2, 30), (3, 40))
    records = [{"Years": years, "Salary": salary} for years, salary in samples]
    train(model, *records)

    yield
示例#4
0
def main():
    """
    Train ``model`` on train.csv, print its accuracy on test.csv, then print
    every record from predict.csv with the predicted ``TARGET`` filled in.

    ``model`` is taken from the enclosing module scope.
    """
    train(model, "train.csv")

    test_accuracy = accuracy(model, "test.csv")
    print("Accuracy:", test_accuracy)

    # The first element of each result is not needed here
    for _, row, result in predict(model, "predict.csv"):
        row["TARGET"] = result["TARGET"]["value"]
        print(row)
示例#5
0
def main():
    """
    Train ``model`` on train.csv, print its mean squared error score on
    test.csv, then print every record from predict.csv with the predicted
    ``TARGET`` filled in.

    ``model`` is taken from the enclosing module scope.
    """
    train(model, "train.csv")

    # Score the float feature TARGET with a MeanSquaredErrorAccuracy scorer
    scorer = MeanSquaredErrorAccuracy()
    target = Feature("TARGET", float, 1)
    print("Accuracy:", score(model, scorer, target, "test.csv"))

    # The first element of each result is not needed here
    for _, row, result in predict(model, "predict.csv"):
        row["TARGET"] = result["TARGET"]["value"]
        print(row)
示例#6
0
from dffml import CSVSource, Features, Feature
from dffml.noasync import train, accuracy, predict
from dffml_model_scratch.logisticregression import LogisticRegression

# One float input feature "f1" is used to predict the integer "ans"
model = LogisticRegression(
    predict=Feature("ans", int, 1),
    features=Features(Feature("f1", float, 1)),
)

# Fit on the CSV, given as a bare filename
train(model, "dataset.csv")

# Score on the same CSV, this time wrapped in an explicit CSVSource
dataset_source = CSVSource(filename="dataset.csv")
print("Accuracy:", accuracy(model, dataset_source))

# Predict for a single in-memory record and print it with the predicted "ans"
sample = {"f1": 0.8, "ans": 0}
for i, features, prediction in predict(model, sample):
    features["ans"] = prediction["ans"]["value"]
    print(features)
示例#7
0
from dffml import CSVSource, DefFeature
from dffml.noasync import train, accuracy, predict
from dffml_model_transformers.ner.ner_model import NERModel

# Configure the NER model: a sentence id feature, a words feature and the tag
# to predict.
# The sentence id feature name is spelled "SentenceID" so that it matches the
# key used by the prediction records below (it was previously "SentenceId",
# which none of those records contain).
# NOTE(review): train.csv is not visible from here; confirm its header uses
# the same "SentenceID" spelling.
model = NERModel(
    sid=DefFeature("SentenceID", int, 1),
    words=DefFeature("Words", str, 1),
    predict=DefFeature("Tag", str, 1),
    model_architecture_type="distilbert",
    model_name_or_path="distilbert-base-cased",
    epochs=1,
    no_cuda=True,
)

# Train the model
train(model, "train.csv")

# Assess accuracy (alternate way of specifying data source); note that the
# training file itself is scored here
print("Accuracy:", accuracy(model, CSVSource(filename="train.csv")))

# Make predictions for two in-memory sentences and print each record with its
# predicted tag filled in
for i, features, prediction in predict(
    model,
    {"SentenceID": 1, "Words": "DFFML models can do NER",},
    {"SentenceID": 2, "Words": "DFFML models can do regression",},
):
    features["Tag"] = prediction["Tag"]["value"]
    print(features)
示例#8
0
# Fit the model on four in-memory records, one per line
train(
    model,
    {"Years": 0, "Expertise": 1, "Trust": 0.1, "Salary": 10},
    {"Years": 1, "Expertise": 3, "Trust": 0.2, "Salary": 20},
    {"Years": 2, "Expertise": 5, "Trust": 0.3, "Salary": 30},
    {"Years": 3, "Expertise": 7, "Trust": 0.4, "Salary": 40},
)

# Assess accuracy
示例#9
0
from dffml import CSVSource, Features, Feature
from dffml.noasync import train, accuracy, predict
from dffml_model_scikit import LinearRegressionModel

# Years, Expertise and Trust are the inputs; the integer Salary is predicted.
# The model is configured with the "tempdir" directory.
model = LinearRegressionModel(
    directory="tempdir",
    predict=Feature("Salary", int, 1),
    features=Features(
        Feature("Years", int, 1),
        Feature("Expertise", int, 1),
        Feature("Trust", float, 1),
    ),
)

# Fit on the training CSV, given as a bare filename
train(model, "training.csv")

# Score on test.csv, this time wrapped in an explicit CSVSource
test_source = CSVSource(filename="test.csv")
print("Accuracy:", accuracy(model, test_source))

# Make prediction
for i, features, prediction in predict(
        model,
    {
        "Years": 6,
        "Expertise": 13,
        "Trust": 0.7
    },
    {
        "Years": 7,
        "Expertise": 15,
示例#10
0
            float,
        )),
        predict=Feature("target", float, 1),
        directory="model",
        max_depth=3,
        learning_rate=0.01,
        n_estimators=200,
        reg_lambda=1,
        reg_alpha=0,
        gamma=0,
        colsample_bytree=0,
        subsample=1,
    ))

# Fit on the training split: one {"data", "target"} record per sample
train(
    model,
    *(
        {"data": sample, "target": label}
        for sample, label in zip(trainX, trainy)
    ),
)

# Report the score on the test split, using records of the same shape
test_accuracy = accuracy(
    model,
    *(
        {"data": sample, "target": label}
        for sample, label in zip(testX, testy)
    ),
)
print("Test accuracy:", test_accuracy)
print(
    "Training accuracy:",
    accuracy(model, *[{
        "data": x,
        "target": y
    } for x, y in zip(trainX, trainy)]),
示例#11
0
# Four float measurements are the inputs; the integer "classification" is
# predicted and may take the values 0, 1 or 2.
model = DNNClassifierModel(
    features=Features(
        *(
            DefFeature(measurement, float, 1)
            for measurement in (
                "SepalLength",
                "SepalWidth",
                "PetalLength",
                "PetalWidth",
            )
        )
    ),
    predict=DefFeature("classification", int, 1),
    epochs=3000,
    steps=20000,
    classifications=[0, 1, 2],
    clstype=int,
)

# Fit on the training CSV, given as a bare filename
train(model, "iris_training.csv")

# Score on the test CSV, this time wrapped in an explicit CSVSource
test_source = CSVSource(filename="iris_test.csv")
print("Accuracy:", accuracy(model, test_source))

# Make prediction
for i, features, prediction in predict(
        model,
    {
        "PetalLength": 4.2,
        "PetalWidth": 1.5,
        "SepalLength": 5.9,
        "SepalWidth": 3.0,
    },
    {
        "PetalLength": 5.4,
示例#12
0
from dffml_model_scratch.anomalydetection import AnomalyModel
from dffml_model_scratch.anomaly_detection_scorer import (
    AnomalyDetectionAccuracy,
)

# Configure the model

# Feature "A" (int, length 2) is the input; the integer "Y" is predicted.
# The model is configured with the "model" location.
model = AnomalyModel(
    location="model",
    predict=Feature("Y", int, 1),
    features=Features(Feature("A", int, 2),),
)


# Fit on the training CSV
train(model, "trainex.csv")

# Report the score of feature "Y" for the test set first, then for the
# training set, using one shared scorer
scorer = AnomalyDetectionAccuracy()
for label, dataset in (
    ("Test set F1 score :", "testex.csv"),
    ("Training set F1 score :", "trainex.csv"),
):
    print(
        label,
        score(model, scorer, Feature("Y", int, 1), dataset),
    )