Пример #1
0
from knock52 import read_data
from sklearn import metrics
import joblib


def accuracy(feature, label, model_name, vectorizer_name):
    """Return the accuracy score of a saved model on the given samples.

    The model is loaded from ``model_name`` and the vectorizer from
    ``vectorizer_name`` via ``joblib.load``. ``feature`` is passed through
    the vectorizer's ``transform`` and the model's ``predict``, and the
    predictions are scored against ``label`` with
    ``metrics.accuracy_score``.
    """
    clf = joblib.load(model_name)
    vec = joblib.load(vectorizer_name)
    predicted = clf.predict(vec.transform(feature))
    return metrics.accuracy_score(label, predicted)


if __name__ == "__main__":
    # Report the saved model's accuracy on the training and test splits.
    #
    # Each feature file is opened in a ``with`` block so the handle is closed
    # even if evaluation raises (the original never closed either file).
    # The score is computed inside the block so the file is still open while
    # the features are consumed, in case read_data reads lazily.
    with open('train.feature.txt') as train:
        train_ftr, train_label = read_data(train)
        train_acc = accuracy(train_ftr, train_label,
                             'model.pkl', 'vectorizer.pkl')
    print('Train Accuracy: ' + str(round(train_acc, 6)))

    with open('test.feature.txt') as test:
        test_ftr, test_label = read_data(test)
        test_acc = accuracy(test_ftr, test_label,
                            'model.pkl', 'vectorizer.pkl')
    print('Test Accuracy: ' + str(round(test_acc, 6)))
'''
Train Accuracy: 0.996441
Test Accuracy: 0.828571
'''
Пример #2
0
from knock52 import read_data
import joblib


def test(feature):
    """Print the saved model's predictions and probabilities for ``feature``.

    The model is loaded from ``model.pkl`` with ``joblib.load``; the results
    of its ``predict`` and ``predict_proba`` methods are printed in turn.
    """
    classifier = joblib.load('model.pkl')

    predicted = classifier.predict(feature)
    print(f'Prediction: {predicted!s}')

    probabilities = classifier.predict_proba(feature)
    print(f'Prediction Probability: {probabilities!s}')


if __name__ == "__main__":
    # Vectorize the test split and print the saved model's predictions.
    vectorizer = joblib.load('vectorizer.pkl')

    # Use a ``with`` block so the file handle is always closed (the original
    # left it open). The transform runs inside the block so the file is still
    # open while the features are consumed, in case read_data reads lazily.
    with open('test.feature.txt') as text:
        feature, label = read_data(text)
        x_test = vectorizer.transform(feature)

    test(x_test)
Пример #3
0
from knock52 import read_data
import joblib
from sklearn import metrics


def calc_score(feature, label):
    """Print a classification report for the saved model on the given data.

    Loads ``model.pkl`` and ``vectorizer.pkl`` with ``joblib.load``,
    vectorizes ``feature``, predicts with the model, and prints the output
    of ``metrics.classification_report`` comparing ``label`` against the
    predictions.
    """
    clf = joblib.load('model.pkl')
    vec = joblib.load('vectorizer.pkl')
    predicted = clf.predict(vec.transform(feature))
    report = metrics.classification_report(label, predicted)
    print(report)


if __name__ == "__main__":
    # Print the classification report for the test split.
    #
    # The file is opened in a ``with`` block so the handle is closed even if
    # scoring raises (the original never closed it). Scoring happens inside
    # the block so the file is still open while the features are consumed,
    # in case read_data reads lazily.
    with open('test.feature.txt') as test:
        test_ftr, test_label = read_data(test)
        calc_score(test_ftr, test_label)
Пример #4
0
from knock52 import read_data, vectorize
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB

# Search for the training algorithm and parameters that achieve
# the best accuracy score on the validation data.
# Then compute that model's accuracy score on the test data.

if __name__ == "__main__":
    vectorizer = joblib.load('vectorizer.pkl')

    train = open('train.feature.txt')
    train_ftr, train_label = read_data(train)
    valid = open('valid.feature.txt')
    valid_ftr, valid_label = read_data(valid)
    test = open('test.feature.txt')
    test_ftr, test_label = read_data(test)

    x_train = vectorizer.transform(train_ftr)
    y_train = train_label
    x_valid = vectorizer.transform(valid_ftr)
    y_valid = valid_label
    x_test = vectorizer.transform(test_ftr)
    y_test = test_label

    # Multinomial NB
    nb_model = MultinomialNB()
    nb_model.fit(x_train, y_train)