from knock52 import read_data
from sklearn import metrics
import joblib


def accuracy(feature, label, model_name, vectorizer_name):
    """Return the accuracy of a saved model on the given examples.

    Args:
        feature: Raw features; passed to the saved vectorizer's
            ``transform`` before prediction.
        label: Reference labels the predictions are compared against.
        model_name: Path of the joblib-serialized model.
        vectorizer_name: Path of the joblib-serialized vectorizer.

    Returns:
        The result of ``metrics.accuracy_score(label, prediction)``.
    """
    model = joblib.load(model_name)
    vectorizer = joblib.load(vectorizer_name)
    x = vectorizer.transform(feature)
    prediction = model.predict(x)
    return metrics.accuracy_score(label, prediction)


if __name__ == "__main__":
    # The feature files are opened with ``with`` so the handles are closed
    # deterministically. The data is consumed inside each block because
    # whether read_data reads the file eagerly is not visible from here.
    with open('train.feature.txt') as train:
        train_ftr, train_label = read_data(train)
        train_acc = accuracy(train_ftr, train_label,
                             'model.pkl', 'vectorizer.pkl')
    print('Train Accuracy: ' + str(round(train_acc, 6)))

    with open('test.feature.txt') as test:
        test_ftr, test_label = read_data(test)
        test_acc = accuracy(test_ftr, test_label,
                            'model.pkl', 'vectorizer.pkl')
    print('Test Accuracy: ' + str(round(test_acc, 6)))

# Output recorded by the original author:
'''
Train Accuracy: 0.996441
Test Accuracy: 0.828571
'''
from knock52 import read_data
import joblib


def test(feature):
    """Print the saved model's predictions and prediction probabilities.

    Args:
        feature: Already-vectorized input; passed unchanged to the model's
            ``predict`` and ``predict_proba``.
    """
    model = joblib.load('model.pkl')
    print('Prediction: ' + str(model.predict(feature)))
    print('Prediction Probability: ' + str(model.predict_proba(feature)))


if __name__ == "__main__":
    # ``with`` closes the feature file even if reading or vectorizing fails.
    # Vectorizing happens inside the block because whether read_data reads
    # the file eagerly is not visible from here.
    with open('test.feature.txt') as text:
        feature, label = read_data(text)  # label is not needed for prediction
        vectorizer = joblib.load('vectorizer.pkl')
        x_test = vectorizer.transform(feature)
    test(x_test)
from knock52 import read_data
import joblib
from sklearn import metrics


def calc_score(feature, label):
    """Print a classification report for the saved model.

    Loads ``model.pkl`` and ``vectorizer.pkl``, vectorizes ``feature``,
    predicts, and prints ``metrics.classification_report`` of the
    predictions against ``label``.

    Args:
        feature: Raw features; passed to the saved vectorizer's ``transform``.
        label: Reference labels the predictions are compared against.
    """
    model = joblib.load('model.pkl')
    vectorizer = joblib.load('vectorizer.pkl')
    x = vectorizer.transform(feature)
    prediction = model.predict(x)
    print(metrics.classification_report(label, prediction))


if __name__ == "__main__":
    # ``with`` closes the feature file deterministically. Scoring runs
    # inside the block because whether read_data reads the file eagerly is
    # not visible from here.
    with open('test.feature.txt') as test:
        test_ftr, test_label = read_data(test)
        calc_score(test_ftr, test_label)
from knock52 import read_data, vectorize
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB

# Search for the training algorithms and parameters that achieve the best
# accuracy score on the validation data.
# Then compute its accuracy score on the test data.

if __name__ == "__main__":
    vectorizer = joblib.load('vectorizer.pkl')

    # Each split is read and vectorized inside its own ``with`` block so the
    # file handle is closed as soon as the features have been transformed.
    # Transforming inside the block keeps this correct even if read_data
    # yields features lazily (not visible from here).
    with open('train.feature.txt') as train:
        train_ftr, train_label = read_data(train)
        x_train = vectorizer.transform(train_ftr)
    y_train = train_label

    with open('valid.feature.txt') as valid:
        valid_ftr, valid_label = read_data(valid)
        x_valid = vectorizer.transform(valid_ftr)
    y_valid = valid_label

    with open('test.feature.txt') as test:
        test_ftr, test_label = read_data(test)
        x_test = vectorizer.transform(test_ftr)
    y_test = test_label

    # Multinomial NB
    nb_model = MultinomialNB()
    nb_model.fit(x_train, y_train)