def test_decision_function(self, data):
    """Check that VWClassifier.predict agrees with thresholded raw VW output.

    A plain ``VW`` model with logistic loss is fitted on ``data.x`` /
    ``data.y``; its predictions are thresholded at 0 and mapped onto the
    labels ``-1.`` / ``1.``.  A default ``VWClassifier`` fitted on the same
    data must predict the same labels (compared with ``np.allclose``).

    Args:
        data: fixture object exposing ``x`` (features) and ``y`` (targets).
    """
    classes = np.array([-1., 1.])

    raw_model = VW(loss_function='logistic')
    raw_model.fit(data.x, data.y)
    predictions = raw_model.predict(data.x)

    # Use the builtin ``int``: the ``np.int`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    class_indices = (predictions > 0).astype(int)
    # Index 0 -> -1., index 1 -> 1.
    class_predictions = classes[class_indices]

    model = VWClassifier()
    model.fit(data.x, data.y)

    assert np.allclose(class_predictions, model.predict(data.x))
def sanitycheck(self):
    """Fit a default VWClassifier on a small synthetic set and score it.

    Draws 1000 samples with ``datasets.make_hastie_10_2``, casts the features
    to ``float32``, holds out 20% of the rows for testing, then fits, predicts
    and scores the model on both partitions.

    Returns:
        None.  The predictions and both scores are computed but neither
        returned nor asserted on.
    """
    features, labels = datasets.make_hastie_10_2(n_samples=1000, random_state=1)
    features = features.astype(numpy.float32)

    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.2, random_state=256
    )

    classifier = VWClassifier()
    classifier.fit(train_features, train_labels)

    # NOTE(review): the three results below are never used afterwards.
    test_predictions = classifier.predict(test_features)
    train_score = classifier.score(train_features, train_labels)
    test_score = classifier.score(test_features, test_labels)
# ----------------------------------------------------------------------------------------------------------------------
from vowpalwabbit.sklearn_vw import VWClassifier

# Toy data set: four two-feature samples, the first two labelled -1 and the
# last two labelled 1.
X = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [-1, -1, 1, 1]

# Logistic-loss classifier.
# NOTE(review): `l` and `l2` are presumably the learning rate and the L2
# regularisation strength -- confirm against the installed VW wrapper.
model = VWClassifier(
    loss_function='logistic',
    l=0.01,
    l2=0.1,
)
model.fit(X, y)

# Print the predictions on the training samples, then the score on them.
print(model.predict(X))
print(model.score(X, y))
# Load CADEC documents, split them 75/25, train a VWClassifier and print a
# classification report.
#
# NOTE(review): `i`, `objects`, `dic`, `document`, `transform` and `clean` are
# not defined anywhere in the visible source.  The first four statements
# presumably form the body of a loop over `i` whose header is out of view --
# confirm before relying on the flat layout shown here.

# Pick the i-th entry of each directory listing.
# NOTE(review): os.listdir returns entries in arbitrary order, so pairing the
# two listings by index assumes they line up -- confirm.
text_filename = os.listdir('D:/cadec/text')[i]
Ori_filename = os.listdir('D:/cadec/Ori')[i]
objects.append( document('D:/cadec/text/' + text_filename, 'D:/cadec/Ori/' + Ori_filename, dic))
print(i)

# `sel` equals len(objects), so objects[:sel] is the whole list.
sel = (len(objects))
x, y, l = transform(objects[:sel], dic)

# First 75% of the documents go to training, the remainder to testing.
train_cut = int(0.75 * (len(objects)))
test_cut = (len(objects))
x_train, y_train, l_train = transform(objects[:train_cut], dic)
x_test, y_test, l_test = transform(objects[train_cut:test_cut], dic)

# Pass each feature/label pair through `clean` (defined outside this view).
x, y = clean(x, y)
x_train, y_train = clean(x_train, y_train)
x_test, y_test = clean(x_test, y_test)
print(x_test)
print(y_test)
print("completed")

# NOTE(review): the two vectorizers and the two *_len names are not used
# again in the visible code; train_len / test_len are bound to the data
# itself, not to a length.
vecx = DictVectorizer(sparse=True)
vecy = DictVectorizer(sparse=False)
train_len = (x_train)
test_len = (x_test)

# Fit a default VWClassifier on the training split and predict the test split.
clf = VWClassifier()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(len(y_pred))
print((y_pred))
print((y_test))

# Despite the name, `recall_str` holds the whole classification_report output.
recall_str = classification_report(y_test, y_pred)
print(recall_str)
# Disabled snippet that drives the pyvw API directly:
#
# from vowpalwabbit import pyvw
#
# vw = pyvw.vw(quiet=True)
# ex = vw.example('1 | a b c')
# vw.learn(ex)
# vw.predict(ex)

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from vowpalwabbit.pyvw import vw
from vowpalwabbit.sklearn_vw import VWClassifier

# Synthetic data: 10000 samples from make_hastie_10_2 with a fixed seed,
# features cast to float32.
X, y = datasets.make_hastie_10_2(n_samples=10000, random_state=1)
X = X.astype(np.float32)

# Hold out 20% of the rows as the test partition (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=256
)

# Fit a classifier with default settings on the training partition.
model = VWClassifier()
model.fit(X_train, y_train)

# Predict the held-out rows and show the predictions.
y_pred = model.predict(X_test)
print(y_pred)

# Score both partitions.
# NOTE(review): the returned scores are discarded, so nothing is shown when
# this runs as a plain script.
model.score(X_train, y_train)
model.score(X_test, y_test)