def test_predict_proba(self, data):
    """Check the shape and first row of ``VWClassifier.predict_proba``.

    Fits a default ``VWClassifier`` on ``data.x`` / ``data.y`` and predicts
    probabilities for the same inputs. ``data`` is presumably a fixture
    holding 100 samples -- confirm against the fixture definition.
    """
    classifier = VWClassifier()
    classifier.fit(data.x, data.y)

    probabilities = classifier.predict_proba(data.x)

    # One output row is expected per input sample.
    row_count = probabilities.shape[0]
    assert row_count == 100

    # Regression guard: the first row must match the reference values
    # within an absolute tolerance of 1e-4.
    first_row = probabilities[0]
    assert np.allclose(first_row, [0.3997, 0.6003], atol=1e-4)
# Dump the loaded inputs and their shapes before training.
print(seenMovie)
print(metadata)
print("Data loaded")
print(seenMovie.shape, '\t', metadata.shape)

# Cast the target to integers before it is split and handed to the classifier.
seenMovie = seenMovie.astype('int')

# Split into train and test sets (30% held out); the fixed random_state keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    metadata, seenMovie, test_size=0.3, random_state=256
)

# Build the VW logistic regression model.
# Reference values (presumably recorded from an earlier run -- confirm):
#   LogLossVal: 0.013283467177640678
#   Mean Square Error of the Log for the 1st model: 0.00018478019510039388
print('Train\n', y_train)
model = VWClassifier(loss_function='logistic')
model.fit(X_train, y_train)

# Predict class probabilities for the held-out set.
y_pred = model.predict_proba(X_test)
print(y_pred)
print("Training complete for model 1...")

print("starting LogLoss...")
# Log loss of the logistic regression model on the test split.
# `eps` is deliberately not passed: scikit-learn deprecated it in 1.3 and
# removed it in 1.5, so supplying it raises TypeError on current releases.
# The other arguments that used to be spelled out (normalize=True,
# sample_weight=None, labels=None) are the documented defaults.
logLossVal = log_loss(y_test, y_pred)

# Disabled alternative metrics, kept for reference.
# NOTE(review): y_pred holds predict_proba output, so the label-based metrics
# below would presumably need model.predict(X_test) instead -- confirm before
# re-enabling.
# r2_score_nnls = r2_score(y_test, y_pred)
# print("NNLS R2 score", r2_score_nnls)
# mse_1 = calculateMeanSquareError(y_test, y_pred)
# m1_recall = recall_score(y_test, y_pred, average='binary', zero_division=0)
# m1_precision = precision_score(y_test, y_pred, average='binary', zero_division=0)
# accuracy_m1 = accuracy_score(y_test, y_pred)
# print("Model 1 Accuracy: ", accuracy_m1)
print("LogLoss Model 1: ", logLossVal)
# print("Mean Square Error of the Log for the 1st model: ", mse_1)