X = X.astype(float)
X = scale(X)

# remove useless features:
useless_features = (X.max(axis=0) - X.min(axis=0)) == 0
for j in reversed(range(n_features)):
    if useless_features[j]:
        X = np.delete(X, j, axis=1)

X_train = X[:n_samples_train, :]
X_test = X[n_samples_train:, :]
y_train = y[:n_samples_train]
y_test = y[n_samples_train:]

### cross-val: ####
parameters = {'max_depth': [1000],
              'max_samples': [.1],
              'max_features': [min(10, n_features)],
              'n_estimators': [50]}
model = OneClassRF()
clf = grid_search.GridSearchCV(model, parameters, refit=False, cv=2)
clf.fit(X_train, y_train)
print('clf.best_params_', clf.best_params_)
model.set_params(**clf.best_params_)

print('OneClassRF processing...')
# weird: without CV but with the same parameters, no error:
# model = OneClassRF(max_depth=1000, max_samples=.1,
#                    max_features=min(10, n_features), n_estimators=50,
#                    random_state=rng, n_jobs=-1)
# (commented out since cross-validation is used instead)

tstart = time()
### training only on normal data:
X_train = X_train[y_train == 0]
y_train = y_train[y_train == 0]
model.fit(X_train)
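# Hedged sketch (not in the original script): scoring the held-out split once
# the model is fit. It assumes, as in the benchmark loop below, that
# model.predict returns an anomaly score where lower means more normal, and
# that y_test labels anomalies with 1.
from sklearn.metrics import roc_auc_score

print('fit time: %0.2fs' % (time() - tstart))
scoring = model.predict(X_test)  # anomaly score per test sample
print('AUC:', roc_auc_score(y_test, scoring))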
precision = np.zeros(n_axis)
fit_time = 0
predict_time = 0
try:
    for ne in range(nb_exp):
        print('exp num:', ne)
        X, y = sh(X, y)
        X_train = X[:n_samples_train, :]
        X_test = X[n_samples_train:, :]
        y_train = y[:n_samples_train]
        y_test = y[n_samples_train:]

        print('OneClassRF processing...')
        model = OneClassRF()
        # # training only on normal data: (not supported in cv)
        # X_train = X_train[y_train == 0]
        # y_train = y_train[y_train == 0]

        tstart = time()
        model.fit(X_train)
        fit_time += time() - tstart

        tstart = time()
        scoring = model.predict(X_test)  # the lower, the more normal
        # scoring = scale(scoring)
        predict_time += time() - tstart

        fpr_, tpr_ = roc_curve(y_test, scoring)[:2]
        f = interp1d(fpr_, tpr_)
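        # Hedged sketch (not in the original excerpt): a typical way to
        # average the interpolated ROC over the nb_exp shuffles. The grid
        # `x_axis` and accumulator `tpr` are illustrative assumptions:
        #   x_axis = np.linspace(0., 1., n_axis)  # common FPR grid
        #   tpr = np.zeros(n_axis)                # init once, before the loop
        #   tpr += f(x_axis)                      # accumulate each experiment
        # ...then after the loop: tpr /= nb_exp; auc = np.trapz(tpr, x_axis)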
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import OneClassRF
from sklearn.preprocessing import scale

rng = np.random.RandomState(42)

# Generate train data
X = 0.3 * rng.randn(100, 2)
X_train = np.r_[X + 2, X - 2]

# fit the model
clf = OneClassRF(n_estimators=1, max_samples=1., max_depth=4, random_state=rng)
clf.fit(X_train)

# y_pred_train = -clf.decision_function(X_train)
# y_pred_test = -clf.decision_function(X_test)
# y_pred_outliers = -clf.decision_function(X_outliers)

# plot the line, the samples, and the nearest vectors to the plane
xx, yy = np.meshgrid(np.linspace(-5, 5, 200), np.linspace(-5, 5, 200))
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Z = scale(Z)
Z = np.log(Z)

# plt.title("OneClassRF")
levels = np.linspace(Z.min(), Z.max(), 1000)
plt.contourf(xx, yy, Z, cmap=plt.cm.Blues, levels=levels)
plt.scatter(X_train[:, 0], X_train[:, 1], c='white')
plt.axis('tight')
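# (a closing plt.show() presumably follows; the excerpt stops at plt.axis)
plt.show()

# Hedged alternative (not in the original example): OneClassRF exists only in
# this experimental branch. On stock scikit-learn the same visualisation runs
# with IsolationForest, which shares the fit/decision_function usage above.
from sklearn.ensemble import IsolationForest

iso = IsolationForest(n_estimators=100, random_state=42)
iso.fit(X_train)
Z_iso = iso.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.figure()
plt.contourf(xx, yy, Z_iso, cmap=plt.cm.Blues)
plt.scatter(X_train[:, 0], X_train[:, 1], c='white')
plt.axis('tight')
plt.show()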