def fit(self, X):
    """Calibrate a depth-vs-sample-size regression, then fit the final forest.

    For every integer exponent in ``[self.lower_bound, self.higher_bound)``
    a bootstrap sample of ``2 ** exponent`` rows is drawn from ``X``, a
    probe forest is fitted on it, and the most populated unit-width bin of
    the averaged leaf depths is recorded. A linear regression without
    intercept is then fitted from exponent to that modal depth and stored
    in ``self.reg``. Finally a forest is fitted on all of ``X`` with
    ``self.max_depth`` and stored in ``self.clf``.

    Note: this seeds NumPy's *global* random generator with
    ``self.random_state``.

    Args:
        X: Training data; indexed by an integer array along its first
            axis and expected to expose ``shape`` and ``len``.

    Returns:
        The fitted instance itself.
    """
    np.random.seed(self.random_state)
    self.n_sample = X.shape[0]

    exponents = []
    modal_depths = []
    for exponent in np.arange(self.lower_bound, self.higher_bound):
        subsample_size = 2 ** exponent

        # Bootstrap (with replacement) row indices from the full data set.
        row_idx = np.random.choice(
            self.n_sample, subsample_size, replace=True
        )
        subsample = X[row_idx]

        probe = IsolationForest(
            random_state=self.random_state,
            max_samples=subsample_size,
            contamination='auto',
        )
        # Very large max_depth: effectively no depth cap for the probe.
        probe = probe.fit(subsample, max_depth=100000000)

        # NOTE(review): presumably element [0] holds per-tree leaf depths
        # and axis 0 runs over trees -- confirm against the helper.
        leaf_depths = probe._compute_actual_depth_leaf(subsample)[0]
        mean_depths = np.mean(leaf_depths, axis=0)

        # Unit-width bins spanning the observed depth range.
        lowest = int(mean_depths.min())
        highest = int(mean_depths.max() + 2)
        counts, edges = np.histogram(
            mean_depths, bins=np.arange(lowest, highest)
        )
        counts = counts + 1
        left_edges = edges[:-1]

        # Left edge of the fullest bin is the modal depth for this size.
        peak = np.argmax(counts)
        exponents.append([exponent])
        modal_depths.append(left_edges[peak])

    regressor = LinearRegression(fit_intercept=False)
    self.reg = regressor.fit(exponents, modal_depths)

    final_forest = IsolationForest(
        random_state=self.random_state,
        max_samples=len(X),
        contamination='auto',
    )
    self.clf = final_forest.fit(X, max_depth=self.max_depth)
    return self
def __init__(self, n_estimators=100, max_samples=256):
    """Set up an unfitted isolation-forest model and default state.

    Args:
        n_estimators: Forwarded to ``IsolationForest`` as ``n_estimators``.
        max_samples: Forwarded to ``IsolationForest`` as ``max_samples``.
    """
    # Pass by keyword: scikit-learn estimators accept keyword-only
    # constructor parameters in current releases, so a positional call
    # raises TypeError there. Keywords work on older releases as well.
    # NOTE(review): assumes IsolationForest is scikit-learn's (or uses the
    # same parameter names) -- confirm against the file's imports.
    self.model = IsolationForest(
        n_estimators=n_estimators,
        max_samples=max_samples,
    )
    self.threshold = 0.6  # Recommended threshold in IForest paper.
    self.trainedStatus = False