def _Unsupervised_nolabel(value_train, anomaly_ratio=0.004):
    """Detect anomalies in an unlabeled series with an Isolation Forest.

    Builds a feature matrix from sigma-distance, mean-difference,
    fluctuation, and Holt-Winters features, fits an IsolationForest on
    points [50:] (warm-up window of the rolling features), predicts on
    the full series, and plots the detected anomalies over the raw data.

    Parameters
    ----------
    value_train : sequence of float
        The raw time series.
    anomaly_ratio : float, optional
        Expected fraction of anomalous points; used both as the
        IsolationForest ``contamination`` and to size the Holt-Winters
        anomaly count.

    Returns
    -------
    ndarray of int
        Per-point labels from ``IsolationForest.predict``: -1 = anomaly,
        1 = normal, aligned with ``value_train``.
    """
    mean, sigma = np.mean(value_train), np.std(value_train)
    print("\nCalculating features...\n")
    # Moving-average features are computed but deliberately excluded from
    # the feature matrix (hence the plot title 'IForest_noMA').
    MA1, MA2, MA3, MA4, MA5, MA6, Dif1, Dif2, Dif3, Dif4, Dif5, Dif6 = \
        moving_average(value_train)
    feature_3_sigma = naive_sigma(value_train, 3, mean, sigma)
    feature_4_sigma = naive_sigma(value_train, 4, mean, sigma)
    feature_5_sigma = naive_sigma(value_train, 5, mean, sigma)
    feature_mean_diff = sigma_diff(value_train, mean)
    # NOTE(review): anomaly_cnt is a float here (len * ratio); confirm
    # holtWinters tolerates a non-integer count.
    (feature_holtwinter_diff, feature_holtwinter, predicted_holtwinter,
     hw_smoothdiff, hw_smooth, hw_mul, hw_sigma, hw_params) = holtWinters(
        value_train, anomaly_cnt=len(value_train) * anomaly_ratio, ahead=0)
    feature_fluc = get_fluc(value_train)

    print("Combining features...\n")
    feature_list = [feature_3_sigma, feature_4_sigma, feature_5_sigma,
                    feature_mean_diff, feature_holtwinter_diff,
                    feature_holtwinter, hw_smoothdiff, hw_smooth,
                    feature_fluc]
    # Full-length matrix for prediction; [50:] matrix for training so the
    # warm-up region of the rolling features does not skew the fit.
    # column_stack replaces the original quadratic loop of incremental
    # np.r_['1,2,0', ...] concatenations with a single allocation.
    features1 = np.column_stack(feature_list)
    features = np.column_stack([f[50:] for f in feature_list])

    print("Training...\n")
    clf = IsolationForest(contamination=anomaly_ratio, n_estimators=2000,
                          bootstrap=False, max_samples='auto')
    clf.fit(features)
    y = clf.predict(features1)

    outlierx, outliery = _getanomaly_graph(y, value_train, key=-1)
    print("Find Anomaly: ", len(outlierx))
    # Plot first, then build the legend: calling plt.legend() before any
    # labeled artists exist yields an empty legend and a warning.
    plt.plot(value_train, label='real_value')
    plt.scatter(outlierx, outliery, s=30, c='red', marker='x',
                label='detected_anomaly')
    plt.title('IForest_noMA')
    plt.legend(loc='upper left', shadow=True, fontsize='small')
    plt.show()
    return y
def _Unsupervised_withlabel(value_train, anomaly_train=None, anomaly_ratio=0.004):
    """Unsupervised anomaly detection scored against known labels.

    Fits an IsolationForest on Holt-Winters/sigma features of the series
    (training on points [50:], the warm-up window of the rolling
    features), predicts on the full series, and reports miss /
    false-report counts against the provided 0/1 labels, then plots the
    detections.

    Parameters
    ----------
    value_train : sequence of float
        The raw time series.
    anomaly_train : sequence of int
        Per-point ground-truth labels aligned with ``value_train``
        (1 = anomaly, 0 = normal). Required despite the ``None`` default,
        which is kept only for signature compatibility.
    anomaly_ratio : float, optional
        Expected fraction of anomalies; used as the IsolationForest
        ``contamination`` and to size the Holt-Winters anomaly count.

    Returns
    -------
    ndarray of int
        Per-point labels from ``IsolationForest.predict``: -1 = anomaly,
        1 = normal.

    Raises
    ------
    ValueError
        If ``anomaly_train`` is ``None``.
    """
    if anomaly_train is None:
        # Labels are dereferenced below; fail fast with a clear message
        # instead of a TypeError deep inside the scoring loop.
        raise ValueError("anomaly_train labels are required")
    Stdanomcnt = _InitialGraph(value_train, anomaly_train)
    mean, sigma = np.mean(value_train), np.std(value_train)
    feature_3_sigma = naive_sigma(value_train, 3, mean, sigma)
    feature_4_sigma = naive_sigma(value_train, 4, mean, sigma)
    feature_5_sigma = naive_sigma(value_train, 5, mean, sigma)
    feature_mean_diff = sigma_diff(value_train, mean)
    # NOTE(review): anomaly_cnt is a float here (len * ratio); confirm
    # holtWinters tolerates a non-integer count.
    (feature_holtwinter_diff, feature_holtwinter, predicted_holtwinter,
     hw_smoothdiff, hw_smooth, hw_mul, hw_sigma, hw_params) = holtWinters(
        value_train, anomaly_cnt=len(value_train) * anomaly_ratio, ahead=0)

    print("Combining features...\n")
    feature_list = [feature_3_sigma, feature_4_sigma, feature_5_sigma,
                    feature_mean_diff, feature_holtwinter_diff,
                    feature_holtwinter, hw_smoothdiff, hw_smooth]
    # Full-length matrix for prediction; [50:] matrix for training.
    # column_stack replaces the original quadratic loop of incremental
    # np.r_['1,2,0', ...] concatenations with a single allocation.
    features1 = np.column_stack(feature_list)
    features = np.column_stack([f[50:] for f in feature_list])

    print("Training...\n")
    clf = IsolationForest(n_estimators=100, contamination=anomaly_ratio,
                          bootstrap=True, max_samples='auto')
    clf.fit(features)
    y = clf.predict(features1)

    # Score predictions against ground truth: a -1 prediction on a normal
    # point is a false report; a non-(-1) prediction on a labeled anomaly
    # is a miss.
    misscnt, misrcnt = 0, 0
    for pred, truth in zip(y, anomaly_train):
        if pred == -1:
            if truth == 0:
                misrcnt += 1
        elif truth == 1:
            misscnt += 1

    outlierx, outliery = _getanomaly_graph(y, value_train, key=-1)
    print("All to find: ", Stdanomcnt)
    print("Find Anomaly: ", len(outlierx))
    print("Missing: ", misscnt)
    print("Missreport: ", misrcnt)
    # Plot first, then build the legend: calling plt.legend() before any
    # labeled artists exist yields an empty legend and a warning.
    plt.subplot(212)
    plt.plot(value_train, label='real_value')
    plt.scatter(outlierx, outliery, s=30, c='red', marker='x',
                label='detected_anomaly')
    plt.legend(loc='upper left', shadow=True, fontsize='small')
    plt.show()
    return y
def _train(value_train, anomaly_train, SavedSamples):
    """Train a supervised RandomForest anomaly classifier.

    Builds sigma-distance, mean-difference, and Holt-Winters features for
    the series, trains a RandomForestClassifier on points [50:] (warm-up
    window of the rolling features) against the 0/1 labels, prints the
    feature importances, and plots the series with the Holt-Winters
    forecast appended.

    Parameters
    ----------
    value_train : sequence of float
        The raw time series.
    anomaly_train : sequence of int
        Per-point 0/1 anomaly labels aligned with ``value_train``.
    SavedSamples : int
        Number of future samples Holt-Winters should forecast
        (passed as ``ahead``).

    Returns
    -------
    tuple
        ``(clf, predicted_holtwinter, params, hw_params)`` — the fitted
        classifier, the Holt-Winters forecast, a
        ``(mean, sigma, hw_mul, hw_sigma, series_length)`` tuple, and the
        Holt-Winters model parameters.
    """
    # Only the anomaly values are needed here; their count sizes the
    # Holt-Winters anomaly budget.
    _, anomaly_values = _getanomaly(anomaly_train)
    anomaly_cnt = len(anomaly_values)
    mean, sigma = np.mean(value_train), np.std(value_train)
    # Moving-average features are computed but deliberately excluded from
    # the training matrix.
    MA1, MA2, MA3, MA4, MA5, MA6, Dif1, Dif2, Dif3, Dif4, Dif5, Dif6 = \
        moving_average(value_train)
    feature_3_sigma = naive_sigma(value_train, 3, mean, sigma)
    feature_4_sigma = naive_sigma(value_train, 4, mean, sigma)
    feature_5_sigma = naive_sigma(value_train, 5, mean, sigma)
    feature_mean_diff = sigma_diff(value_train, mean)
    (feature_holtwinter_diff, feature_holtwinter, predicted_holtwinter,
     hw_smoothdiff, hw_smooth, hw_mul, hw_sigma, hw_params) = holtWinters(
        value_train, anomaly_cnt, ahead=SavedSamples)
    params = (mean, sigma, hw_mul, hw_sigma, len(value_train))

    print("Combining features...\n")
    feature_list = [feature_3_sigma, feature_4_sigma, feature_5_sigma,
                    feature_mean_diff, feature_holtwinter_diff,
                    feature_holtwinter, hw_smoothdiff, hw_smooth]
    # Skip the first 50 points: warm-up window of the rolling features.
    # The original also built a full-length matrix (features1) that was
    # never used — that dead computation is removed here.
    features = np.column_stack([f[50:] for f in feature_list])

    print("Training...\n")
    clf = RandomForestClassifier(n_estimators=100, max_depth=10)
    clf.fit(features, anomaly_train[50:])
    print(clf.feature_importances_)

    plt.plot(np.concatenate((value_train, predicted_holtwinter)),
             label='prediction')
    plt.legend(loc='upper left', shadow=True, fontsize='small')
    return clf, predicted_holtwinter, params, hw_params