Пример #1
0
def _Unsupervised_nolabel(value_train, anomaly_ratio=0.004):
    """Fit an IsolationForest on features of ``value_train`` and plot outliers.

    Feature columns are produced by the module helpers ``naive_sigma``
    (called with 3, 4 and 5), ``sigma_diff``, ``holtWinters`` and
    ``get_fluc``. The forest is fitted on every feature row except the
    first 50 and is then used to label all rows. The series is plotted with
    the points returned by ``_getanomaly_graph(y, value_train, key=-1)``
    marked in red, and the figure is shown with ``plt.show()``.

    Args:
        value_train: Series to analyse; handed unchanged to the feature
            helpers and to the plot.
        anomaly_ratio: Used as the forest's ``contamination`` and, scaled by
            ``len(value_train)``, as ``anomaly_cnt`` for ``holtWinters``.

    Returns:
        The output of ``clf.predict`` for all feature rows.
    """
    mean, sigma = np.mean(value_train), np.std(value_train)

    print("\nCalculating features...\n")

    feature_3_sigma = naive_sigma(value_train, 3, mean, sigma)
    feature_4_sigma = naive_sigma(value_train, 4, mean, sigma)
    feature_5_sigma = naive_sigma(value_train, 5, mean, sigma)
    feature_mean_diff = sigma_diff(value_train, mean)
    # Only four of the eight values returned by holtWinters are used here.
    (feature_holtwinter_diff, feature_holtwinter, _predicted,
     hw_smoothdiff, hw_smooth, _hw_mul, _hw_sigma, _hw_params) = holtWinters(
         value_train, anomaly_cnt=len(value_train) * anomaly_ratio, ahead=0)
    feature_fluc = get_fluc(value_train)

    print("Combining features...\n")

    # '1,2,0' makes np.r_ lay each 1-D feature out as one column.
    all_rows = np.r_[
        '1,2,0',
        feature_3_sigma, feature_4_sigma, feature_5_sigma, feature_mean_diff,
        feature_holtwinter_diff, feature_holtwinter, hw_smoothdiff, hw_smooth,
        feature_fluc]
    # NOTE(review): the first 50 rows are left out of fitting - presumably a
    # warm-up window for the smoothed features; confirm the intent.
    train_rows = all_rows[50:]

    print("Training...\n")

    clf = IsolationForest(contamination=anomaly_ratio,
                          n_estimators=2000,
                          bootstrap=False,
                          max_samples='auto')
    clf.fit(train_rows)
    y = clf.predict(all_rows)

    outlierx, outliery = _getanomaly_graph(y, value_train, key=-1)
    print("Find Anomaly: ", len(outlierx))
    plt.plot(value_train, label='real_value')
    plt.scatter(outlierx,
                outliery,
                s=30,
                c='red',
                marker='x',
                label='detected_anomaly')
    plt.title('IForest_noMA')
    # The legend is created once, after both labelled artists exist.
    plt.legend(loc='upper left', shadow=True, fontsize='small')
    plt.show()

    return y
Пример #2
0
def _Unsupervised_withlabel(value_train,
                            anomaly_train=None,
                            anomaly_ratio=0.004):
    """Fit an IsolationForest on ``value_train`` and compare it with labels.

    Feature columns are produced by the module helpers ``naive_sigma``
    (called with 3, 4 and 5), ``sigma_diff`` and ``holtWinters``. The forest
    is fitted on every feature row except the first 50 and is then used to
    label all rows. When ``anomaly_train`` is given, two counts are printed:
    "Missing" (rows not predicted ``-1`` whose label is ``1``) and
    "Missreport" (rows predicted ``-1`` whose label is ``0``). The series
    and the detected points are drawn in ``plt.subplot(212)`` and the figure
    is shown with ``plt.show()``.

    Args:
        value_train: Series to analyse; handed unchanged to the feature
            helpers, to ``_InitialGraph`` and to the plot.
        anomaly_train: Optional per-sample labels compared against ``0`` and
            ``1``. When ``None`` the label comparison is skipped.
        anomaly_ratio: Used as the forest's ``contamination`` and, scaled by
            ``len(value_train)``, as ``anomaly_cnt`` for ``holtWinters``.

    Returns:
        The output of ``clf.predict`` for all feature rows.
    """
    # The value returned by _InitialGraph is only reported as "All to find".
    Stdanomcnt = _InitialGraph(value_train, anomaly_train)
    mean, sigma = np.mean(value_train), np.std(value_train)

    feature_3_sigma = naive_sigma(value_train, 3, mean, sigma)
    feature_4_sigma = naive_sigma(value_train, 4, mean, sigma)
    feature_5_sigma = naive_sigma(value_train, 5, mean, sigma)
    feature_mean_diff = sigma_diff(value_train, mean)
    # Only four of the eight values returned by holtWinters are used here.
    (feature_holtwinter_diff, feature_holtwinter, _predicted,
     hw_smoothdiff, hw_smooth, _hw_mul, _hw_sigma, _hw_params) = holtWinters(
         value_train, anomaly_cnt=len(value_train) * anomaly_ratio, ahead=0)

    print("Combining features...\n")

    # '1,2,0' makes np.r_ lay each 1-D feature out as one column.
    all_rows = np.r_['1,2,0', feature_3_sigma, feature_4_sigma,
                     feature_5_sigma, feature_mean_diff,
                     feature_holtwinter_diff, feature_holtwinter,
                     hw_smoothdiff, hw_smooth]
    # NOTE(review): the first 50 rows are left out of fitting - presumably a
    # warm-up window for the smoothed features; confirm the intent.
    train_rows = all_rows[50:]

    print("Training...\n")

    clf = IsolationForest(n_estimators=100,
                          contamination=anomaly_ratio,
                          bootstrap=True,
                          max_samples='auto')
    clf.fit(train_rows)
    y = clf.predict(all_rows)

    outlierx, outliery = _getanomaly_graph(y, value_train, key=-1)
    print("All to find: ", Stdanomcnt)
    print("Find Anomaly: ", len(outlierx))
    if anomaly_train is not None:
        # Labels beyond len(y) are ignored, as in an index loop over y.
        labels = np.asarray(anomaly_train)[:len(y)]
        flagged = y == -1
        misscnt = np.count_nonzero(~flagged & (labels == 1))
        misrcnt = np.count_nonzero(flagged & (labels == 0))
        print("Missing: ", misscnt)
        print("Missreport: ", misrcnt)
    # This legend goes on whichever axes is current before subplot(212) is
    # selected (presumably the one drawn by _InitialGraph - confirm).
    plt.legend(loc='upper left', shadow=True, fontsize='small')
    plt.subplot(212)
    plt.plot(value_train, label='real_value')
    plt.scatter(outlierx,
                outliery,
                s=30,
                c='red',
                marker='x',
                label='detected_anomaly')
    plt.legend(loc='upper left', shadow=True, fontsize='small')
    plt.show()

    return y
Пример #3
0
def _train(value_train, anomaly_train, SavedSamples):
    """Train a RandomForestClassifier on features of ``value_train``.

    Feature columns are produced by the module helpers ``naive_sigma``
    (called with 3, 4 and 5), ``sigma_diff`` and ``holtWinters``. The first
    50 rows of the feature matrix and of ``anomaly_train`` are excluded from
    fitting. The fitted feature importances are printed, and
    ``value_train`` followed by the Holt-Winters prediction is plotted; the
    figure is not shown here.

    Args:
        value_train: Series to build features from.
        anomaly_train: Per-sample labels used as the classifier target; also
            passed to ``_getanomaly`` to obtain the anomaly count.
        SavedSamples: Forwarded to ``holtWinters`` as ``ahead``.

    Returns:
        A tuple ``(clf, predicted_holtwinter, params, hw_params)`` where
        ``params`` is ``(mean, sigma, hw_mul, hw_sigma, len(value_train))``
        and ``predicted_holtwinter``, ``hw_mul``, ``hw_sigma`` and
        ``hw_params`` come straight from ``holtWinters``.
    """
    # Only the length of the second sequence returned by _getanomaly is used.
    _anomaly_x, anomaly_y = _getanomaly(anomaly_train)
    anomaly_cnt = len(anomaly_y)
    mean, sigma = np.mean(value_train), np.std(value_train)

    feature_3_sigma = naive_sigma(value_train, 3, mean, sigma)
    feature_4_sigma = naive_sigma(value_train, 4, mean, sigma)
    feature_5_sigma = naive_sigma(value_train, 5, mean, sigma)
    feature_mean_diff = sigma_diff(value_train, mean)
    (feature_holtwinter_diff, feature_holtwinter, predicted_holtwinter,
     hw_smoothdiff, hw_smooth, hw_mul, hw_sigma, hw_params) = holtWinters(
         value_train, anomaly_cnt, ahead=SavedSamples)
    params = (mean, sigma, hw_mul, hw_sigma, len(value_train))

    print("Combining features...\n")

    # '1,2,0' makes np.r_ lay each 1-D feature out as one column.
    # NOTE(review): the first 50 rows are dropped before fitting - presumably
    # a warm-up window for the smoothed features; confirm the intent.
    features = np.r_[
        '1,2,0',
        feature_3_sigma, feature_4_sigma, feature_5_sigma, feature_mean_diff,
        feature_holtwinter_diff, feature_holtwinter, hw_smoothdiff,
        hw_smooth][50:]

    print("Training...\n")

    clf = RandomForestClassifier(n_estimators=100, max_depth=10)
    clf.fit(features, anomaly_train[50:])
    print(clf.feature_importances_)
    plt.plot(np.concatenate((value_train, predicted_holtwinter)),
             label='prediction')
    plt.legend(loc='upper left', shadow=True, fontsize='small')

    return clf, predicted_holtwinter, params, hw_params