Example #1
ax2.hist(rec_errors_diff, bins=250, facecolor='green', alpha=0.4, histtype='stepfilled')
ax3.hist(rec_errors_diff, bins=250, facecolor='green', alpha=0.4, histtype='stepfilled', density=True)
ax3.hist(rec_errors_signal, bins=250, facecolor='red', alpha=0.4, histtype='stepfilled', density=True)

# Plotting - raw variables
figA, axsA = plt.subplots(6, 6)
for nColumn, axA in enumerate(axsA.ravel()):
    axA.hist(X_train[:, nColumn], bins=250, facecolor='blue', alpha=0.4, histtype='stepfilled', density=True)
    #axA.hist(X_signal[:, nColumn][rec_errors_signal > 0.1], bins=250, facecolor='red', alpha=0.4, histtype='stepfilled', density=True)
    axA.hist(X_signal[:, nColumn], bins=250, facecolor='red', alpha=0.4, histtype='stepfilled', density=True)

# Plotting - performance curves
true_positive, false_positive, precisions, recalls, f1s = makeMetrics(200, rec_errors_signal, rec_errors_diff)
print("Area under ROC =", areaUnderROC(true_positive, false_positive))
figB, axsB = plt.subplots(1,2)
axB1,axB2 = axsB.ravel()
# ROC
axB1.plot(false_positive, true_positive, label='ROC curve')
axB1.plot([0, 1], [0, 1], 'k--')
axB1.set_xlim([0.0, 1.0])
axB1.set_ylim([0.0, 1.05])
axB1.set_xlabel('False Anomaly Rate')
axB1.set_ylabel('True Anomaly Rate')
# Precision, recall
axB2.plot(recalls, precisions, label='Precision-recall curve')
axB2.plot([0, 1.0], [0.5, 0.5], 'k--')
axB2.set_xlim([0.0, 1.0])
axB2.set_ylim([0.0, 1.05])
axB2.set_xlabel('Recall')
axB2.set_ylabel('Precision')
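
The helpers makeMetrics and areaUnderROC are used throughout these examples but never shown. The sketch below is an assumption consistent with how they are called above: a sweep of nPoints thresholds over the two reconstruction-error arrays, with signal treated as the anomaly class, plus a trapezoidal ROC integral. Only the function names and the return order are taken from the calls; the internals are guesses.

import numpy as np

def makeMetrics(nPoints, errors_signal, errors_background):
    """Sweep nPoints thresholds and collect TPR, FPR, precision, recall
    and F1 at each one (assumed behaviour)."""
    errors_signal = np.asarray(errors_signal)
    errors_background = np.asarray(errors_background)
    thresholds = np.linspace(min(errors_signal.min(), errors_background.min()),
                             max(errors_signal.max(), errors_background.max()),
                             nPoints)
    tpr, fpr, precisions, recalls, f1s = [], [], [], [], []
    for t in thresholds:
        tp = np.sum(errors_signal > t)      # signal flagged as anomalous
        fn = np.sum(errors_signal <= t)     # signal missed
        fp = np.sum(errors_background > t)  # background flagged as anomalous
        tn = np.sum(errors_background <= t)
        tpr.append(tp / float(tp + fn))
        fpr.append(fp / float(fp + tn))
        precision = tp / float(tp + fp) if (tp + fp) > 0 else 1.0
        recall = tp / float(tp + fn)
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)
    return (np.array(tpr), np.array(fpr), np.array(precisions),
            np.array(recalls), np.array(f1s))

def areaUnderROC(true_positive, false_positive):
    """Trapezoidal integral of the ROC curve (assumed behaviour)."""
    order = np.argsort(false_positive)
    return np.trapz(np.asarray(true_positive)[order],
                    np.asarray(false_positive)[order])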
    print "Processing ",fn
    splitname = fn.split("_")
    type = splitname[0]
    nUnits = splitname[1]
    learningRate = splitname[2]
    nCycles = (splitname[3]).split('.')[0]
    inputFile = 'trained/'+fn
    
    if type == 'ae':
        nn = pickle.load(open(inputFile, 'rb'))
        reconstructed_background = nn.predict(X_test_background)
        reconstructed_signal = nn.predict(X_test_signal)
        errors_background = reconstructionError(X_test_background, reconstructed_background)
        errors_signal = reconstructionError(X_test_signal, reconstructed_signal)
        true_positive, false_positive, precisions, recalls, f1s = makeMetrics(500, errors_signal, errors_background)
        tmpAUC = areaUnderROC(true_positive, false_positive)
        outputTextReport.write(type + ' ' + nUnits + ' ' + learningRate + ' ' + nCycles + ' ' + str(tmpAUC) + '\n')
        if tmpAUC > bestAEScore:
            bestAEScore = tmpAUC
            bestAE = [tmpAUC, type, nUnits, learningRate, nCycles]

    if type == 'cl':
        nn = pickle.load(open(inputFile, 'rb'))
        predicted = nn.predict(X_test)
        probabilities = nn.predict_proba(X_test)
        fpr, tpr, thresholds = roc_curve(Y, probabilities[:, 1], pos_label=1)
        tmpAUC = roc_auc_score(Y, probabilities[:, 1])
        outputTextReport.write(type + ' ' + nUnits + ' ' + learningRate + ' ' + nCycles + ' ' + str(tmpAUC) + '\n')
        if tmpAUC > bestCLScore:
            bestCLScore = tmpAUC
            bestCL = [tmpAUC, type, nUnits, learningRate, nCycles]
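
reconstructionError (and reconstructionErrorByFeature, used in the next example) is the other assumed helper. The calls are consistent with a per-sample mean squared error between the autoencoder input and its output; a minimal sketch under that assumption:

import numpy as np

def reconstructionError(X, X_pred):
    """Per-sample mean squared reconstruction error (assumed definition)."""
    return np.mean((np.asarray(X) - np.asarray(X_pred)) ** 2, axis=1)

def reconstructionErrorByFeature(X, X_pred):
    """Squared reconstruction error kept per input feature (assumed definition)."""
    return (np.asarray(X) - np.asarray(X_pred)) ** 2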
Example #3

    # Testing (fragment from inside a per-model loop; nn is a trained autoencoder)
    predicted_diff = nn.predict(X_test_bg)
    predicted_signal = nn.predict(X_test_sig)

    # Reconstruction error
    rec_errors_diff = reconstructionError(X_test_bg,predicted_diff)
    rec_errors_sig = reconstructionError(X_test_sig,predicted_signal)

    # Reconstruction errors by variable
    rec_errors_varwise_diff = reconstructionErrorByFeature(X_test_bg,predicted_diff)
    rec_errors_varwise_sig = reconstructionErrorByFeature(X_test_sig,predicted_signal)

    # Plotting - performance curves
    # ROC
    true_positive, false_positive, precisions, recalls, f1s = makeMetrics(2000, rec_errors_sig, rec_errors_diff)
    auc = areaUnderROC(true_positive, false_positive)
    print("Area under ROC =", auc)
    print("\n\n")
    axesROC[axesCounter].plot(false_positive, true_positive, label='ROC curve')
    axesROC[axesCounter].plot([0, 1], [0, 1], 'k--')
    axesROC[axesCounter].set_xlim([0.0, 1.0])
    axesROC[axesCounter].set_ylim([0.0, 1.05])
    axesROC[axesCounter].set_xlabel('False Anomaly Rate')
    axesROC[axesCounter].set_ylabel('True Anomaly Rate')
    axesROC[axesCounter].text(0.4,0.2,"AUC = %.4f" % auc,fontsize=15)


    # NN probabilities
    bins = np.linspace(-3.0, 3.0, 250)
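
Example #3 indexes into axesROC with axesCounter without defining either; they presumably come from a subplot grid built before the per-model loop. A hypothetical setup consistent with that indexing:

import matplotlib.pyplot as plt

# Hypothetical grid: one ROC panel per trained model, flattened for 1-D indexing.
figROC, axesGrid = plt.subplots(2, 3, figsize=(15, 8))
axesROC = axesGrid.ravel()
axesCounter = 0  # advanced once per model inside the loop (axesCounter += 1)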