def run(input_dir, num_samples_per_site, onevone=True):
    """Cross-validate the classifier over a small grid of C values.

    Feature vectors for the target sites found under ``input_dir`` are
    loaded and split by ``select_cross_validation_subsets(X, Y, 6)``.  For
    every C in the grid each split is scaled and handed to ``classify``,
    first without a kernel argument (reported as "Linear kernel") and then
    with ``'rbf'``.  Nothing is returned; progress is printed and the
    per-split reporting is left to ``classify``.

    Args:
        input_dir: Directory handed to ``get_target_sites`` and
            ``load_feature_vectors``.  When ``None``, ``sys.argv[1]`` is
            used instead.
        num_samples_per_site: Sample count handed to
            ``load_feature_vectors`` (converted with ``int``).  When
            ``None``, ``sys.argv[2]`` is used instead.
        onevone: Forwarded unchanged to ``classify``.
    """
    # The arguments used to be overwritten from sys.argv unconditionally,
    # which made them dead parameters.  The command line is now only a
    # fallback for values the caller did not supply.
    if input_dir is None:
        input_dir = sys.argv[1]
    if num_samples_per_site is None:
        num_samples_per_site = sys.argv[2]
    num_samples_per_site = int(num_samples_per_site)

    target_sites = get_target_sites(input_dir)

    # Map each site to its position in target_sites; used as the class label.
    labels = dict((site, index) for index, site in enumerate(target_sites))

    X, Y = load_feature_vectors(input_dir, num_samples_per_site, labels)

    # NOTE(review): 6 is presumably the number of cross-validation subsets;
    # confirm against select_cross_validation_subsets.
    iters = select_cross_validation_subsets(X, Y, 6)

    # Values of C to try.
    crange = [1.0, 100.0]

    # (heading, extra positional arguments for classify) for each kernel run.
    kernel_runs = (
        ("Linear kernel:", ()),
        ("RBF kernel:", ('rbf',)),
    )

    for c in crange:
        print("C=%f" % c)
        for heading, kernel_args in kernel_runs:
            print(heading)
            for fold in iters:
                train_x, test_x = scale(fold["train"][0], fold["test"][0])
                classify(train_x, fold["train"][1], test_x, fold["test"][1],
                         onevone, c, *kernel_args)
Пример #2
0
def anomaly_detection(labels, Xnew, Y, testXnew, testY):
    """Fit one anomaly detector per class and print how well it separates.

    For each class index in ``range(len(labels))`` the rows of ``Xnew``
    whose entry in ``Y`` equals that index are translated by their
    per-coordinate minimum, scaled, and fitted with
    ``AnomDet_fit(rows, 0.1, rbf)``.  Every row of ``testXnew`` is then
    translated by the same minimums, scaled and classified.  A result of
    ``0.0`` is counted as "normal": it is correct when the test label equals
    the class, and any other result is correct when it does not.

    Args:
        labels: Collection whose length gives the number of classes.
        Xnew: Training rows, indexable per coordinate.
        Y: Training labels, aligned with ``Xnew``.
        testXnew: Test rows.
        testY: Test labels, aligned with ``testXnew``.

    A class without any training rows is reported and skipped instead of
    failing on the first-row lookup.
    """
    print("Anomaly detection")

    for test_class in range(len(labels)):
        anomX = [x for (i, x) in enumerate(Xnew) if Y[i] == test_class]

        if not anomX:
            # anomX[0] below would raise IndexError, and there is nothing
            # to fit a detector on for this class anyway.
            print("Test class %d. No training samples, skipping" % test_class)
            continue

        # Per-coordinate minimum of this class's rows; translating by it is
        # presumably what makes all training coordinates non-negative.
        minD = [min(row[d] for row in anomX) for d in range(len(anomX[0]))]

        anomX = translate(minD, anomX)
        anomX = scale(anomX)
        rho, alphas = AnomDet_fit(anomX, 0.1, rbf)

        # NOTE(review): the test rows are scaled on their own rather than
        # with the training rows' scaling -- confirm this is intended.
        test = translate(minD, testXnew)
        test = scale(test)

        # Index 0 counts rows of the class under test ("normal"),
        # index 1 counts rows of every other class ("anomaly").
        num_correct = [0, 0]
        totals = [0, 0]
        for (i, x) in enumerate(test):
            # Named `verdict` so it does not shadow a `classify` function.
            verdict = AnomDet_classify(x, alphas, rho, anomX, rbf)
            if testY[i] == test_class:
                totals[0] += 1
                if verdict == 0.0:
                    num_correct[0] += 1
            else:
                totals[1] += 1
                if verdict != 0.0:
                    num_correct[1] += 1

        print("Test class %d. Normal correct: %d/%d, anomaly correct: %d/%d" % (
            test_class, num_correct[0], totals[0], num_correct[1],
            totals[1]))
Пример #3
0
def multiclass_svm(X, Y, testX, testY, labels, c=1.0):
    """Train the multiclass SVM on ``X``/``Y`` and print test accuracy.

    ``X`` and ``testX`` are scaled together, the scaled training rows are
    written one JSON list per line to ``tmp_x/x.dat`` (the directory is
    recreated on every call), and ``SVM_fit`` is run on that file.  Each
    scaled test row is then classified with ``SVM_classify`` and compared
    with the matching entry of ``testY``.

    Args:
        X: Training rows.
        Y: Training labels, passed to ``SVM_fit``.
        testX: Test rows.
        testY: Expected labels, aligned with ``testX``.
        labels: Collection whose length is passed to ``SVM_fit`` as the
            number of classes.
        c: Value forwarded as the last argument of ``SVM_fit``.
    """
    # Rows must offer .tolist() after scaling (presumably numpy arrays).
    X, testX = scale(X, testX)

    try:
        shutil.rmtree("tmp_x")
    except OSError:
        # Only filesystem errors (typically: the directory does not exist)
        # are expected here; anything else should not be silenced.
        print("No tmp directory to delete")
    os.mkdir("tmp_x")

    # `with` guarantees the file is flushed and closed before SVM_fit reads
    # it, even if serialising a row fails.
    with open("tmp_x/x.dat", mode="w+") as out:
        for row in X:
            out.write(json.dumps(row.tolist()))
            out.write("\n")

    thetas, bs, _slacks = SVM_fit("tmp_x/x.dat", Y, len(labels), len(X[0]), c)

    num_correct = 0
    for (i, row) in enumerate(testX):
        if SVM_classify(row, thetas, bs) == testY[i]:
            num_correct += 1
    print("Num correct: %d/%d" % (num_correct, len(testY)))
Пример #4
0
def multiclass_svm(Xnew, testXnew, Y, testY, labels, c=0.05):
    """Train the multiclass SVM via on-disk data files and print accuracy.

    ``Xnew`` and ``testXnew`` are scaled together and written, one JSON
    list per line, to ``tmp_x/x.dat`` and ``tmp_x/xtest.dat`` (the
    directory is recreated on every call).  The in-memory copies are then
    dropped before ``SVM_fit`` runs on the training file, and the test rows
    are read back line by line for classification with ``SVM_classify``.

    Args:
        Xnew: Training rows.
        testXnew: Test rows.
        Y: Training labels, passed to ``SVM_fit``.
        testY: Expected labels, in the same order as ``testXnew``.
        labels: Collection whose length is passed to ``SVM_fit`` as the
            number of classes.
        c: Value forwarded as the last argument of ``SVM_fit``; defaults to
            the previously hard-coded 0.05.
    """
    print("Classifying with a multiclass SVM")

    # Rows must offer .tolist() after scaling (presumably numpy arrays).
    Xnew, testXnew = scale(Xnew, testXnew)

    try:
        shutil.rmtree("tmp_x")
    except OSError:
        # Only filesystem errors (typically: the directory does not exist)
        # are expected here; anything else should not be silenced.
        print("No tmp directory to delete")
    os.mkdir("tmp_x")

    # `with` closes (and flushes) each file even if serialising a row fails.
    with open("tmp_x/x.dat", mode="w+") as out:
        for row in Xnew:
            out.write(json.dumps(row.tolist()))
            out.write("\n")
    with open("tmp_x/xtest.dat", mode="w+") as out:
        for row in testXnew:
            out.write(json.dumps(row.tolist()))
            out.write("\n")

    # Remember the row width, then drop the local references to the scaled
    # data before fitting; everything needed from here on is on disk.
    d = len(Xnew[0])
    del Xnew
    del testXnew

    thetas, bs, _slacks = SVM_fit("tmp_x/x.dat", Y, len(labels), d, c)

    num_correct = 0
    with open("tmp_x/xtest.dat") as tests:
        for (i, line) in enumerate(tests):
            if SVM_classify(json.loads(line), thetas, bs) == testY[i]:
                num_correct += 1

    print("Num correct: %d/%d" % (num_correct, len(testY)))