Пример #1
0
def train_svm(params, suffix, train_X, train_Y, test_X, test_Y):
    """Fit an SVC with the given hyper-parameters and report its accuracy.

    Args:
        params: Mapping that provides the 'C' and 'kernel' settings.
        suffix: Not used by this function.
        train_X: Training samples.
        train_Y: Training labels.
        test_X: Held-out samples.
        test_Y: Held-out labels.

    Returns:
        A ``(test_score, None)`` tuple, where ``test_score`` is the value
        returned by the fitted model's ``score`` on the held-out data.
    """
    penalty = params['C']
    kernel_name = params['kernel']
    classifier = SVC(
        C=penalty,
        kernel=kernel_name,
        gamma='scale',
        probability=True,
    )
    print("Params C:", penalty, "kernel:", kernel_name)

    classifier.fit(train_X, train_Y)
    train_accuracy = classifier.score(train_X, train_Y)
    print("Train score", train_accuracy)

    held_out_accuracy = classifier.score(test_X, test_Y)
    print("Test score", held_out_accuracy)
    return held_out_accuracy, None
Пример #2
0
def _windowed_samples(data, window):
    """Cut ``data`` into consecutive, non-overlapping windows of ``window`` rows.

    Each sample is the 'y' column over one window.  Its label is +1 when the
    'gt' value on the window's first row is 'sit' and -1 otherwise.  A trailing
    partial window is dropped so that every sample has exactly ``window``
    features and the samples stack into a rectangular 2-D array.

    Returns:
        A ``(X, y)`` pair of numpy arrays.
    """
    signal = data['y'].values
    activity = data['gt'].values
    samples = []
    labels = []
    # Stop before any window that would run past the end of the data.
    for start in range(0, len(data) - window + 1, window):
        samples.append(signal[start:start + window])
        labels.append(1 if str(activity[start]) == 'sit' else -1)
    return np.array(samples), np.array(labels)


def phoneAccelerometerISVM(csv_path="./Train_Phone-Acc-nexus4_1-a.csv",
                           window=500):
    """Train and evaluate an SVM that separates 'sit' windows from all others.

    The CSV is read with pandas, its 'y' column is cut into fixed-width
    windows, and each window is labelled from the 'gt' value of its first row
    (+1 for 'sit', -1 otherwise).  An SVC is fitted on all windows, then a
    second SVC is fitted on a stratified 90% split and scored on the
    remaining 10%.  Progress and scores are printed; nothing is returned.

    Args:
        csv_path: Path of the CSV file to load.  It must contain 'gt' and 'y'
            columns.
        window: Number of consecutive rows per sample.

    Raises:
        ValueError: If ``window`` is smaller than 1 or the data holds fewer
            than ``window`` rows.
    """
    if window < 1:
        raise ValueError("window must be a positive number of rows")

    print("Loading data...")
    data = pd.read_csv(csv_path)
    print("Done!")

    # Parse data and build the sit vs not-sit classification samples.
    print("Finding time series windows indexes for each class kind...")
    X, y = _windowed_samples(data, window)
    if len(y) == 0:
        raise ValueError(
            "need at least %d rows to build one window, got %d"
            % (window, len(data)))
    print("Done!")

    # Build and fit the SVM.
    # NOTE: this model is replaced by the one fitted on the training split
    # below; the fit is kept so the printed progress stays unchanged.
    print("Training SVM on all data accelerometer data...")
    clfs = SVC()
    clfs.fit(X, y)
    print("Done!")

    # Hold-out validation on a stratified 90/10 split.
    print("Training SVM on accelerometer training only data...")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=0.1)
    clfs = SVC()
    clfs.fit(X_train, y_train)
    yhat = clfs.predict(X_test)
    print("Abs Error = %f"%( calculateTotalAbsoluteError(yhat, y_test)/len(yhat)))
    print("Test data mean accuracy SVM score: %f"%clfs.score(X_test, y_test))
    # Reuse the predictions computed above instead of predicting again.
    f1_c0 = f1_score(y_test, yhat, pos_label=1, average='binary')
    print("Test data f1 score for class +1: %f" % (f1_c0))
    print("Done!")
Пример #3
0
            # Keep the sample when it carries a real label; a 'NULL' sample is
            # kept only when random.random() > 0.
            # NOTE(review): random.random() returns a value in [0.0, 1.0), so
            # the second test is true almost always and 'NULL' samples are in
            # practice never dropped -- confirm whether a higher threshold was
            # intended for subsampling.
            if Y_label != 'NULL' or random.random() > 0:
                # Binary target: 1 for the event being trained, 0 otherwise.
                if Y_label == event_name:
                    Y = 1
                else:
                    Y = 0

                if i == 0:
                    # The first kept sample initialises the accumulators.
                    X_all = X
                    Y_all = Y
                    i = 1
                else:
                    # Later samples are stacked under the existing rows and
                    # their labels appended.
                    X_all = np.vstack((X_all, X))
                    Y_all = np.append(Y_all, Y)
                    i += 1
        # print (i)
    # print (np.sum(X_all, axis = 1))
    # print(X_all, Y_all)

    # SVC with chi2_kernel passed as a callable kernel.
    # NOTE(review): presumably sklearn.metrics.pairwise.chi2_kernel -- confirm
    # against the file's imports.
    clf = SVC(kernel=chi2_kernel)
    # clf = SVC()
    clf.fit(X_all, Y_all)

    # Both values below are computed on the training data itself.
    print(clf.score(X_all, Y_all))
    print(clf.predict(X_all))

    # fread is opened outside the visible part of this function.
    fread.close()

    # Persist the fitted classifier.
    # NOTE(review): the file object returned by open() is never explicitly
    # closed here.
    cPickle.dump(clf, open(output_file, "wb"))

    # NOTE(review): this is a Python 2 print statement, while the calls above
    # use the function form -- it is a syntax error under Python 3.
    print 'SVM trained successfully for event %s!' % (event_name)
Пример #4
0
    # Pass the training and validation inputs through embed() together with
    # triplet_model; the results are the feature matrices used by the SVC.
    trn_embedding = embed(X_trn, triplet_model)
    val_embedding = embed(X_val, triplet_model)

    # print (X_trn, X_val)
    # print (trn_embedding, val_embedding)
    # print (triplet_model.get_weights())

    # probability=True is needed for the predict_proba calls below.
    clf = SVC(
        # class_weight='balanced',
        probability=True,
        # tol=1e-4,
    )

    clf.fit(trn_embedding, Y_trn)

    # Validation accuracy and per-class probabilities.
    print(clf.score(val_embedding, Y_val))
    print(clf.predict_proba(val_embedding))

    # NOTE(review): ROC AUC is computed from hard clf.predict() labels rather
    # than from scores or probabilities -- confirm this is intended.
    print(roc_auc_score(Y_val, clf.predict(val_embedding)))
    print(classification_report(Y_val, clf.predict(val_embedding), digits=4))

    # Drop the last 8 characters of every entry name in ALL_FILES
    # (presumably a fixed-length extension -- TODO confirm).
    all_files = [x[:-8] for x in os.listdir(ALL_FILES)]
    # Load the matching "<name>.fkmeans" pickle from FEATURE_PATH for each entry.
    # NOTE(review): encoding='latin1' is the setting used to read pickles
    # written by Python 2 -- confirm the origin of these files.  The file
    # objects returned by open() are never explicitly closed.
    X = [
        pickle.load(open(os.path.join(FEATURE_PATH, x + ".fkmeans"), "rb"),
                    encoding='latin1') for x in all_files
    ]
    # Y = [ranks[x.split()[0].strip()] for x in all_files]

    # Class probabilities for every loaded feature set, after embedding.
    proba = clf.predict_proba(embed(np.array(X), triplet_model))

    # print(len(proba), len(proba[0]), proba[0])
Пример #5
0
test_labels = []

# Split the emails: ids divisible by 5 go to the test set, the rest are used
# for training.  Each email is represented by its 'email_<id>' document vector.
for email in emails:
    doc_tag = 'email_' + str(email.id)
    if email.id % 5 == 0:
        target_arrays, target_labels = test_arrays, test_labels
    else:
        target_arrays, target_labels = train_arrays, train_labels
    target_arrays.append(model.docvecs[doc_tag])
    target_labels.append(int(email.label))

classifier = SVC()
classifier.fit(numpy.array(train_arrays), numpy.array(train_labels))

overall_score = classifier.score(
    numpy.array(test_arrays),
    numpy.array(test_labels),
)
print("Overall score is %f." % overall_score)

# Re-predict every test email one at a time, recording the ids of correct
# predictions and an (id, predicted, actual) tuple for each wrong one.
corrects = []
wrongs = []
for email in emails:
    if email.id % 5 != 0:
        continue
    doc_vector = model.docvecs['email_' + str(email.id)]
    predicted = classifier.predict([doc_vector])[0]
    expected = int(email.label)
    if predicted == expected:
        corrects.append(email.id)
    else:
        wrongs.append((email.id, predicted, expected))