def testGaussian2():
    all_X, all_y = testData()
    xX = np.concatenate([all_X, all_y.reshape((len(all_y), 1))], axis=1)
    xX = [list(x.ravel()) for x in xX]
    np.random.shuffle(xX)
    y = np.array(xX).T[2]
    X = np.array(xX).T[:2].T
    y_test, X_test = all_y[-10:], all_X[-10:]
    params = {'bandwidth': np.logspace(-1, 1, 20)}
    grid = GridSearchCV(KernelDensity(), params)
    grid.fit(all_X)
    bw = grid.best_estimator_.bandwidth
    print 'Bandwidth {}'.format(bw)


    myclassifier = hw7u.MyKNN(metric='gaussian', density=True, bandwidth=bw)
    print 'start MyKNN'
    myclassifier.fit(X, y)
    #print 'start scikit'
    #knnsci = skclassifier.fit(X, y)
    print 'start my pred'
    y_pred = myclassifier.predict(X_test)
    #print 'start sk pred'
    #y_sci = knnsci.score(X_test)
    #print 'SciKit Accuracy: {}  My Accuracy: {}'.format(accuracy_score(fix_y(y_test), fix_y(y_sci)), accuracy_score(fix_y(y_test), fix_y(y_pred)))
    print 'My Accuracy: {}'.format(accuracy_score(hw7.fix_y(y_test), hw7.fix_y(y_pred)))
def tests_radius():
    i = 0
    j = 0
    k = 10
    X, y = testData()
    #print X
    X = np.concatenate([X, y.reshape((len(y), 1))], axis=1)
    X = [list(x.ravel()) for x in X]
    radius = [3, 5, 7]
    radius = [1e-1,1e-2,1e-3]  # for radius
    metric = ['minkowski', 'cosine', 'gaussian', 'poly2']
    ma = speedy.Kernel(ktype=metric[j]).compute
    #ma = hw7u.Kernel(ktype=metric[j]).compute
    print 'spam radius is {}'.format(radius[i])
    clf = hw7u.MyKNN(radius=radius[i], metric=metric[j], outlier_label=-1)
    skclf = RadiusNeighborsClassifier(radius=radius[i], algorithm='brute', metric="euclidean", p=2, outlier_label=.5)
    all_folds = hw3u.partition_folds(X, k)
    kf_train, kf_test = dl.get_train_and_test(all_folds, 0)
    y, X = hw4u.split_truth_from_data(kf_train)
    y_test, X_test = hw4u.split_truth_from_data(kf_test)
    print 'start scikit'
    knnsci = hw7u.KNN(classifier=skclf)
    print 'start MyKNN'
    knn = hw7u.KNN(classifier=clf)
    print 'start sk pred'
    y_sci = knnsci.predict(X_test, X, y)
    print 'start my pred'
    y_pred = knn.predict(X_test, X, y)
    print y_pred
    print 'SciKit Accuracy: {}  My Accuracy: {}'.format(accuracy_score(hw7.fix_y(y_test), hw7.fix_y(y_sci)), accuracy_score(hw7.fix_y(y_test), hw7.fix_y(y_pred)))
def tests_density():
    i = 0
    j = 2
    k = 10
    X, y = testData()
    print X
    X = np.concatenate([X, y.reshape((len(y), 1))], axis=1)
    X = [list(x.ravel()) for x in X]
    radius = [3, 5, 7]
    metric = ['minkowski', 'cosine', 'gaussian', 'poly2']
    ma = hw7u.Kernel(ktype=metric[j]).compute
    params = {'bandwidth': np.logspace(-1, 1, 20)}
    grid = GridSearchCV(KernelDensity(), params)
    grid.fit(X)
    clf = hw7u.MyKNN(metric=metric[j], density=True)

    bw = grid.best_estimator_.bandwidth
    print("best bandwidth: {0}".format(bw))

    # use the best estimator to compute the kernel density estimate
    kde = grid.best_estimator_
    skclf = KernelDensity(bandwidth=bw, kernel='gaussian')
    skclf.fit(X[:-10], y[:-10])
    print skclf.score_samples(X[-10:])
    return
    all_folds = hw3u.partition_folds(X, k)
    kf_train, kf_test = dl.get_train_and_test(all_folds, 0)
    y, X = hw4u.split_truth_from_data(kf_train)
    y_test, X_test = hw4u.split_truth_from_data(kf_test)
    print 'start scikit'
    knnsci = hw7u.KNN(classifier=skclf)
    print 'start MyKNN'
    knn = hw7u.KNN(classifier=clf)
    print 'start sk pred'
    y_sci = knnsci.predict(X_test, X, y)
    print 'start my pred'
    y_pred = knn.predict(X_test, X, y)
    print y_pred
    print 'SciKit Accuracy: {}  My Accuracy: {}'.format(accuracy_score(hw7.fix_y(y_test), hw7.fix_y(y_sci)), accuracy_score(hw7.fix_y(y_test), hw7.fix_y(y_pred)))