import random

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
# auc is assumed to be sklearn.metrics.auc (the reorder argument below matches its
# older signature); Score and LeaveNOut are project-local modules.
from sklearn.metrics import auc

from Score import *
from LeaveNOut import *


# mu, sigma and ssize (the list of sample sizes to test) are expected to be defined
# at module level before this function is called.
def firstProblem():
    result = []

    # repeat the experiment 100 times for every sample size in ssize
    for m in range(100):
        for size in ssize:
            score = Score()
            cv = LeaveNOut()
            classifier = KNeighborsClassifier(n_neighbors=3, algorithm="kd_tree")

            X = np.random.normal(mu, sigma, size)
            X = np.expand_dims(X, axis=1)

            Y = np.zeros([X.shape[0]])
            Y = np.expand_dims(Y, axis=1)
            # assign a random half of the samples to class 1
            one_indices = random.sample(range(X.shape[0]), size // 2)
            Y[one_indices, 0] = 1

            # leave-one-out cross-validation
            pred, true = cv.run(data=X, labels=Y, model=classifier, n_out=1)
            c = score.c_score(pred, true)
            auc_score = auc(pred, true, reorder=True)
            result.append([c, auc_score, size, m])

    data = pd.DataFrame(result)
    data.to_csv('result1.csv', header=False, index=False)
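
# The experiment above draws random features and random labels, so both scores should
# hover around chance level (about 0.5). A minimal self-contained sketch of the same
# idea using only NumPy and scikit-learn; LeaveOneOut and roc_auc_score stand in for
# the project-local LeaveNOut and Score, and mu=0, sigma=1, size=20 are assumed here:
def first_problem_sklearn_sketch(size=20, mu=0.0, sigma=1.0):
    import random
    import numpy as np
    from sklearn.model_selection import LeaveOneOut
    from sklearn.metrics import roc_auc_score
    from sklearn.neighbors import KNeighborsClassifier

    X = np.random.normal(mu, sigma, size).reshape(-1, 1)
    y = np.zeros(size)
    y[random.sample(range(size), size // 2)] = 1        # a random half gets class 1

    preds = np.empty(size)
    for train_idx, test_idx in LeaveOneOut().split(X):
        clf = KNeighborsClassifier(n_neighbors=3, algorithm="kd_tree")
        clf.fit(X[train_idx], y[train_idx])
        preds[test_idx] = clf.predict(X[test_idx])

    return roc_auc_score(y, preds)                      # expected to be near 0.5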


def right_feature_selection(X, Y):
    score = Score()
    cv = LeaveNOut()
    classifier = KNeighborsClassifier(n_neighbors=3, algorithm="kd_tree")

    # feature selection is redone inside every training fold of the CV loop
    pred, true = cv.run(data=X, labels=Y, model=classifier, n_out=1, embedded_feature_selection=True)
    c = score.c_score(pred, true)
    auc_score = auc(pred, true, reorder=True)
    return c, auc_score


def wrong_feature_selection(X, Y):
    score = Score()
    cv = LeaveNOut()
    classifier = KNeighborsClassifier(n_neighbors=3, algorithm="kd_tree")

    # feature selection on the full data set before CV: information from the test
    # samples leaks into the selected features, which biases the scores upwards
    X, indices = cv.select(X, Y, select_count=10)
    pred, true = cv.run(data=X, labels=Y, model=classifier, n_out=1)
    c = score.c_score(pred, true)
    auc_score = auc(pred, true, reorder=True)
    return c, auc_score
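
# To see why the two protocols differ, here is a self-contained sketch using only
# scikit-learn; SelectKBest and cross_val_score stand in for the project's cv.select /
# embedded_feature_selection, and the data is pure noise, so an honest protocol should
# score near chance while the leaky one typically looks much better than it is:
def feature_selection_bias_sketch(n_samples=50, n_features=1000, k=10):
    import numpy as np
    from sklearn.feature_selection import SelectKBest, f_classif
    from sklearn.model_selection import cross_val_score, LeaveOneOut
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import make_pipeline

    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))      # noise features
    y = rng.randint(0, 2, n_samples)                  # random binary labels

    knn = KNeighborsClassifier(n_neighbors=3)

    # "right": selection is refit inside every training fold
    right = make_pipeline(SelectKBest(f_classif, k=k), knn)
    acc_right = cross_val_score(right, X, y, cv=LeaveOneOut()).mean()

    # "wrong": selection sees the whole data set, including the future test points
    X_sel = SelectKBest(f_classif, k=k).fit_transform(X, y)
    acc_wrong = cross_val_score(knn, X_sel, y, cv=LeaveOneOut()).mean()

    return acc_right, acc_wrong      # typically ~0.5 vs noticeably above 0.5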


# --- Soil water permeability experiment: leave-one-out CV with a spatial dead zone ---
from kNN import NearestNeighborsRegressor
from sklearn.neighbors import KNeighborsRegressor
from Score import *
import matplotlib.pyplot as plt
import seaborn as sns
from legacy_script import *
from LeaveNOut import *
from sklearn.svm import SVR
import pdb

input_data = pd.read_csv('./Soil_water_permeability_data/INPUT.csv', header=None)
output = pd.read_csv('./Soil_water_permeability_data/OUTPUT.csv', header=None)
coordinates = pd.read_csv('./Soil_water_permeability_data/COORDINATES.csv', header=None)

# Normalization
std_input = standardize_dataset(input_data)
plot_data = []

# Evaluate leave-one-out CV with dead zone radii 0, 10, ..., 200
for n in range(0, 201, 10):
    model = NearestNeighborsRegressor(n_neighbors=5)
    runner = LeaveNOut(zone_radius=n)
    predict, true = runner.run(data=std_input, model=model, labels=output.values, n_out=1,
                               coordinates=coordinates.values)
    score = Score()
    plot_data.append([score.c_score(np.array(predict)[:, 0], np.array(true)[:, 0]), n, 'Concordance Index'])
    print("epoch %d" % n)

line_plot(np.array(plot_data), title="Concordance index by different Dead zone radius - Leave 1 out CV",
              x_title="Dead zone radius", y_title="C-Index")
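
# LeaveNOut(zone_radius=n) is project-specific; the sketch below shows the idea of a
# spatial "dead zone" leave-one-out split with plain NumPy: for every held-out sample,
# all training samples whose coordinates lie within zone_radius of it are dropped
# before fitting (the function and argument names here are illustrative only).
def dead_zone_loo_predictions(X, y, coords, model, zone_radius):
    import numpy as np

    X, y, coords = np.asarray(X), np.asarray(y), np.asarray(coords)
    preds = []
    for i in range(X.shape[0]):
        # distance of every sample to the held-out sample i
        dist = np.linalg.norm(coords - coords[i], axis=1)
        train_mask = dist > zone_radius      # keep only samples outside the dead zone
        train_mask[i] = False                # never train on the test sample itself
        model.fit(X[train_mask], y[train_mask])
        preds.append(model.predict(X[i:i + 1])[0])
    return np.array(preds)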


# --- Water quality experiment: predicting c_total, Cd and Pb with kNN regression ---
from kNN import NearestNeighborsRegressor
import matplotlib.pyplot as plt
import seaborn as sns
from legacy_script import *
from LeaveNOut import *
from sklearn.svm import SVR
import pdb

data = pd.read_csv('Water_data.csv')
# Split data into labels (first three columns: c_total, Cd, Pb) and features
train_labels, train_data = np.hsplit(data, [3])

# Normalization
train_data = standardize_dataset(train_data)
train_labels = train_labels.values
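
# standardize_dataset comes from legacy_script; assuming it performs ordinary z-score
# scaling (zero mean, unit variance per column), a minimal pandas equivalent would be:
def standardize_dataset_sketch(df):
    # df: pandas DataFrame of numeric feature columns
    return (df - df.mean()) / df.std()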

# Try different numbers of neighbors with leave-N-out cross-validation:
for n in [1, 3]:
    plot_data = []

    for i in range(1, 30):
        model = NearestNeighborsRegressor(n_neighbors=i)
        runner = LeaveNOut()
        predict, true = runner.run(data=train_data, model=model, labels=train_labels, n_out=n)
        score = Score()
        plot_data.append([score.c_score(np.array(predict)[:, 0], np.array(true)[:, 0]), i, 'c_total'])
        plot_data.append([score.c_score(np.array(predict)[:, 1], np.array(true)[:, 1]), i, 'Cd'])
        plot_data.append([score.c_score(np.array(predict)[:, 2], np.array(true)[:, 2]), i, 'Pb'])

    line_plot(np.array(plot_data), title="C_index by different K Neighbors - Leave %s out CV" % n,
              x_title="K Neighbors", y_title="C-Index")
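

# Score.c_score is project-specific; as a reference, the concordance index (C-index)
# reported in these plots is conventionally the fraction of sample pairs with different
# true values whose predictions are ordered the same way, with ties counted as half.
# A minimal sketch of that definition:
def concordance_index(pred, true):
    import numpy as np

    pred = np.asarray(pred, dtype=float)
    true = np.asarray(true, dtype=float)
    n_pairs, n_concordant = 0, 0.0
    for i in range(len(true)):
        for j in range(i + 1, len(true)):
            if true[i] == true[j]:
                continue                      # pairs with equal true values are ignored
            n_pairs += 1
            if (pred[i] - pred[j]) * (true[i] - true[j]) > 0:
                n_concordant += 1.0           # predictions ordered like the true values
            elif pred[i] == pred[j]:
                n_concordant += 0.5           # tied predictions count as half
    return n_concordant / n_pairs if n_pairs else 0.5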