# Example #1
def main(tau, train_path, eval_path):
    """Problem 5(b): Locally weighted regression (LWR)

    Args:
        tau: Bandwidth parameter for LWR.
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
    """
    # Load training and evaluation sets.
    x_train_org, y_train, x_eval_org, y_eval, data_frame = util.load_dataset_new(
        train_path, eval_path)

    # Feature scaling: fit the scaler on the training data ONLY, then apply
    # the same transform to the eval data.  Calling fit_transform on the eval
    # set (as before) refits the scaler on eval statistics, which leaks eval
    # information and scales the two sets inconsistently.
    sc_X = StandardScaler()
    x_train = util.add_intercept(sc_X.fit_transform(x_train_org))
    x_eval = util.add_intercept(sc_X.transform(x_eval_org))

    print("Train shape:" + str(x_train.shape))
    print("Eval shape:" + str(x_eval.shape))

    # Fit a LWR model.  The 0.1 third argument is presumably a
    # regularization/learning factor — TODO confirm against
    # LocallyWeightedLinearRegression.fit.
    clf = LocallyWeightedLinearRegression(tau)
    clf.fit(x_train, y_train, 0.1)
    y_train_out_real = np.dot(x_train, clf.theta)

    p_eval = clf.predict(x_eval)

    def give_error(y_out, y):
        # NOTE: despite the name, this returns the *accuracy* (fraction of
        # exact prediction/label matches), not an error rate.
        cnt = sum(1 for pred, actual in zip(y_out, y) if pred == actual)
        return cnt / len(y_out)

    print(p_eval, y_eval)
# Example #2
            # print("Predicted:" + str(y_out[i]) + ",actual:" + str(y[i]))
            # print("%success=" + str(class_probabilities[i][0]*100) + " %mission-failure=" + str(class_probabilities[i][1]*100) + " %flight-failure=" + str(class_probabilities[i][2]*100))
            cntfalse += 1
            # if (y_out[i] == 2):
            #     #print("Flight " + str(int(x[i][flight_id_index])) + " might need maintaince, our algorithm predicted it would have mission failure!")
            # if (y_out[i] == 4):
            #     #print("Flight " + str(int(x[i][flight_id_index])) + " definitely needs maintaince, our algorithm predicted it would have flight failure!")
    print("Predicted " + str(cnt) + "/" + str(len(y_out)) + " correctly.")
    print("Predicted " + str(cntfalse) + "/" + str(len(y_out)) +
          " incorrectly.")
    return cnt / len(y_out)


# Example #2 driver: fit a plain LinearRegression on the flight dataset and
# report its R^2 score on the held-out test set.
train_path = "output/flights_pass_1_na_0.csv"
eval_path = "testinput/flights_new_till_03dec.csv"
X, Y, X_test, Y_test, dataset = util.load_dataset_new(train_path, eval_path)

# Fitting the classifier into the Training set
# NOTE(review): SVC is imported here but never used in this section, while
# LinearRegression is used without a visible import — presumably imported
# earlier in the file; verify.
from sklearn.svm import SVC
regression_model = LinearRegression()
regression_model.fit(X, Y)

# Y_pred_train = classifier.predict(X_Train)
# print(give_error(Y_pred_train,Y_Train))
#w = classifier.coef_
#print('w = ',w)
print("Score:")
# score() returns the coefficient of determination (R^2) on the test set.
print(regression_model.score(X_test, Y_test))

y_predict = regression_model.predict(X_test)
#print(y_predict)
# Example #3
from sklearn.svm import SVC
import util
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.decomposition import PCA

iris = datasets.load_iris()

from sklearn.preprocessing import StandardScaler

train_path = "output/flights_pass_1_na_0.csv"
eval_path = "testinput/all_test_with_failures_clean.csv"
x_train_org, y, x_valid_org, y_eval, dataset = util.load_dataset_new(train_path, eval_path)

# Fit the scaler on the training data ONLY and reuse the same statistics for
# the test split.  Refitting on the test split (fit_transform, as before)
# leaks test statistics and scales the two splits inconsistently.
sc_X = StandardScaler()
X_Train = util.add_intercept(sc_X.fit_transform(x_train_org))
X_Test = util.add_intercept(sc_X.transform(x_valid_org))

# y is already the training labels from load_dataset_new above.
X = X_Train

# Project onto the first two principal components: fit PCA on the training
# features, then apply the same projection to the test features.
pca = PCA(n_components=2)
Xreduced = pca.fit_transform(X)
Xtestreduced = pca.transform(X_Test)

def give_error(y_out, y):
    cnt = 0
    cntfour = 0
# Example #4
def main(file1):
    """Train and evaluate failure-prediction models on the flight dataset.

    Fits (1) a normal-equation / locally-weighted linear regression and
    (2) regularized gradient-descent linear regressions, printing the
    train/validation accuracy of each.  Also saves a feature-correlation
    heat map to output/correlation_plot.

    Args:
        file1: Unused; kept for backward compatibility with callers.
    """
    print("Running main")
    train_path = "output/flights_pass_1_na_0.csv"
    eval_path = "testinput/all_test_with_failures_clean.csv"
    x_train_org, y_train, x_valid_org, y_valid, dataset = util.load_dataset_new(
        train_path, eval_path)

    # Fit the scaler on the training data ONLY and apply the same transform
    # to the validation split.  The previous fit_transform on x_valid_org
    # refit the scaler on validation statistics (leakage + inconsistent
    # scaling between splits).
    sc_X = StandardScaler()
    x_train = util.add_intercept(sc_X.fit_transform(x_train_org))
    x_valid = util.add_intercept(sc_X.transform(x_valid_org))

    # Plot and save the feature-correlation matrix.  (The stray
    # plt.matshow(...) call that created an extra, unused figure before the
    # real one has been removed; savefig saves the current figure, `fig`.)
    corr_after_dropping = dataset.corr()
    labels = corr_after_dropping.columns.values
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(corr_after_dropping, vmin=-1, vmax=1)
    fig.colorbar(cax)
    ticks = np.arange(0, len(corr_after_dropping.columns), 1)
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_yticklabels(labels, size=5)
    plot_path = 'output/correlation_plot'
    plt.savefig(plot_path)

    def give_error(y_out, y):
        # NOTE: despite the name, this returns the *accuracy* (fraction of
        # exact prediction/label matches), not an error rate.
        cnt = 0
        for i in range(len(y_out)):
            if y_out[i] == y[i]:
                cnt += 1
        return cnt / len(y_out)

    # --- Normal-equation (locally weighted) linear regression ---
    tau = 0.1
    lwr = LinearReg_normal_eq_locally_weighted(tau)
    lwr.x_train = x_train
    lwr.y_train = y_train
    lwr.x_valid = x_valid
    # 0.05 is presumably a regularization factor for fit — TODO confirm.
    theta_train = lwr.fit(x_train, y_train, 0.05)

    # Threshold the sigmoid outputs at 0.65 to obtain binary predictions.
    y_train_out = sigmoid(x_train, theta_train)
    y_valid_out_ne = sigmoid(x_valid, theta_train)
    y_train_out_1 = np.where(y_train_out > 0.65, 1, 0)
    y_valid_out_ne_1 = np.where(y_valid_out_ne > 0.65, 1, 0)

    print(give_error(y_valid_out_ne_1, y_valid))
    print(give_error(y_train_out_1, y_train))

    # --- Gradient descent, one run per regularizer ---
    # l1_l2_factor pairs with lambda_array: presumably factor 1 selects an
    # L1 penalty (lambda=10) and factor 2 an L2 penalty (lambda=0.5) —
    # TODO confirm against LinearRegression_gradient_descent.fit.
    linear_reg = LinearRegression_gradient_descent()
    linear_reg.x_train = x_train
    linear_reg.y_train = y_train
    l1_l2_factor = np.array([1, 2])
    lambda_array = np.array([10, 0.5])
    learning_rate = 1e-5
    cost_limit = 1e-12
    for i in range(len(l1_l2_factor)):
        theta_train = linear_reg.fit(x_train, y_train, lambda_array[i],
                                     learning_rate, cost_limit,
                                     l1_l2_factor[i])

        y_train_out = linear_reg.predict(x_train)
        y_valid_out = linear_reg.predict(x_valid)

        # A lower 0.6 threshold is used here than for the normal-equation
        # model above (0.65).
        y_train_out_1 = np.where(y_train_out > 0.6, 1, 0)
        y_valid_out_1 = np.where(y_valid_out > 0.6, 1, 0)

        print(give_error(y_valid_out_1, y_valid))
        print(give_error(y_train_out_1, y_train))
##names = ["RBF SVM"]
##classifiers = [
##    SVC(gamma='auto')]

# Build a synthetic 2-feature classification problem (as in scikit-learn's
# classifier-comparison example); X and y are overwritten further below.
X, y = make_classification(n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           random_state=1,
                           n_clusters_per_class=1)

# Add uniform noise with a fixed seed so the synthetic data is reproducible.
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
train_path = 'output/flights_pass_1_na_0_pca.csv'
eval_path = 'output/flights_pass_1_na_0.csv'
x_1, y_1, x_eval, y_eval, data_frame = util.load_dataset_new(
    train_path, eval_path)
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_Train_1 = sc_X.fit_transform(x_1)
# Only the first 300 rows are kept — presumably to keep the downstream
# fit/plot fast; confirm this subsetting is intentional.
X_Train = X_Train_1[0:300, :]

##X = iris.data
##y = iris.target
# Reduce the flight features to 2 principal components; this REPLACES the
# synthetic X and y created above with the flight data.
pca = PCA(n_components=2)

X = pca.fit_transform(X_Train)
##y = y_1
y = y_1[0:300]

linearly_separable = (X, y)
# Example #6
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target
class_names = iris.target_names
print(class_names)

# Split the data into a training set and a test set
train_path = "output/flights_pass_1_na_0.csv"
#eval_path = "output/flights_pass_1_na_0.csv"
eval_path = "testinput/all_test_with_failures_clean.csv"
X_train, y_train, X_test, y_test, dataset = util.load_dataset_new(
    train_path, eval_path)
from sklearn.preprocessing import StandardScaler

# NOTE(review): fit_transform is called on BOTH the iris X and the flight
# X_test, so each is standardized with its own statistics; a held-out set
# should normally be scaled with a scaler fitted on training data only.
# Also, neither the scaled X nor X_test_transformed is used below — the SVC
# is fitted and evaluated on the *unscaled* X_train / X_test. Confirm intent.
sc_X = StandardScaler()
X = sc_X.fit_transform(X)
X_test_transformed = sc_X.fit_transform(X_test)

# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
classifier = svm.SVC(kernel='rbf', gamma='auto')
y_pred = classifier.fit(X_train, y_train).predict(X_test)


def plot_confusion_matrix(cm,
                          classes,
                          normalize=False,