def main(tau, train_path, eval_path):
    """Problem 5(b): Locally weighted regression (LWR)

    Args:
        tau: Bandwidth parameter for LWR.
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
    """
    # Load training and evaluation sets in one call.
    x_train_org, y_train, x_eval_org, y_eval, data_frame = util.load_dataset_new(
        train_path, eval_path)

    # Feature scaling: fit the scaler on the training set only.
    sc_X = StandardScaler()
    x_train = util.add_intercept(sc_X.fit_transform(x_train_org))
    # BUG FIX: the eval set must be scaled with the statistics learned from
    # the training set (`transform`), not re-fitted with `fit_transform` —
    # re-fitting leaks eval statistics and makes train/eval scales disagree.
    x_eval = util.add_intercept(sc_X.transform(x_eval_org))

    print("Train shape:" + str(x_train.shape))
    print("Eval shape:" + str(x_eval.shape))

    # Fit a LWR model (0.1 presumably a regularization factor — see clf.fit).
    clf = LocallyWeightedLinearRegression(tau)
    clf.fit(x_train, y_train, 0.1)
    # In-sample linear response; kept for debugging parity with the original.
    y_train_out_real = np.dot(x_train, clf.theta)
    p_eval = clf.predict(x_eval)

    def give_error(y_out, y):
        # Fraction of exact matches between predictions and labels.
        # NOTE(review): exact equality only makes sense for discretized
        # outputs — confirm p_eval is thresholded before using this.
        cnt = 0
        for i in range(len(y_out)):
            if y_out[i] == y[i]:
                cnt += 1
        return cnt / len(y_out)

    print(p_eval, y_eval)
# print("Predicted:" + str(y_out[i]) + ",actual:" + str(y[i])) # print("%success=" + str(class_probabilities[i][0]*100) + " %mission-failure=" + str(class_probabilities[i][1]*100) + " %flight-failure=" + str(class_probabilities[i][2]*100)) cntfalse += 1 # if (y_out[i] == 2): # #print("Flight " + str(int(x[i][flight_id_index])) + " might need maintaince, our algorithm predicted it would have mission failure!") # if (y_out[i] == 4): # #print("Flight " + str(int(x[i][flight_id_index])) + " definitely needs maintaince, our algorithm predicted it would have flight failure!") print("Predicted " + str(cnt) + "/" + str(len(y_out)) + " correctly.") print("Predicted " + str(cntfalse) + "/" + str(len(y_out)) + " incorrectly.") return cnt / len(y_out) train_path = "output/flights_pass_1_na_0.csv" eval_path = "testinput/flights_new_till_03dec.csv" X, Y, X_test, Y_test, dataset = util.load_dataset_new(train_path, eval_path) # Fitting the classifier into the Training set from sklearn.svm import SVC regression_model = LinearRegression() regression_model.fit(X, Y) # Y_pred_train = classifier.predict(X_Train) # print(give_error(Y_pred_train,Y_Train)) #w = classifier.coef_ #print('w = ',w) print("Score:") print(regression_model.score(X_test, Y_test)) y_predict = regression_model.predict(X_test) #print(y_predict)
# --- Imports and SVC/PCA setup for the flight-failure experiments. ---
from sklearn.svm import SVC
import util
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()

train_path = "output/flights_pass_1_na_0.csv"
eval_path = "testinput/all_test_with_failures_clean.csv"
x_train_org, y, x_valid_org, y_eval, dataset = util.load_dataset_new(
    train_path, eval_path)

# Feature scaling.
# NOTE(review): the validation set is re-fitted with `fit_transform` here —
# should presumably be `transform` so both splits share the training
# statistics; left unchanged pending confirmation by the owner.
sc_X = StandardScaler()
X_Train = util.add_intercept(sc_X.fit_transform(x_train_org))
X_Test = util.add_intercept(sc_X.fit_transform(x_valid_org))

X = X_Train
y = y

# Project to 2 components for plotting/visual separability.
pca = PCA(n_components=2)
Xreduced = pca.fit_transform(X)
Xtestreduced = pca.transform(X_Test)


# NOTE(review): this definition is cut off at the chunk boundary — its body
# continues outside this view; the header is preserved verbatim.
def give_error(y_out, y):
    cnt = 0
    cntfour = 0
def main(file1):
    """Train and evaluate flight-failure predictors on the flight dataset.

    Runs, in order: correlation-matrix plotting, a locally weighted
    normal-equation model, and an L1/L2-regularized gradient-descent
    linear regression, printing exact-match accuracy for each.

    Args:
        file1: Unused in the visible body; kept for interface compatibility.
    """
    print("Running main")
    train_path = "output/flights_pass_1_na_0.csv"
    eval_path = "testinput/all_test_with_failures_clean.csv"
    x_train_org, y_train, x_valid_org, y_valid, dataset = util.load_dataset_new(
        train_path, eval_path)

    # Feature scaling: fit on training data only.
    sc_X = StandardScaler()
    x_train = util.add_intercept(sc_X.fit_transform(x_train_org))
    # BUG FIX: validation features are scaled with the training-set
    # statistics (`transform`), not re-fitted (`fit_transform`), to avoid
    # leakage and train/valid scale mismatch.
    x_valid = util.add_intercept(sc_X.transform(x_valid_org))

    # --- Plot correlation matrix of the (cleaned) dataset. ---
    corr_after_dropping = dataset.corr()
    labels = corr_after_dropping.columns.values
    plt.matshow(corr_after_dropping)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(corr_after_dropping, vmin=-1, vmax=1)
    fig.colorbar(cax)
    ticks = np.arange(0, len(corr_after_dropping.columns), 1)
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_yticklabels(labels, size=5)
    plot_path = 'output/correlation_plot'
    plt.savefig(plot_path)

    def give_error(y_out, y):
        # Fraction of exact prediction/label matches (accuracy on the
        # thresholded 0/1 outputs built below).
        cnt = 0
        for i in range(len(y_out)):
            if y_out[i] == y[i]:
                cnt += 1
        return cnt / len(y_out)

    # --- Normal equation, locally weighted. ---
    tau = 0.1
    lwr = LinearReg_normal_eq_locally_weighted(tau)
    lwr.x_train = x_train
    lwr.y_train = y_train
    lwr.x_valid = x_valid
    theta_train = lwr.fit(x_train, y_train, 0.05)
    y_train_out = sigmoid(x_train, theta_train)
    y_valid_out_ne = sigmoid(x_valid, theta_train)
    # Threshold the sigmoid outputs at 0.65 to get 0/1 predictions.
    y_train_out_1 = np.where(y_train_out > 0.65, 1, 0)
    y_valid_out_ne_1 = np.where(y_valid_out_ne > 0.65, 1, 0)
    print(give_error(y_valid_out_ne_1, y_valid))
    print(give_error(y_train_out_1, y_train))

    # --- Gradient descent, once with L1 and once with L2 regularization. ---
    linear_reg = LinearRegression_gradient_descent()
    linear_reg.x_train = x_train
    linear_reg.y_train = y_train
    l1_l2_factor = np.array([1, 2])       # 1 = L1, 2 = L2 (per fit() contract)
    lambda_array = np.array([10, 0.5])    # regularization strength per run
    learning_rate = 1e-5
    cost_limit = 1e-12
    r2_train_gd = 0
    r2_valid_gd = 0
    for i in range(0, len(l1_l2_factor)):
        theta_train = linear_reg.fit(x_train, y_train, lambda_array[i],
                                     learning_rate, cost_limit,
                                     l1_l2_factor[i])
        y_train_out = linear_reg.predict(x_train)
        y_valid_out = linear_reg.predict(x_valid)
        # NOTE(review): threshold is 0.6 here vs 0.65 above — confirm the
        # difference is intentional.
        y_train_out_1 = np.where(y_train_out > 0.6, 1, 0)
        y_valid_out_1 = np.where(y_valid_out > 0.6, 1, 0)
        print(give_error(y_valid_out_1, y_valid))
        print(give_error(y_train_out_1, y_train))
# --- Build a 2-D dataset for classifier comparison plots. ---
# Synthetic jittered dataset (kept for parity; overwritten by the PCA
# projection of the flight data below).
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)

train_path = 'output/flights_pass_1_na_0_pca.csv'
eval_path = 'output/flights_pass_1_na_0.csv'
x_1, y_1, x_eval, y_eval, data_frame = util.load_dataset_new(
    train_path, eval_path)

# Feature Scaling
from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
X_Train_1 = sc_X.fit_transform(x_1)
# Only the first 300 rows are used, presumably to keep plots fast — confirm.
X_Train = X_Train_1[0:300, :]

# Reduce to 2 components so the data can be plotted.
pca = PCA(n_components=2)
X = pca.fit_transform(X_Train)
y = y_1[0:300]
linearly_separable = (X, y)
from sklearn import svm, datasets from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix # import some data to play with iris = datasets.load_iris() X = iris.data y = iris.target class_names = iris.target_names print(class_names) # Split the data into a training set and a test set train_path = "output/flights_pass_1_na_0.csv" #eval_path = "output/flights_pass_1_na_0.csv" eval_path = "testinput/all_test_with_failures_clean.csv" X_train, y_train, X_test, y_test, dataset = util.load_dataset_new( train_path, eval_path) from sklearn.preprocessing import StandardScaler sc_X = StandardScaler() X = sc_X.fit_transform(X) X_test_transformed = sc_X.fit_transform(X_test) # Run classifier, using a model that is too regularized (C too low) to see # the impact on the results classifier = svm.SVC(kernel='rbf', gamma='auto') y_pred = classifier.fit(X_train, y_train).predict(X_test) def plot_confusion_matrix(cm, classes, normalize=False,