def Remove_Outliers_and_Split_Train_Test():
    """
    Load the data, drop the outliers and split the result into train/test parts.

    The raw frame comes from ``read_data()``; the cleaned frame is the element
    at index 1 of what ``Remove_Outliers_IF`` returns. The columns at
    positions 1 to 8 are used as features and ``loc_site`` as the label. The
    label is passed through ``To_Categorical(..., 9)`` before splitting.

    :return X_train, X_test, y_train_, y_test_: train features, test features,
        train labels (after to categorical), test labels (after to categorical);
        30% of the rows go to the test part, with ``random_state=1``
    """
    cleaned = Remove_Outliers_IF(read_data())[1]

    features = cleaned.iloc[:, 1:9]
    labels = To_Categorical(cleaned.loc_site, 9)

    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.3, random_state=1
    )
    return train_X, test_X, train_y, test_y
:param test_dat: the test data
        :return: the predicted mpg
        """
        # Start from a vector of ones (presumably the intercept term), one
        # entry per row of test_dat.
        X_te = np.ones(len(test_dat))
        # For every power 1..self.order, append each variable in self.var
        # raised to that power as a new column.
        for i in range(1, self.order + 1):
            for v in self.var:
                x_te = test_dat[v]
                X_te = np.c_[X_te, x_te**(i)]
        # The prediction is the product of the assembled matrix and self.theta.
        return (np.dot(X_te, self.theta))


if __name__ == "__main__":
    """ train the model with modified linear regression solver and get the result of problem5. """
    data = read_data()
    var_list = set_var_list()
    train_data, test_data = split_data(data)
    lin_2 = linear_regression_2(train_data)
    # 3x2 table of zeros that receives the errors: one row per value of i
    # (labelled '0th', '1st', '2nd'), one column each for train and test.
    df_2 = pd.DataFrame(np.zeros(6).reshape(3, 2))
    df_2.columns = ['train', 'test']
    df_2.index = ['0th', '1st', '2nd']
    for i in range(3):
        # NOTE(review): the second argument is presumably the polynomial
        # order, going by the row labels above; confirm against train().
        lin_2.train(var_list, i)
        # mse between the mpg column and the prediction on the train data.
        y_pred = lin_2.predict(train_data)
        df_2.iloc[i, 0] = mse(train_data.mpg, y_pred)
        # Same measure on the test data.
        y_pred = lin_2.predict(test_data)
        df_2.iloc[i, 1] = mse(test_data.mpg, y_pred)
import numpy as np
import pandas as pd
import os
# Set the TensorFlow C++ log level before the project imports below
# (presumably they pull in TensorFlow; keep this assignment ahead of them).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from basicfunction import read_data, To_Categorical, ANN
from problem1 import Remove_Outliers_IF


if __name__ == "__main__":
    # Cleaned frame: element at index 1 of what Remove_Outliers_IF returns.
    cleaned = Remove_Outliers_IF(read_data())[1]

    # Features are the columns at positions 1..8; the label is loc_site,
    # passed through To_Categorical with 9.
    X = cleaned.iloc[:, 1:9]
    y_ = To_Categorical(cleaned.loc_site, 9)

    # A single new observation, as one row of eight values.
    X_new = np.array([[0.52, 0.47, 0.52, 0.23, 0.55, 0.03, 0.52, 0.39]])

    np.random.seed(2)
    model_new = ANN(output_dim=9, num_hidden_layer=2, num_nodes_hidden=9)
    model_new.fit(X, y_, batch_size=1, epochs=200, verbose=1)

    # Predict for the new row and report the highest-scoring class.
    prob = model_new.predict(X_new)
    best = np.argmax(prob)
    message = "It belongs to {} class, with probability = {}."
    print(message.format(y_.columns[best], prob[0][best]))

    # Full table of predicted values, one row per class column of y_.
    df_prob = pd.DataFrame({'class': y_.columns, 'pred_prob': prob[0]})
    print("The probability of every class is {}.".format(df_prob))