示例#1
0
def Remove_Outliers_and_Split_Train_Test():
    """
    Load the data set, drop its outliers, and split it into train and test parts.

    The raw data comes from ``read_data()``; the cleaned frame is element
    ``[1]`` of what ``Remove_Outliers_IF`` returns. Columns at positions 1-8
    are used as features and the ``loc_site`` column as the target, which is
    passed through ``To_Categorical(..., 9)`` before splitting.

    The split holds out 30% of the rows for testing and uses
    ``random_state=1`` so that it is reproducible.

    :return: ``(X_train, X_test, y_train_, y_test_)`` -- the feature splits
        followed by the matching splits of the ``To_Categorical`` target
    """
    raw = read_data()
    cleaned = Remove_Outliers_IF(raw)[1]

    features = cleaned.iloc[:, 1:9]
    encoded_target = To_Categorical(cleaned.loc_site, 9)

    X_train, X_test, y_train_, y_test_ = train_test_split(
        features, encoded_target, test_size=0.3, random_state=1)
    return X_train, X_test, y_train_, y_test_
示例#2
0
        :param test_dat: the test data
        :return: the predicted mpg
        """
        X_te = np.ones(len(test_dat))
        for i in range(1, self.order + 1):
            for v in self.var:
                x_te = test_dat[v]
                X_te = np.c_[X_te, x_te**(i)]
        return (np.dot(X_te, self.theta))


if __name__ == "__main__":
    # Problem 5: train the modified linear regression solver and record its
    # error on the train and test data for each value of `i` in 0, 1, 2.
    data = read_data()
    var_list = set_var_list()
    train_data, test_data = split_data(data)

    lin_2 = linear_regression_2(train_data)

    # Result table: one row per value passed to `train`, one column per
    # data split, initialised to zeros and filled in below.
    df_2 = pd.DataFrame(np.zeros((3, 2)),
                        columns=['train', 'test'],
                        index=['0th', '1st', '2nd'])

    for i in range(3):
        lin_2.train(var_list, i)
        # Column 0 holds the train error, column 1 the test error.
        for col, subset in enumerate((train_data, test_data)):
            y_pred = lin_2.predict(subset)
            df_2.iloc[i, col] = mse(subset.mpg, y_pred)
示例#3
0
import numpy as np
import pandas as pd
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from basicfunction import read_data, To_Categorical, ANN
from problem1 import Remove_Outliers_IF


if __name__ == "__main__":
    # Load the data and keep element [1] of the outlier-removal result.
    dat = read_data()
    data = Remove_Outliers_IF(dat)[1]

    # Features are the columns at positions 1-8; the target is `loc_site`,
    # passed through To_Categorical with 9 as its second argument.
    X = data.iloc[:, 1:9]
    y = data.loc_site
    y_ = To_Categorical(y, 9)

    # A single new sample, shaped as one row of eight feature values.
    X_new = np.array(
        [0.52, 0.47, 0.52, 0.23, 0.55, 0.03, 0.52, 0.39]
    ).reshape(1, -1)

    np.random.seed(2)
    model_new = ANN(output_dim=9, num_hidden_layer=2, num_nodes_hidden=9)
    model_new.fit(X, y_, batch_size=1, epochs=200, verbose=1)

    # Report the column of y_ with the highest predicted value, then the
    # full table of per-class predictions.
    prob = model_new.predict(X_new)
    best = np.argmax(prob)
    print("It belongs to {} class, with probability = {}.".format(
        y_.columns[best], prob[0][best]))

    df_prob = pd.DataFrame({'class': y_.columns, 'pred_prob': prob[0]})
    print("The probability of every class is {}.".format(df_prob))