def predict(inputs):
    data = [
        inputs['variable1'], inputs['variable2'], inputs['variable3'],
        inputs['variable4'], inputs['variable5'], inputs['variable6'],
        inputs['variable7'], inputs['variable8'], inputs['variable9'],
        inputs['variable10'], inputs['variable11'], inputs['variable12'],
        inputs['variable13'], inputs['variable14'], inputs['variable15'],
        inputs['variable17'], inputs['variable18'], inputs['variable19']
    ]
    data = pd.DataFrame([data], columns=columns[:-1])

    # preprocess data
    data = restructure(data)
    if inputs['model'] == models[1]:
        data, _ = scale(data, scaler)
    data, _, _ = impute(data, imp_mean, imp_mode)
    data, _ = encode_categorical_variables(data, encoders)
    data = data.to_numpy(dtype=np.float32)

    # prediction
    if inputs['model'] == models[0]:
        prediction = np.round(neural_n.predict(data)).astype(np.int16)
        prediction = encoders['classLabel'].inverse_transform(prediction)[0]
    elif inputs['model'] == models[1]:
        prediction = knn.predict(data).astype(np.int16)
        prediction = encoders['classLabel'].inverse_transform(prediction)[0]
    return prediction
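# A hypothetical smoke test for predict(), assuming prepare_models() has been
# called to populate the module globals (columns, scaler, encoders, ...) and
# that models is an ordered list like ['neural_network', 'knn']; its actual
# contents live elsewhere in this repo. The sample dict is keyed by
# columns[:-1] so it matches whatever features the training CSV provides.
prepare_models()
row = pd.read_csv('dataset/validation.csv', sep=';').iloc[0]
sample = {col: row[col] for col in columns[:-1]}  # every feature except classLabel
sample['model'] = models[1]                       # route the request to the KNN model
print(predict(sample))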
def train():
    train = pd.read_csv('dataset/training.csv', sep=';')

    # preprocess data
    train = restructure(train)
    train, scaler = scale(train)
    train, imp_mean, imp_mode = impute(train)
    train, encoders = encode_categorical_variables(train)
    X_train = train.drop(['classLabel'], axis=1).to_numpy(dtype=np.float32)
    y_train = train['classLabel'].astype(np.float32)

    # build model
    model = KNeighborsClassifier()

    # train model
    model.fit(X_train, y_train)
    print('\033[1m' + 'Using training data' + '\033[0m')
    print("Accuracy: ", round(model.score(X_train, y_train), 3))

    # validate model
    valid = pd.read_csv('dataset/validation.csv', sep=';')
    valid = restructure(valid)
    valid, _ = scale(valid, scaler)
    valid, _, _ = impute(valid, imp_mean, imp_mode)
    valid, _ = encode_categorical_variables(valid, encoders)
    X_test = valid.drop(['classLabel'], axis=1).to_numpy(dtype=np.float32)
    y_test = valid['classLabel'].astype(np.float32)

    # save model
    if not os.path.exists('./models'):
        os.mkdir('models')
    with open('models/knn_model.sav', 'wb') as f:
        pickle.dump(model, f)

    # performance evaluation
    print('\033[1m' + 'Using validation data' + '\033[0m')
    y_pred = model.predict(X_test)
    evaluate_performance(y_test, y_pred)
    return model
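# evaluate_performance() is defined elsewhere in this repo; a plausible
# minimal version using standard scikit-learn metrics (the real one may
# report more, e.g. precision/recall per class or a plotted matrix):
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def evaluate_performance(y_true, y_pred):
    # Print the headline accuracy plus a per-class breakdown.
    print("Accuracy: ", round(accuracy_score(y_true, y_pred), 3))
    print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))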
def prepare_models():
    global neural_n, knn, columns, scaler, imp_mean, imp_mode, encoders

    # load models
    if not os.path.exists('./models/neural_n_model.h5'):
        from neural_n_model import train
        neural_n = train(with_plots=False)
    else:
        from tensorflow.keras.models import load_model
        neural_n = load_model('./models/neural_n_model.h5')

    if not os.path.exists('./models/knn_model.sav'):
        from knn_model import train
        knn = train()
    else:
        with open('./models/knn_model.sav', 'rb') as f:
            knn = pickle.load(f)

    # get preprocessing models from training data
    train = pd.read_csv('dataset/training.csv', sep=';')
    columns = train.columns
    train = restructure(train)
    train, scaler = scale(train)
    train, imp_mean, imp_mode = impute(train)
    _, encoders = encode_categorical_variables(train)
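# The preprocessing helpers used throughout (scale, impute,
# encode_categorical_variables) are defined elsewhere in the repo. A minimal
# sketch of what their call sites imply, assuming scikit-learn preprocessing
# objects; the numeric/categorical column split is an illustrative guess, not
# the project's actual logic.
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

def scale(df, scaler=None):
    # Fit a StandardScaler on numeric columns at train time; reuse the
    # fitted scaler at validation/prediction time.
    num_cols = df.select_dtypes(include=np.number).columns
    if scaler is None:
        scaler = StandardScaler().fit(df[num_cols])
    df[num_cols] = scaler.transform(df[num_cols])
    return df, scaler

def impute(df, imp_mean=None, imp_mode=None):
    # Mean-impute numeric columns, mode-impute everything else.
    num_cols = df.select_dtypes(include=np.number).columns
    cat_cols = df.columns.difference(num_cols)
    if imp_mean is None:
        imp_mean = SimpleImputer(strategy='mean').fit(df[num_cols])
    if imp_mode is None:
        imp_mode = SimpleImputer(strategy='most_frequent').fit(df[cat_cols])
    df[num_cols] = imp_mean.transform(df[num_cols])
    df[cat_cols] = imp_mode.transform(df[cat_cols])
    return df, imp_mean, imp_mode

def encode_categorical_variables(df, encoders=None):
    # One LabelEncoder per categorical column, keyed by column name;
    # predict() relies on encoders['classLabel'] for inverse_transform.
    cat_cols = df.select_dtypes(exclude=np.number).columns
    if encoders is None:
        encoders = {col: LabelEncoder().fit(df[col]) for col in cat_cols}
    for col in cat_cols:
        df[col] = encoders[col].transform(df[col])
    return df, encoders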
print("fetching data...") mydata = data.fetch_dataset() print("dropping correlated features...") data.drop_correlated(mydata) print("removing outliers...") data.remove_outliers(mydata) print("encoding categorical features...") mydata = data.encode_features(mydata) print("spliting data into train/test sets...") train, test = data.train_split(mydata) print("up sampling...") train = data.upsample_minority(train) print("spliting predictor/target features...") X_train, y_train, X_test, y_test = data.target_split(train, test) print("scaling datasets...") X_train, X_test = data.scale(X_train, X_test) print("performing dimensionality reduction...") X_train, X_test = data.reduce_dimension(X_train, X_test) X_train = data.to_df(data=X_train) X_test = data.to_df(data=X_test) y_train = data.to_df(data=y_train) y_test = data.to_df(data=y_test) print("Modelling using logistic regression...") logistic_reg = model.train_logistic_classifier(X_train, y_train) print("Modelling using xgboost classifier...") xgb = model.train_xgboost(X_train, y_train) print("Modelling using multi-layer perceptrons...") perceptron = model.train_perceptrons(X_train, y_train) print("Logistic Regression Accuracy: ", model.get_accuracy(logistic_reg, X_test, y_test)) print("XGBoost Accuracy: ", model.get_accuracy(xgb, X_test, y_test))
train_data, validation_data, test_data = data.delete_character(
    train_data, validation_data, test_data)

# remove outliers
#train_data, train_label = data.remove(train_data, train_label)

# dimensionality reduction
train_data, validation_data, test_data = data.pca(train_data, validation_data,
                                                  test_data)

# analysis of variance
#train_data, validation_data, test_data = data.anova(train_data, train_label,
#                                                    validation_data, validation_label,
#                                                    test_data)

# standardization
train_data, validation_data, test_data = data.scale(train_data, validation_data,
                                                    test_data)

'''
mlp_model = model.sklearn_mlp()
mlp_model.train(train_data, train_label, validation_data, validation_label)
mlp_test_label = mlp_model.predict(test_data)
utils.write_txt(mlp_test_label)
predict = mlp_model.predict(validation_data)
'''

'''
torch_mlp = model.torch_train(train_data, train_label,
                              validation_data, validation_label)
test_label = model.torch_predict(test_data, torch_mlp)
predict_torch = model.torch_predict(validation_data, torch_mlp)
for i in range(predict.shape[0]):
    print(str(predict[i]) + str(predict_torch[i]) + str(validation_label[i]))
'''
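# data.pca() fits the projection on the training split only and applies it to
# all three splits; a minimal sketch, assuming sklearn's PCA and an
# illustrative 95% variance threshold (the real n_components is set in the
# data module):
from sklearn.decomposition import PCA

def pca(train_data, validation_data, test_data, n_components=0.95):
    # Keep enough components to explain 95% of the training-set variance,
    # then apply the same transform to validation and test data.
    reducer = PCA(n_components=n_components).fit(train_data)
    return (reducer.transform(train_data),
            reducer.transform(validation_data),
            reducer.transform(test_data))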