import os
import pickle

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier

# restructure, scale, impute, encode_categorical_variables and
# evaluate_performance are assumed to be the project's own preprocessing
# helpers, imported elsewhere in the module.


def predict(inputs):
    data = [
        inputs['variable1'], inputs['variable2'], inputs['variable3'], inputs['variable4'],
        inputs['variable5'], inputs['variable6'], inputs['variable7'], inputs['variable8'],
        inputs['variable9'], inputs['variable10'], inputs['variable11'], inputs['variable12'],
        inputs['variable13'], inputs['variable14'], inputs['variable15'], inputs['variable17'],
        inputs['variable18'], inputs['variable19']
    ]
    data = pd.DataFrame([data], columns=columns[:-1])

    # preprocess data
    data = restructure(data)

    if inputs['model'] == models[1]:
        data, _ = scale(data, scaler)
    data, _, _ = impute(data, imp_mean, imp_mode)
    data, _ = encode_categorical_variables(data, encoders)
    data = data.to_numpy(dtype=np.float32)

    # prediction
    if inputs['model'] == models[0]:
        prediction = np.round(neural_n.predict(data)).astype(np.int16)
        prediction = encoders['classLabel'].inverse_transform(prediction)[0]

    elif inputs['model'] == models[1]:
        prediction = knn.predict(data).astype(np.int16)
        prediction = encoders['classLabel'].inverse_transform(prediction)[0]

    else:
        raise ValueError(f"unknown model: {inputs['model']}")

    return prediction


def train():
    train = pd.read_csv('dataset/training.csv', sep=';')

    # preprocess data
    train = restructure(train)
    train, scaler = scale(train)
    train, imp_mean, imp_mode = impute(train)
    train, encoders = encode_categorical_variables(train)
    X_train = train.drop(['classLabel'], axis=1).to_numpy(dtype=np.float32)
    y_train = train['classLabel'].astype(np.float32)

    # build model
    model = KNeighborsClassifier()

    # train model
    model.fit(X_train, y_train)
    print('\033[1m' + 'Using training data' + '\033[0m')
    print("Accuracy: ", round(model.score(X_train, y_train), 3))

    # validate model
    valid = pd.read_csv('dataset/validation.csv', sep=';')
    valid = restructure(valid)
    valid, _ = scale(valid, scaler)
    valid, _, _ = impute(valid, imp_mean, imp_mode)

    valid, _ = encode_categorical_variables(valid, encoders)
    X_test = valid.drop(['classLabel'], axis=1).to_numpy(dtype=np.float32)
    y_test = valid['classLabel'].astype(np.float32)

    # save model
    if not os.path.exists('./models'):
        os.mkdir('models')
    with open('models/knn_model.sav', 'wb') as f:
        pickle.dump(model, f)

    # performance evaluation
    print('\033[1m' + 'Using validation data' + '\033[0m')
    y_pred = model.predict(X_test)
    evaluate_performance(y_test, y_pred)

    return model


def prepare_models():
    global neural_n, knn, columns, scaler, imp_mean, imp_mode, encoders
    # load_models
    if not os.path.exists('./models/neural_n_model.h5'):
        from neural_n_model import train as train_neural_n
        neural_n = train_neural_n(with_plots=False)
    else:
        from tensorflow.keras.models import load_model
        neural_n = load_model('./models/neural_n_model.h5')

    if not os.path.exists('./models/knn_model.sav'):
        from knn_model import train as train_knn
        knn = train_knn()
    else:
        with open('./models/knn_model.sav', 'rb') as f:
            knn = pickle.load(f)

    # get preprocessing models from training data
    train = pd.read_csv('dataset/training.csv', sep=';')
    columns = train.columns
    train = restructure(train)
    train, scaler = scale(train)
    train, imp_mean, imp_mode = impute(train)
    _, encoders = encode_categorical_variables(train)
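A minimal usage sketch for the block above, assuming `models` is the module-level list of model names that `predict()` compares against (it is referenced above but defined elsewhere), and with a hypothetical `inputs` payload:

prepare_models()

# hypothetical payload: one entry per dataset column; note variable16 is
# absent, matching the list built inside predict()
inputs = {f'variable{i}': None for i in list(range(1, 16)) + [17, 18, 19]}
inputs['model'] = models[0]  # select the neural-network model

print(predict(inputs))  # decoded class label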
Example #4
print("fetching data...")
mydata = data.fetch_dataset()
print("dropping correlated features...")
data.drop_correlated(mydata)
print("removing outliers...")
data.remove_outliers(mydata)
print("encoding categorical features...")
mydata = data.encode_features(mydata)
print("spliting data into train/test sets...")
train, test = data.train_split(mydata)
print("up sampling...")
train = data.upsample_minority(train)
print("spliting predictor/target features...")
X_train, y_train, X_test, y_test = data.target_split(train, test)
print("scaling datasets...")
X_train, X_test = data.scale(X_train, X_test)
print("performing dimensionality reduction...")
X_train, X_test = data.reduce_dimension(X_train, X_test)
X_train = data.to_df(data=X_train)
X_test = data.to_df(data=X_test)
y_train = data.to_df(data=y_train)
y_test = data.to_df(data=y_test)
print("Modelling using logistic regression...")
logistic_reg = model.train_logistic_classifier(X_train, y_train)
print("Modelling using xgboost classifier...")
xgb = model.train_xgboost(X_train, y_train)
print("Modelling using multi-layer perceptrons...")
perceptron = model.train_perceptrons(X_train, y_train)
print("Logistic Regression Accuracy: ",
      model.get_accuracy(logistic_reg, X_test, y_test))
print("XGBoost Accuracy: ", model.get_accuracy(xgb, X_test, y_test))
Example #5
# train_data, validation_data and test_data are assumed to be loaded
# earlier in the script (this excerpt starts mid-pipeline)
train_data, validation_data, test_data = data.delete_character(
    train_data, validation_data, test_data)

# remove outliers
#train_data, train_label = data.remove(train_data, train_label)

# dimensionality reduction
train_data, validation_data, test_data = data.pca(train_data, validation_data,
                                                  test_data)

# analysis of variance (ANOVA)
#train_data, validation_data, test_data = data.anova(train_data, train_label,
#	validation_data, validation_label, test_data)

# standardization
train_data, validation_data, test_data = data.scale(train_data,
                                                    validation_data, test_data)
'''
mlp_model = model.sklearn_mlp()
mlp_model.train(train_data, train_label, validation_data, validation_label)
mlp_test_label = mlp_model.predict(test_data)
utils.write_txt(mlp_test_label)
predict = mlp_model.predict(validation_data)
'''
'''
torch_mlp = model.torch_train(train_data, train_label, validation_data, validation_label)
test_label = model.torch_predict(test_data, torch_mlp)
predict_torch = model.torch_predict(validation_data, torch_mlp)
for i in range(predict.shape[0]):
	print(str(predict[i]) + str(predict_torch[i]) + str(validation_label[i]))
'''
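The `data.pca` step above fits dimensionality reduction once and applies it to all three splits. A minimal sketch of that pattern with scikit-learn, assuming the splits are numeric arrays (keeping 95% of the variance is an assumed default, not the project's):

from sklearn.decomposition import PCA

def pca(train_data, validation_data, test_data, n_components=0.95):
    # fit the projection on the training split only, then reuse it for
    # validation and test to avoid information leakage
    reducer = PCA(n_components=n_components)
    train_data = reducer.fit_transform(train_data)
    validation_data = reducer.transform(validation_data)
    test_data = reducer.transform(test_data)
    return train_data, validation_data, test_data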