min_value = df[feature_name].min() result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value) return result for file in data_files: file_name = "data/" + file + training_suffix print("Working on: " + file_name + " - started at " + get_date_time()) data = pd.read_csv(file_name) data[date_column] = data[date_column].apply(transform_date) data = data[(data[date_column] > 0)] target = data[target_column] data.drop(target_column, inplace=True, axis=1) data.drop(id_column, inplace=True, axis=1) data.drop(name_column, inplace=True, axis=1) normalized_data = normalize(data) ada = md.fit_ada_boost(normalized_data, target, True) print(ada) test_data = pd.read_csv("data/" + file + test_suffix) test_data.drop(name_column, inplace=True, axis=1) results = pd.DataFrame() results[id_column] = test_data[id_column] test_data.drop(id_column, inplace=True, axis=1) test_data[date_column] = test_data[date_column].apply(transform_date) normalized_test_data = normalize(test_data) results[target_column] = ada.predict(normalized_test_data) results.to_csv(file + "results") print("end time: " + get_date_time())
import modelling as md
import pandas as pd
import feature_engineering as fe
import os

# Load the DJI signal data set, discard the "Date" column and forward-fill
# missing values down each column.
data = (
    pd.read_csv("data/DJI_1d_10y_signal.csv")
    .drop(columns=["Date"])
    .ffill(axis=0)
)
print(data)

# Split into train/test parts with "Signal" as the target column name.
xTrain, xTest, yTrain, yTest = fe.ordered_train_test_split(data, "Signal")

# Frame holding the true labels next to the current model's predictions;
# the "prediction" column is overwritten by each model in turn.
results = pd.DataFrame()
results["true_y"] = yTest

# --- AdaBoost ---------------------------------------------------------------
ada = md.fit_ada_boost(xTrain, yTrain, True)
results["prediction"] = ada.predict(xTest)
print("Adaboost Classifier")
print(results["prediction"])
# NOTE(review): AdaBoost predictions go through fe.generate_y before being
# scored, whereas the SVM predictions below are scored directly - confirm
# the difference is intended.
adaboost_report = md.get_results(
    results["true_y"],
    fe.generate_y(results, "prediction"),
)
print(adaboost_report)

# --- SVM --------------------------------------------------------------------
svm = md.fit_SVM(xTrain, yTrain)
results["prediction"] = svm.predict(xTest)
print("SVM Classifier")
svm_report = md.get_results(results["true_y"], results["prediction"])
print(svm_report)
svm_accuracy = svm.score(xTest, yTest)
print('Accuracy of the SVM on test set: {:.3f}'.format(svm_accuracy))

# --- KNN --------------------------------------------------------------------
knn = md.fit_KNN(xTrain, yTrain, True)
results["prediction"] = knn.predict(xTest)
print("KNN")