def _buildFeatureComparisonModels(modelSuffix):
    """Create the fixed set of models trained for one feature subset.

    Builds four MLPs, four LSTMs and one regularized linear model, in
    that order. ``modelSuffix`` is appended to every model name so that
    models trained on different feature subsets get distinct names.
    """
    # (name label, hidden layer sizes) shared by the MLP and LSTM variants.
    architectures = (
        ('1x64', (64,)),
        ('1x128', (128,)),
        ('2x64', (64, 64)),
        ('2x128', (128, 128)),
    )

    # list(layers) hands every model its own list object, as the
    # hand-written constructors did.
    mlps = [
        mlModule.MLP(
            'MLP ' + label + ' d0.2 mod' + modelSuffix,
            layers=list(layers),
            dropout=0.2,
        )
        for label, layers in architectures
    ]
    lstms = [
        mlModule.LSTM(
            'LSTM ' + label + ' d0.2 mod' + modelSuffix,
            layers=list(layers),
            dropout=0.2,
            recurrentDropout=0.2,
            enrolWindow=12,
        )
        for label, layers in architectures
    ]
    linear = mlModule.Linear_Regularized('Linear rCV mod' + modelSuffix)

    return [*mlps, *lstms, linear]


def featureComparison(
    irrelevantColumnsList,
    filename,
    columns,
    traintime,
    testtime,
    targetColumns,
    enrolWindow,
):
    """Train the same model set on several feature subsets and compare them.

    For every entry of ``irrelevantColumnsList`` the module state is
    reset, the dataframe is initialised with that entry, a fixed set of
    MLP, LSTM and linear models is initialised and trained (without
    forcing a retrain), and predictions are made. The results of all
    runs are merged, recoloured, printed and plotted together.

    Reads the module-level globals ``colors`` (one colour per merged
    series) and ``models`` (one name suffix per feature subset).

    Args:
        irrelevantColumnsList: one irrelevant-columns specification per
            run, each passed to ``mlModule.initDataframe``.
        filename: passed to ``mlModule.initDataframe``.
        columns: passed to ``mlModule.initDataframe``.
        traintime: passed to ``mlModule.getTestTrainSplit``.
        testtime: passed to ``mlModule.getTestTrainSplit``.
        targetColumns: passed to ``mlModule.getFeatureTargetSplit``.
        enrolWindow: currently unused.
            NOTE(review): the LSTM models hard-code ``enrolWindow=12``;
            confirm whether this argument was meant to replace it.

    Raises:
        ValueError: if ``irrelevantColumnsList`` is empty.
    """
    if not irrelevantColumnsList:
        raise ValueError(
            "irrelevantColumnsList must contain at least one entry"
        )

    columnsLists = []
    deviationsLists = []
    all_names = []
    all_train_metrics = []
    all_test_metrics = []

    for i, irrelevantColumns in enumerate(irrelevantColumnsList):
        mlModule.reset()

        # Return values are not needed here; the calls are made for the
        # state they set up inside mlModule.
        mlModule.initDataframe(filename, columns, irrelevantColumns)
        mlModule.getTestTrainSplit(traintime, testtime)
        mlModule.getFeatureTargetSplit(targetColumns)

        mlModule.initModels(_buildFeatureComparisonModels(models[i]))
        mlModule.trainModels(False)  # retrain=False

        (
            modelNames,
            metrics_train,
            metrics_test,
            columnsList,
            deviationsList,
        ) = mlModule.predictWithModels(plot=True, score=True)

        if i == 0:
            # The first run seeds the accumulators, including the target
            # series stored at index 0 of every per-target column list.
            columnsLists = columnsList
            deviationsLists = deviationsList
            all_names = modelNames
            all_train_metrics = metrics_train
            all_test_metrics = metrics_test
        else:
            for j_target, targetEntries in enumerate(columnsList):
                # Skip index 0: the target series is already present
                # from the first run.
                columnsLists[j_target].extend(targetEntries[1:])
                deviationsLists[j_target].extend(deviationsList[j_target])
            all_names = [*all_names, *modelNames]
            all_train_metrics = [*all_train_metrics, *metrics_train]
            all_test_metrics = [*all_test_metrics, *metrics_test]

    indexColumn = mlModule._indexColumn
    columnDescriptions = mlModule._columnDescriptions
    columnUnits = mlModule._columnUnits
    # Plot with the training period as stored by mlModule.
    traintime = mlModule._traintime

    # Slot 3 of every series entry holds its colour; reassign the colours
    # so that series merged from different runs remain distinguishable.
    for targetDeviations in deviationsLists:
        for j, deviation in enumerate(targetDeviations):
            deviation[3] = colors[j]

    for targetSeries in columnsLists:
        # Fixed: the original wrote to `columnsList` (the last run's
        # result) instead of the accumulated `columnsLists`, so the
        # plotted target series was not coloured red for multi-run calls.
        targetSeries[0][3] = 'red'
        for j, series in enumerate(targetSeries[1:]):
            series[3] = colors[j]

    printModelScores(
        all_names,
        all_train_metrics,
        all_test_metrics,
    )
    plotModelPredictions(
        plt,
        deviationsLists,
        columnsLists,
        indexColumn,
        columnDescriptions,
        columnUnits,
        traintime,
        interpol=False,
    )
    plotModelScores(
        plt,
        all_names,
        all_train_metrics,
        all_test_metrics,
    )
df = mlModule.initDataframe(filename, columns, irrelevantColumns) df_train, df_test = mlModule.getTestTrainSplit(traintime, testtime) X_train, y_train, X_test, y_test = mlModule.getFeatureTargetSplit(targetColumns) mlp_mae = mlModule.MLP('MLP 1x128 d0.2 mae mod'+model, layers=[128], dropout=0.2, loss='mean_absolute_error', metrics=['mean_absolute_error']) mlp_mse = mlModule.MLP('MLP 1x128 d0.2 mse mod'+model, layers=[128], dropout=0.2, loss='mean_squared_error', metrics=['mean_squared_error']) lstm_mae = mlModule.LSTM('LSTM 1x128 d0.2 mae mod'+model, layers=[128], dropout=0.2, recurrentDropout=0.2, enrolWindow=12, loss='mean_absolute_error', metrics=['mean_absolute_error']) lstm_mse = mlModule.LSTM('LSTM 1x128 d0.2 mse mod'+model, layers=[128], dropout=0.2, recurrentDropout=0.2, enrolWindow=12, loss='mean_squared_error', metrics=['mean_squared_error']) modelList = [ mlp_mae, mlp_mse, lstm_mae, lstm_mse, ] initTrainPredict(modelList) pred('D', 'A', '30min') mlModule.reset() pred('D', 'B', '30min') mlModule.reset() pred('F', 'A', '30min') mlModule.reset() pred('F', 'B', '30min') mlModule.reset() pred('G', 'A', '30min') mlModule.reset() pred('G', 'B', '30min') mlModule.reset()
df = mlApi.initDataframe(filename, columns, irrelevantColumns) df_train, df_test = mlApi.getTestTrainSplit(traintime, testtime) df_test_1, df_test_2 = mlApi.getTestTrainSplit([testtime1], testtime2) df_test_joined = pd.concat([df_test_1, df_test_2]) # 3. Plot correlation plots mlApi.correlationPlot(df_train, datasetName + " train") mlApi.correlationDuoPlot(df_test_1, df_test_2, datasetName + " test 1", datasetName + " test 2") mlApi.correlationDifferencePlot( df_train, df_test_joined, "Difference, " + datasetName + " train and test") # Reset to prepare for second dataset # ------------------------------------- mlApi.reset() # ------------------------------------- # 1. filename = "../master-thesis-db/datasets/F/data_180min.csv" datasetName = "F - Real HX" columns = [ ['FYN0111', 'Gasseksport rate', 'MSm^3/d'], ['TT0106_MA_Y', 'Varm side C temperatur inn', 'degrees'], ['TIC0105_CA_YX', 'Varm side C temperatur ut', 'degrees'], ['TI0115_MA_Y', 'Scrubber temperatur ut', 'degrees'], ['PIC0104_CA_YX', 'Innløpsseparator trykk', 'Barg'], ['TIC0425_CA_YX', 'Kald side temperatur inn', 'degrees'],