Пример #1
0
def _buildComparisonModels(suffix):
    """Create the fixed set of models trained for one feature subset.

    Four MLPs and four LSTMs are built with the layer layouts 1x64, 1x128,
    2x64 and 2x128 (all with dropout 0.2), followed by one regularized
    linear model. ``suffix`` is appended to every model name.

    Returns the models as a list in the order MLPs, LSTMs, linear.
    """
    layerConfigs = [
        ('1x64', [64]),
        ('1x128', [128]),
        ('2x64', [64, 64]),
        ('2x128', [128, 128]),
    ]
    # Each model receives its own copy of the layer list so that no list
    # object is shared between two models.
    mlps = [
        mlModule.MLP('MLP ' + label + ' d0.2 mod' + suffix,
                     layers=list(layers),
                     dropout=0.2)
        for label, layers in layerConfigs
    ]
    lstms = [
        mlModule.LSTM('LSTM ' + label + ' d0.2 mod' + suffix,
                      layers=list(layers),
                      dropout=0.2,
                      recurrentDropout=0.2,
                      enrolWindow=12)
        for label, layers in layerConfigs
    ]
    linear = mlModule.Linear_Regularized('Linear rCV mod' + suffix)
    return [*mlps, *lstms, linear]


def featureComparison(
    irrelevantColumnsList,
    filename,
    columns,
    traintime,
    testtime,
    targetColumns,
    enrolWindow,
):
    """Train the same model set on several feature subsets and compare them.

    For every entry of ``irrelevantColumnsList`` the module-level
    ``mlModule`` is reset, the data in ``filename`` is loaded with that
    entry passed as the irrelevant columns, the models from
    ``_buildComparisonModels`` are initialised and trained (``retrain`` is
    False), and predictions are made. The results of all subsets are merged
    and handed to ``printModelScores``, ``plotModelPredictions`` and
    ``plotModelScores``.

    Reads the module-level sequences ``models`` (name suffix per subset,
    indexed like ``irrelevantColumnsList``) and ``colors`` (one entry per
    merged model series).

    Args:
        irrelevantColumnsList: one irrelevant-columns argument per subset.
        filename: passed to ``mlModule.initDataframe``.
        columns: passed to ``mlModule.initDataframe``.
        traintime: passed to ``mlModule.getTestTrainSplit``.
        testtime: passed to ``mlModule.getTestTrainSplit``.
        targetColumns: passed to ``mlModule.getFeatureTargetSplit``.
        enrolWindow: currently unused, see the review note below.

    NOTE(review): ``enrolWindow`` is accepted but the LSTM models are built
    with a hard-coded ``enrolWindow=12``. This is left unchanged because
    models are not retrained here and switching the value could mismatch
    previously saved models - confirm the intended behaviour.
    """
    columnsLists = []
    deviationsLists = []
    all_names = []
    all_train_metrics = []
    all_test_metrics = []

    for i, irrelevantColumns in enumerate(irrelevantColumnsList):
        mlModule.reset()

        # The return values of these three calls are not needed here; the
        # later mlModule calls take no data arguments (presumably they rely
        # on state kept inside mlModule).
        mlModule.initDataframe(filename, columns, irrelevantColumns)
        mlModule.getTestTrainSplit(traintime, testtime)
        mlModule.getFeatureTargetSplit(targetColumns)

        mlModule.initModels(_buildComparisonModels(models[i]))
        retrain = False
        mlModule.trainModels(retrain)

        (modelNames, metrics_train, metrics_test,
         columnsList, deviationsList) = mlModule.predictWithModels(
             plot=True, score=True)

        if i == 0:
            columnsLists = columnsList
            deviationsLists = deviationsList
        else:
            for j_target, newColumns in enumerate(columnsList):
                # Entry 0 is skipped for later subsets: it is the series
                # coloured 'red' below and is already present from the
                # first subset.
                columnsLists[j_target].extend(newColumns[1:])
                deviationsLists[j_target].extend(deviationsList[j_target])

        # Accumulate exactly once per subset. Previously the first subset
        # was assigned and then appended again, duplicating its entries.
        all_names = [*all_names, *modelNames]
        all_train_metrics = [*all_train_metrics, *metrics_train]
        all_test_metrics = [*all_test_metrics, *metrics_test]

    indexColumn = mlModule._indexColumn
    columnDescriptions = mlModule._columnDescriptions
    columnUnits = mlModule._columnUnits
    traintime = mlModule._traintime

    # Index 3 of every series entry is overwritten with its plot colour.
    for deviations in deviationsLists:
        for j, deviation in enumerate(deviations):
            deviation[3] = colors[j]

    for targetSeries in columnsLists:
        # Fixed: the merged list is updated here, not the last subset's
        # columnsList, so the first series is red for every target.
        targetSeries[0][3] = 'red'
        for j, series in enumerate(targetSeries[1:]):
            series[3] = colors[j]

    printModelScores(
        all_names,
        all_train_metrics,
        all_test_metrics,
    )
    plotModelPredictions(
        plt,
        deviationsLists,
        columnsLists,
        indexColumn,
        columnDescriptions,
        columnUnits,
        traintime,
        interpol=False,
    )
    plotModelScores(
        plt,
        all_names,
        all_train_metrics,
        all_test_metrics,
    )
Пример #2
0
    df = mlModule.initDataframe(filename, columns, irrelevantColumns)
    df_train, df_test = mlModule.getTestTrainSplit(traintime, testtime)
    X_train, y_train, X_test, y_test = mlModule.getFeatureTargetSplit(targetColumns)

    mlp_mae = mlModule.MLP('MLP 1x128 d0.2 mae mod'+model, layers=[128], dropout=0.2, loss='mean_absolute_error', metrics=['mean_absolute_error'])
    mlp_mse = mlModule.MLP('MLP 1x128 d0.2 mse mod'+model, layers=[128], dropout=0.2, loss='mean_squared_error', metrics=['mean_squared_error'])
    lstm_mae = mlModule.LSTM('LSTM 1x128 d0.2 mae mod'+model, layers=[128], dropout=0.2, recurrentDropout=0.2, enrolWindow=12, loss='mean_absolute_error', metrics=['mean_absolute_error'])
    lstm_mse = mlModule.LSTM('LSTM 1x128 d0.2 mse mod'+model, layers=[128], dropout=0.2, recurrentDropout=0.2, enrolWindow=12, loss='mean_squared_error', metrics=['mean_squared_error'])
    
    modelList = [
        mlp_mae,
        mlp_mse,
        lstm_mae,
        lstm_mse,
    ]

    initTrainPredict(modelList)

# Call pred once for every combination of first argument ('D', 'F', 'G') and
# second argument ('A', 'B'), always with '30min' as the third argument, and
# reset mlModule after each call.
for _first in ('D', 'F', 'G'):
    for _second in ('A', 'B'):
        pred(_first, _second, '30min')
        mlModule.reset()
Пример #3
0
# Load the data and create the splits used by the correlation plots below.
df = mlApi.initDataframe(filename, columns, irrelevantColumns)
df_train, df_test = mlApi.getTestTrainSplit(traintime, testtime)
# Second split call: testtime1 is wrapped in a list and passed in the position
# used for traintime above, so both returned frames are handled as test data.
df_test_1, df_test_2 = mlApi.getTestTrainSplit([testtime1], testtime2)
# Concatenate the two test frames into one for the difference plot.
df_test_joined = pd.concat([df_test_1, df_test_2])

# 3. Plot correlation plots

# One plot for the training frame, one paired plot for the two test frames,
# and one difference plot between the training frame and the joined test
# frame. The string arguments are presumably the plot titles - TODO confirm.
mlApi.correlationPlot(df_train, datasetName + " train")
mlApi.correlationDuoPlot(df_test_1, df_test_2, datasetName + " test 1",
                         datasetName + " test 2")
mlApi.correlationDifferencePlot(
    df_train, df_test_joined, "Difference, " + datasetName + " train and test")

# Reset to prepare for second dataset
# -------------------------------------
mlApi.reset()
# -------------------------------------

# 1. Select the data file and display name for the second dataset

filename = "../master-thesis-db/datasets/F/data_180min.csv"

datasetName = "F - Real HX"

columns = [
    ['FYN0111', 'Gasseksport rate', 'MSm^3/d'],
    ['TT0106_MA_Y', 'Varm side C temperatur inn', 'degrees'],
    ['TIC0105_CA_YX', 'Varm side C temperatur ut', 'degrees'],
    ['TI0115_MA_Y', 'Scrubber temperatur ut', 'degrees'],
    ['PIC0104_CA_YX', 'Innløpsseparator trykk', 'Barg'],
    ['TIC0425_CA_YX', 'Kald side temperatur inn', 'degrees'],