Example #1
def mlr(train_features,
        train_labels,
        test_features,
        names,
        dummies,
        net_type,
        extra_features=False):
    """
    Train the Multiple Linear Regressor from training data with labels and
    perform predictions on the test data.
    """

    print('\n=== Running Multiple Linear Regression for {0} ==='.format(
        net_type))

    regressor = LinearRegression(n_jobs=-1)

    if extra_features:
        train_scaled_tmp, scaler = ml_funcs.apply_scaling(
            train_features[names], 'MLR', net_type, save_scaler=False)
        train_scaled = np.concatenate(
            [train_scaled_tmp,
             np.array(train_features[dummies])], axis=1)
    else:
        train_scaled, scaler = ml_funcs.apply_scaling(train_features,
                                                      'MLR',
                                                      net_type,
                                                      save_scaler=False)

    # Fit the model to the data.
    print('>> Training the regressor <<')
    starttime = time()
    regressor.fit(train_scaled, train_labels.to_numpy().T[0])
    endtime = time()
    duration = endtime - starttime
    print("Time: ", round(duration, 2), "s")

    # Only perform predictions when there are test features.
    # First scale the test features with the scaler fitted on the training data.
    if not test_features.empty:

        if extra_features:
            test_scaled_tmp = scaler.transform(test_features[names])
            test_scaled = np.concatenate(
                [test_scaled_tmp,
                 np.array(test_features[dummies])], axis=1)
        else:
            test_scaled = scaler.transform(test_features)

        print('>> Performing predictions <<')
        starttime = time()
        predictions = regressor.predict(test_scaled)
        endtime = time()
        duration = endtime - starttime
        print("Time: ", round(duration, 2), "s")

        return predictions
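These examples omit their imports and lean on a project-specific helper module, ml_funcs. A minimal sketch of what the shared imports and ml_funcs.apply_scaling might look like, assuming a scikit-learn StandardScaler underneath (the real helper may differ):

from time import time

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler


def apply_scaling(features, method, net_type, save_scaler=False):
    """Hypothetical stand-in for ml_funcs.apply_scaling: fit a scaler on
    the training features and return the scaled array together with the
    fitted scaler, so the same transform can be reused on the test set."""
    scaler = StandardScaler()
    scaled = scaler.fit_transform(features)
    if save_scaler:
        # The real helper presumably persists the scaler to disk, keyed
        # by method and net_type; omitted in this sketch.
        pass
    return scaled, scaler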
Example #2
def cv_rf(train_features, train_labels):
    """
    Apply cross validation for the Random Forest Regressor to find its
    optimal hyperparameters based on the training data.
    """

    # The number of trees in the random forest.
    n_estimators = np.linspace(start=50, stop=600, num=12, dtype=int)

    # The number of features to consider at every split of a node.
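    # (Note: 'auto' was deprecated for forests in scikit-learn 1.1 and
    # removed in 1.3; on newer versions, 1.0 gives the same behaviour.)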
    max_features = ['auto', 'sqrt', 'log2']

    # The maximum depth of the trees.
    max_depth = np.linspace(2, 20, num=10, dtype=int).tolist()
    max_depth.append(None)

    # The minimum number of samples required to split a node.
    min_samples_split = np.linspace(5, 50, num=10, dtype=int)

    # The minimum number of samples required at each leaf node.
    min_samples_leaf = np.linspace(5, 50, num=10, dtype=int)

    # Whether bootstrap samples are used when building each tree.
    bootstrap = [True, False]

    # Create a random grid with all parameters.
    random_grid = {'n_estimators': n_estimators,
                   'max_features': max_features,
                   'max_depth': max_depth,
                   'min_samples_split': min_samples_split,
                   'min_samples_leaf': min_samples_leaf,
                   'bootstrap': bootstrap}

    # Use the random grid to search for best hyperparameters.
    # First create the base model to tune.
    regressor = RandomForestRegressor(random_state=0)

    # Random search over the parameters, using 5-fold cross validation
    # across 75 different combinations, running on all processor cores.
    rf_random = RandomizedSearchCV(estimator=regressor,
                                   param_distributions=random_grid,
                                   scoring='neg_mean_absolute_error',
                                   n_iter=75, cv=5, verbose=2,
                                   random_state=0, n_jobs=-1)

    # Scale the features
    train_scaled, _ = ml_funcs.apply_scaling(train_features, 'RF', 'None', save_scaler=False)

    # Fit the random search model.
    search = rf_random.fit(train_scaled, train_labels)

    # Select the parameters that had the best outcome.
    print("RFR best estimator:")
    print(search.best_estimator_)

    print("RFR best hyperparameters of best estimator:")
    print(search.best_estimator_.get_params())

    print("RFR best hyperparameters of search obj:")
    print(search.best_params_)
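RandomizedSearchCV refits the best configuration on the full training set by default (refit=True), so search.best_estimator_ is ready to use; alternatively, the printed best_params_ can be unpacked into a fresh regressor, for example:

# Rebuild the tuned model from the best parameter combination found by
# the randomized search and refit it on the scaled training data.
best_rf = RandomForestRegressor(random_state=0, n_jobs=-1,
                                **search.best_params_)
best_rf.fit(train_scaled, train_labels)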
Example #3
def test_performance(train_features, train_labels, test_features, test_labels, method, env):
    """
    Test how much impact the hyperparameter tuning had by comparing the
    optimised model to a bare model.
    """

    if method == 'RFR':
        bare = RandomForestRegressor(random_state=0, n_jobs=-1)

        if env == 'CBD':
            optimized = RandomForestRegressor(n_estimators=450, min_samples_split=50,
                                              min_samples_leaf=15, max_features='sqrt',
                                              max_depth=14, bootstrap=False, n_jobs=-1)
        elif env in ('suburbs', 'combined'):
            optimized = RandomForestRegressor(n_estimators=100, min_samples_split=20,
                                              min_samples_leaf=5, max_features='sqrt',
                                              max_depth=None, bootstrap=True, n_jobs=-1)
        else:
            print("Not a valid environment!")
            return None

    elif method == 'SVR':
        bare = LinearSVR(random_state=0)

        if env == 'CBD':
            optimized = LinearSVR(tol=1e-4, max_iter=1800, loss='squared_epsilon_insensitive',
                                  epsilon=1.0, dual=True, C=1e-3)
        elif env == 'suburbs':
            optimized = LinearSVR(tol=1e-5, max_iter=5000, loss='squared_epsilon_insensitive',
                                  epsilon=0.0, dual=False, C=1e-4)
        elif env == 'combined':
            optimized = LinearSVR(random_state=0, tol=0.0001, max_iter=200,
                                  loss='epsilon_insensitive', epsilon=1.0,
                                  C=0.01, dual=True)
        else:
            print("Not a valid environment!")
            return None

    else:
        print("Not a valid method: choose RFR or SVR.")
        return None

    # Scale the features.
    train_scaled, scaler = ml_funcs.apply_scaling(train_features, method, env, save_scaler=False)
    test_scaled = scaler.transform(test_features)

    # Fit the data on the bare model, perform height predictions
    bare.fit(train_scaled, train_labels)
    predictions_bare = bare.predict(test_scaled)
    accuracy_bare = compute_accuracy(predictions_bare, test_labels)
    bare_mae = mean_absolute_error(test_labels, predictions_bare)

    # Now do the same for the optimized model.
    optimized.fit(train_scaled, train_labels)
    predictions_optimized = optimized.predict(test_scaled)
    accuracy_optimized = compute_accuracy(predictions_optimized, test_labels)
    optimized_mae = mean_absolute_error(test_labels, predictions_optimized)

    return accuracy_bare, bare_mae, accuracy_optimized, optimized_mae
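compute_accuracy is not defined in these snippets; judging by the comments elsewhere ("mean absolute percentage error"), it is presumably something along these lines:

import numpy as np


def compute_accuracy(predictions, labels):
    """Hypothetical stand-in: accuracy in percent, taken as 100 minus the
    mean absolute percentage error. Assumes strictly positive labels
    (e.g. building heights), so the division is safe."""
    labels = np.asarray(labels, dtype=float)
    predictions = np.asarray(predictions, dtype=float)
    mape = np.mean(np.abs((labels - predictions) / labels)) * 100.0
    return 100.0 - mape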
Example #4
def rf_min_samples_leaf(train_features, train_labels, test_features, test_labels, name):
    """
    Plot the minimum samples required in a leaf against the accuracy.
    """
    sns.set()
    sns.set_style("ticks")

    train_results = []
    test_results = []

    samples_start = np.linspace(2, 24, 12, dtype=int)
    samples_end = np.linspace(25, 750, num=30, dtype=int)
    min_samples_leaf = np.hstack((samples_start, samples_end))

    train_scaled, scaler = ml_funcs.apply_scaling(train_features, 'RF', name, save_scaler=False)
    test_scaled = scaler.transform(test_features)

    for samples in min_samples_leaf:
        print("Samples leaf:", samples)

        randomforest = RandomForestRegressor(min_samples_leaf=samples, n_jobs=-1, random_state=0)
        randomforest.fit(train_scaled, train_labels)
        predict_train = randomforest.predict(train_scaled)

        # Accuracy of training data (mean absolute percentage error)
        accuracy_train = compute_accuracy(predict_train, train_labels)
        train_results.append(accuracy_train)

        predict_test = randomforest.predict(test_scaled)

        # Accuracy for test data.
        accuracy_test = compute_accuracy(predict_test, test_labels)
        test_results.append(accuracy_test)

    fig = plt.figure(figsize=(10, 6))
    sns.lineplot(x=min_samples_leaf, y=train_results, label='Train')
    sns.lineplot(x=min_samples_leaf, y=test_results, label='Test')
    plt.legend(frameon=False, loc='upper right')
    plt.xlabel('Minimum samples in leaf')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if generate_plots.directory_exists("./Figures"):
        plt.savefig("./Figures/Min_Samples_Leaf_" + name + ".pdf", bbox_inches="tight", dpi=300,
                    transparent=True)
    else:
        print("Directory: ./Figures does not exist!")
Example #5
def rf_max_depth(train_features, train_labels, test_features, test_labels, name):
    """
    Plot the maximum tree depth against the accuracy.
    """
    sns.set()
    sns.set_style("ticks")

    train_results = []
    test_results = []

    # Maximum depth of the tree.
    max_depth = np.linspace(1, 35, 35, dtype=int)

    train_scaled, scaler = ml_funcs.apply_scaling(train_features, 'RF', name, save_scaler=False)
    test_scaled = scaler.transform(test_features)

    for depth in max_depth:
        print("Depth:", depth)

        randomforest = RandomForestRegressor(max_depth=depth, n_jobs=-1, random_state=0)
        randomforest.fit(train_scaled, train_labels)
        predict_train = randomforest.predict(train_scaled)

        # Accuracy of training data (mean absolute percentage error)
        accuracy_train = compute_accuracy(predict_train, train_labels)
        train_results.append(accuracy_train)

        predict_test = randomforest.predict(test_scaled)

        # Accuracy for test data.
        accuracy_test = compute_accuracy(predict_test, test_labels)
        test_results.append(accuracy_test)

    fig = plt.figure(figsize=(10, 6))
    sns.lineplot(x=max_depth, y=train_results, label='Train')
    sns.lineplot(x=max_depth, y=test_results, label='Test')
    plt.legend(frameon=False, loc='lower right')
    plt.xlabel('Maximum tree depth')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if generate_plots.directory_exists("./Figures"):
        plt.savefig("./Figures/Max_Depth_" + name + ".pdf", bbox_inches="tight", dpi=300,
                    transparent=True)
    else:
        print("Directory: ./Figures does not exist!")
Example #6
def rf_n_estimators(train_features, train_labels, test_features, test_labels, name):
    """
    Plot the number of estimators against the accuracy.
    """
    sns.set()
    sns.set_style("ticks")

    train_results = []
    test_results = []

    # The number of trees in the random forest.
    n_estimators = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

    train_scaled, scaler = ml_funcs.apply_scaling(train_features, 'RF', name, save_scaler=False)
    test_scaled = scaler.transform(test_features)

    for estimator in n_estimators:
        print("Num estimators:", estimator)

        randomforest = RandomForestRegressor(n_estimators=estimator, n_jobs=-1, random_state=0)
        randomforest.fit(train_scaled, train_labels)
        predict_train = randomforest.predict(train_scaled)

        # Accuracy of training data (mean absolute percentage error)
        accuracy_train = compute_accuracy(predict_train, train_labels)
        train_results.append(accuracy_train)

        predict_test = randomforest.predict(test_scaled)

        # Accuracy for test data.
        accuracy_test = compute_accuracy(predict_test, test_labels)
        test_results.append(accuracy_test)

    fig = plt.figure(figsize=(10, 6))
    sns.lineplot(x=n_estimators, y=train_results, label='Train')
    sns.lineplot(x=n_estimators, y=test_results, label='Test')
    plt.legend(frameon=False, loc='lower right')
    plt.xlabel('Number of estimators')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if generate_plots.directory_exists("./Figures"):
        plt.savefig("./Figures/N_Estimators_" + name + ".pdf", bbox_inches="tight", dpi=300,
                    transparent=True)
    else:
        print("Directory: ./Figures does not exist!")
Example #7
def svr_C(train_features, train_labels, test_features, test_labels, name):
    """
    Plot C against the accuracy.
    """
    sns.set()
    sns.set_style("ticks")

    train_results = []
    test_results = []

    c_values = np.linspace(1e-4, 1, 10)

    train_scaled, scaler = ml_funcs.apply_scaling(train_features, 'SVR', name, save_scaler=False)
    test_scaled = scaler.transform(test_features)

    for c_val in c_values:
        print("C:", c_val)

        svr = LinearSVR(C=c_val, max_iter=2000, random_state=0)
        svr.fit(train_scaled, train_labels)
        predict_train = svr.predict(train_scaled)

        # Accuracy of training data (mean absolute percentage error)
        accuracy_train = compute_accuracy(predict_train, train_labels)
        train_results.append(accuracy_train)

        predict_test = svr.predict(test_scaled)

        # Accuracy for test data.
        accuracy_test = compute_accuracy(predict_test, test_labels)
        test_results.append(accuracy_test)

    fig = plt.figure(figsize=(10, 6))
    sns.lineplot(x=c_values, y=train_results, label='Train')
    sns.lineplot(x=c_values, y=test_results, label='Test')
    plt.legend(frameon=False, loc='lower right')
    plt.xlabel('C')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if generate_plots.directory_exists("./Figures"):
        plt.savefig("./Figures/C_" + name + ".pdf", bbox_inches="tight", dpi=300,
                    transparent=True)
    else:
        print("Directory: ./Figures does not exist!")
Example #8
def cv_svr(train_features, train_labels):
    """
    Perform the k-fold cross validation for the support vector regressor.
    """

    epsilon = [0.0, 0.5, 1.0]
    tol = [1e-3, 1e-4, 1e-5]
    C = [1e-4, 1e-3, 1e-2, 0.1, 1.0]
    loss = ['epsilon_insensitive', 'squared_epsilon_insensitive']
    dual = [True, False]
    max_iter = np.linspace(200, 5000, 25, dtype=int)

    # Create a random grid with all parameters.
    random_grid = {'epsilon': epsilon,
                   'tol': tol,
                   'C': C,
                   'loss': loss,
                   'dual': dual,
                   'max_iter': max_iter}

    # Use the random grid to search for best hyperparameters.
    # First create the base model to tune.
    svr = LinearSVR(random_state=0)

    # Random search over the parameters, using 5-fold cross validation
    # across 75 different combinations, running on all processor cores.
    svr_random = RandomizedSearchCV(estimator=svr,
                                    param_distributions=random_grid,
                                    n_iter=75, cv=5, verbose=2,
                                    random_state=0, n_jobs=-1, error_score=0.0)

    # Scale the features
    train_scaled, _ = ml_funcs.apply_scaling(train_features, 'SVR', 'None', save_scaler=False)

    # Fit the random search model.
    search = svr_random.fit(train_scaled, train_labels)

    # Select the parameters that had the best outcome.
    print("SVR best estimator:")
    print(search.best_estimator_)

    print("SVR best hyperparameters of best estimator:")
    print(search.best_estimator_.get_params())

    print("SVR best hyperparameters of search obj:")
    print(search.best_params_)
Example #9
def svr_maxiter_tolerance(train_features, train_labels, test_features, test_labels, name):
    """
    Plot a combination of the maximum number of iterations and the tolerance
    against the accuracy.
    """
    sns.set()
    sns.set_style("ticks")

    train_results = []
    test_results = []

    tolerances = [1e-3, 1e-4, 1e-5]
    tol_labels = ['1e-3', '1e-4', '1e-5']
    max_iter = np.linspace(100, 5000, 50, dtype=int)

    train_scaled, scaler = ml_funcs.apply_scaling(train_features, 'SVR', name, save_scaler=False)
    test_scaled = scaler.transform(test_features)

    for tolerance in tolerances:
        temp_train = []
        temp_test = []

        print("Tolerance:", tolerance)

        for iteration in max_iter:
            print("Max. iterations:", iteration)

            svr = LinearSVR(tol=tolerance, max_iter=iteration, random_state=0)
            svr.fit(train_scaled, train_labels)
            predict_train = svr.predict(train_scaled)

            # Accuracy of training data (mean absolute percentage error)
            accuracy_train = compute_accuracy(predict_train, train_labels)
            temp_train.append(accuracy_train)

            predict_test = svr.predict(test_scaled)

            # Accuracy for test data.
            accuracy_test = compute_accuracy(predict_test, test_labels)
            temp_test.append(accuracy_test)

        train_results.append(temp_train)
        test_results.append(temp_test)

    fig = plt.figure(figsize=(10, 6))
    for i in range(len(train_results)):
        label_train = 'Train (tol=' + tol_labels[i] + ')'
        sns.lineplot(x=max_iter, y=train_results[i], label=label_train)
        label_test = 'Test (tol=' + tol_labels[i] + ')'
        sns.lineplot(x=max_iter, y=test_results[i], label=label_test)

    plt.legend(frameon=False, loc='lower left', bbox_to_anchor=(1.0, 0.0))
    plt.xlabel('Maximum number of iterations')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if generate_plots.directory_exists("./Figures"):
        plt.savefig("./Figures/MaxIter_Tolerance_" + name + ".pdf", bbox_inches="tight", dpi=300,
                    transparent=True)
    else:
        print("Directory: ./Figures does not exist!")
Example #10
def feature_imp_boxplots(data, env):
    """
    Compute the feature importance based on a random forest.
    1) Impurity-based importance
    2) Permutation importance
    Information:
    https://scikit-learn.org/stable/auto_examples/inspection/plot_permutation_importance_multicollinear.html
    https://scikit-learn.org/stable/auto_examples/inspection/plot_permutation_importance.html
    """

    if env in ('CBD', 'suburbs'):
        net_type = "split"
        feature_names = np.array([
            'Area', 'Compactness', '#Neighbours', '#Adjacent Buildings',
            '#Vertices', 'Length', 'Width', 'Slimness', 'Complexity'
        ])
    elif env == 'combined':
        net_type = "single"
        feature_names = np.array([
            'Area', 'Compactness', '#Neighbours', '#Adjacent Buildings',
            '#Vertices', 'Length', 'Width', 'Slimness', 'Complexity',
            'Morphology'
        ])
    else:
        print("Boxplots feature importance: not a valid option.")
        return

    features, labels = ml_funcs.get_features_and_labels(data,
                                                        net_type,
                                                        False, [],
                                                        labels=True)

    train_X, test_X, train_y, test_y = train_test_split(features,
                                                        labels,
                                                        test_size=0.2,
                                                        random_state=0)

    train_X_scaled, scaler = ml_funcs.apply_scaling(train_X, 'RF', env)
    test_X_scaled = scaler.transform(test_X)

    if env == 'CBD':
        regressor = RandomForestRegressor(n_estimators=450,
                                          min_samples_split=50,
                                          min_samples_leaf=15,
                                          max_features='sqrt',
                                          max_depth=14,
                                          bootstrap=False,
                                          random_state=0,
                                          n_jobs=-1)
    elif env in ('suburbs', 'combined'):
        regressor = RandomForestRegressor(n_estimators=100,
                                          min_samples_split=20,
                                          min_samples_leaf=5,
                                          max_features='sqrt',
                                          max_depth=None,
                                          bootstrap=True,
                                          random_state=0,
                                          n_jobs=-1)
    else:
        print("Not a valid environment type")
        return

    regressor.fit(train_X_scaled, train_y)

    fig = plt.figure(figsize=(6, 4))
    sns.set_style("ticks")
    imp = regressor.feature_importances_
    sort_imp = imp.argsort()[::-1]
    barplot = sns.barplot(x=imp[sort_imp],
                          y=feature_names[sort_imp],
                          color='steelblue')
    barplot.set_xlabel("Importance")
    fig.tight_layout()
    sns.despine()

    if directory_exists("./Figures"):
        plt.savefig("./Figures/Importances_" + env + ".pdf",
                    bbox_inches="tight",
                    dpi=300,
                    transparent=True)
    else:
        print("Directory: ./Figures does not exist!")

    print("RF train accuracy: %0.3f" %
          regressor.score(train_X_scaled, train_y))
    print("RF test accuracy: %0.3f" % regressor.score(test_X_scaled, test_y))

    result = permutation_importance(regressor,
                                    train_X_scaled,
                                    train_y,
                                    n_repeats=25,
                                    random_state=0,
                                    n_jobs=-1)
    sorted_idx = result.importances_mean.argsort()[::-1]

    sns.set_style("ticks")
    fig, ax = plt.subplots(figsize=(6, 8))
    ax.boxplot(result.importances[sorted_idx].T)
    ax.set_ylabel("Permutation Importance")
    ax.set_xticklabels(labels=feature_names[sorted_idx],
                       rotation=45,
                       horizontalalignment='right')
    fig.tight_layout()
    sns.despine()

    if directory_exists("./Figures"):
        plt.savefig("./Figures/Perm_Importance_" + env + "_Train.pdf",
                    bbox_inches="tight",
                    dpi=300,
                    transparent=True)
    else:
        print("Directory: ./Figures does not exist!")

    result = permutation_importance(regressor,
                                    test_X_scaled,
                                    test_y,
                                    n_repeats=25,
                                    random_state=0,
                                    n_jobs=-1)
    sorted_idx = result.importances_mean.argsort()[::-1]

    sns.set_style("ticks")
    fig, ax = plt.subplots(figsize=(6, 8))
    ax.boxplot(result.importances[sorted_idx].T)
    ax.set_ylabel("Permutation Importance")
    ax.set_xticklabels(labels=feature_names[sorted_idx],
                       rotation=45,
                       horizontalalignment='right')
    fig.tight_layout()
    sns.despine()

    if directory_exists("./Figures"):
        plt.savefig("./Figures/Perm_Importance_" + env + "_Test.pdf",
                    bbox_inches="tight",
                    dpi=300,
                    transparent=True)
    else:
        print("Directory: ./Figures does not exist!")
Example #11
def svr(train_features,
        train_labels,
        test_features,
        names,
        dummies,
        net_type,
        extra_features=False):
    """
    Train the Support Vector Regressor from training data with labels and
    perform predictions on the test data.
    """

    print(
        '\n=== Running Support Vector Regression for {0} ==='.format(net_type))

    regressor = LinearSVR(random_state=0,
                          tol=1e-5,
                          max_iter=5000,
                          loss='squared_epsilon_insensitive',
                          epsilon=0.0,
                          C=0.0001,
                          dual=False)

    if extra_features:
        train_scaled_tmp, scaler = ml_funcs.apply_scaling(
            train_features[names], 'SVR', net_type, save_scaler=False)
        train_scaled = np.concatenate(
            [train_scaled_tmp,
             np.array(train_features[dummies])], axis=1)
    else:
        train_scaled, scaler = ml_funcs.apply_scaling(train_features,
                                                      'SVR',
                                                      net_type,
                                                      save_scaler=False)

    # Fit the model to the data.
    print('>> Training the regressor <<')
    starttime = time()
    regressor.fit(train_scaled, train_labels.to_numpy().T[0])
    endtime = time()
    duration = endtime - starttime
    print("Time: ", round(duration, 2), "s")

    # Only perform predictions when there are test features.
    # First scale the test features with the scaler fitted on the training data.
    if not test_features.empty:

        if extra_features:
            test_scaled_tmp = scaler.transform(test_features[names])
            test_scaled = np.concatenate(
                [test_scaled_tmp,
                 np.array(test_features[dummies])], axis=1)
        else:
            test_scaled = scaler.transform(test_features)

        print('>> Performing predictions <<')
        starttime = time()
        predictions = regressor.predict(test_scaled)
        endtime = time()
        duration = endtime - starttime
        print("Time: ", round(duration, 2), "s")

        return predictions
Example #12
def randomforest(train_features,
                 train_labels,
                 test_features,
                 names,
                 dummies,
                 net_type,
                 extra_features=False):
    """
    Train the Random Forest Regressor from training data with labels and
    perform predictions on the test data.
    """

    print(
        '\n=== Running Random Forest Regression for {0} ==='.format(net_type))

    regressor = RandomForestRegressor(n_estimators=250,
                                      max_features='sqrt',
                                      random_state=0,
                                      n_jobs=-1)

    # https://stackoverflow.com/questions/43798377/one-hot-encode-categorical-variables-and-scale-continuous-ones-simultaneouely
    # Only apply feature scaling to the numerical features and not to the one-hot-encoded ones.
    if extra_features:
        train_scaled_tmp, scaler = ml_funcs.apply_scaling(
            train_features[names], 'RFR', net_type, save_scaler=False)
        train_scaled = np.concatenate(
            [train_scaled_tmp,
             np.array(train_features[dummies])], axis=1)
    else:
        train_scaled, scaler = ml_funcs.apply_scaling(train_features,
                                                      'RFR',
                                                      net_type,
                                                      save_scaler=False)

    # Fit the model to the data.
    print('>> Training the regressor <<')
    starttime = time()
    regressor.fit(train_scaled, train_labels.to_numpy().T[0])
    endtime = time()
    duration_train = endtime - starttime
    print("Time: ", round(duration_train, 2), "s")

    importances = list(regressor.feature_importances_)

    # Only perform predictions when there are test features.
    # First scale the test features with the scaler fitted on the training data.
    if not test_features.empty:

        if extra_features:
            test_scaled_tmp = scaler.transform(test_features[names])
            test_scaled = np.concatenate(
                [test_scaled_tmp,
                 np.array(test_features[dummies])], axis=1)
        else:
            test_scaled = scaler.transform(test_features)

        print('>> Performing predictions <<')
        starttime = time()
        predictions = regressor.predict(test_scaled)
        endtime = time()
        duration_predict = endtime - starttime
        print("Time: ", round(duration_predict, 2), "s")

        return predictions, importances

    return importances
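A possible call site tying these pieces together, assuming a pandas DataFrame named data and the ml_funcs.get_features_and_labels helper seen in Example #10 (the split parameters are illustrative):

from sklearn.model_selection import train_test_split

# Derive features and labels, hold out 20% for testing, then train the
# random forest and predict; names/dummies only matter when
# extra_features=True.
features, labels = ml_funcs.get_features_and_labels(data, 'single',
                                                    False, [], labels=True)
train_X, test_X, train_y, test_y = train_test_split(features, labels,
                                                    test_size=0.2,
                                                    random_state=0)
predictions, importances = randomforest(train_X, train_y, test_X,
                                        names=[], dummies=[],
                                        net_type='single')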