def split_network(train_suburbs, train_cbds, test_db, tables_test, model_suburbs, scaler_suburbs,
                  model_cbds, scaler_cbds, method, save_predictions, labels, save_model,
                  test_subsets, feature_subset):
    """
    Perform the machine learning based on a split training network
    that separates the CBDs from the suburban and rural areas.
    """

    reading_time = []
    predict_time = []

    if len(train_suburbs) != 0:
        train_feat_suburb, train_label_suburb = ml_funcs.get_features_and_labels(train_suburbs,
                                                                                 "split",
                                                                                 test_subsets,
                                                                                 feature_subset,
                                                                                 labels=True)

    if len(train_cbds) != 0:
        train_feat_cbd, train_label_cbd = ml_funcs.get_features_and_labels(train_cbds, "split",
                                                                           test_subsets,
                                                                           feature_subset,
                                                                           labels=True)

    # A database to perform the tests/predictions on is specified.
    if test_db:
        connection = db_funcs.setup_connection(test_db)
        connection.autocommit = True
        cursor = connection.cursor()

        # If no specific tables are selected, perform predictions
        # for all tables in the specified testing database.
        if not tables_test:
            tables_test = db_funcs.unique_tables(cursor)

        for table in tables_test:
            if table == 'cbds':
                continue

            print(80*'-')
            print(80*'-')

            starttime = time()

            test_suburbs, test_cbds, _ = db_funcs.read_data(connection, table, training=labels)

            endtime = time()
            duration = endtime - starttime
            reading_time.append(duration)

            if labels:
                test_feat_suburbs, test_labels_suburbs = \
                  ml_funcs.get_features_and_labels(test_suburbs, "split", test_subsets,
                                                   feature_subset, labels=labels)
                test_feat_cbds, test_labels_cbds = \
                  ml_funcs.get_features_and_labels(test_cbds, "split", test_subsets, feature_subset,
                                                   labels=labels)
            else:
                test_feat_suburbs = \
                  ml_funcs.get_features_and_labels(test_suburbs, "split", test_subsets,
                                                   feature_subset, labels=labels)
                test_feat_cbds = ml_funcs.get_features_and_labels(test_cbds, "split", test_subsets,
                                                                  feature_subset, labels=labels)

            pred_cbds, pred_suburbs = np.array([]), np.array([])

            starttime = time()

            # No training data is specified: use the pre-trained models.
            if len(train_suburbs) == 0 and len(train_cbds) == 0:
                if method == "RFR":

                    # Test features for the CBD must be present.
                    if len(test_feat_cbds) != 0:
                        pred_cbds, imp_cbds = predict_from_model(method, test_feat_cbds,
                                                                 model_cbds, scaler_cbds,
                                                                 'CBD')
                    else:
                        print("Warning: no CBD data present in test set {0}".format(table))

                    # Test features for the suburbs/rural areas must be present.
                    if len(test_feat_suburbs) != 0:
                        pred_suburbs, imp_suburbs = predict_from_model(method, test_feat_suburbs,
                                                                       model_suburbs,
                                                                       scaler_suburbs, 'suburbs')
                    else:
                        print("Warning: no rural/suburban data present in test set {0}"\
                              .format(table))

                else:
                    # Test features for the CBD must be present.
                    if len(test_feat_cbds) != 0:
                        pred_cbds = predict_from_model(method, test_feat_cbds, model_cbds,
                                                       scaler_cbds, 'CBD')
                    else:
                        print("Warning: no CBD data present in test set {0}".format(table))

                    # Test features for the suburbs/rural areas must be present.
                    if len(test_feat_suburbs) != 0:
                        pred_suburbs = predict_from_model(method, test_feat_suburbs,
                                                          model_suburbs, scaler_suburbs,
                                                          'suburbs')
                    else:
                        print("Warning: no rural/suburban data present in test set {0}"\
                              .format(table))

            # Training data is specified: check which area morphologies are present.
            else:
                if method == "RFR":
                    if len(train_suburbs) != 0 and len(test_feat_suburbs) != 0:
                        pred_suburbs, imp_suburbs = train_from_data(method, train_feat_suburb,
                                                                    train_label_suburb,
                                                                    test_feat_suburbs,
                                                                    save_model, 'suburbs')
                    else:
                        print("Warning: training and testing data do not both contain " +\
                              "suburban/rural data!")

                    if len(train_cbds) != 0 and len(test_feat_cbds) != 0:
                        pred_cbds, imp_cbds = train_from_data(method, train_feat_cbd,
                                                              train_label_cbd, test_feat_cbds,
                                                              save_model, 'CBD')
                    else:
                        print("Warning: training and testing data do not both contain CBD data!")

                else:
                    if len(train_suburbs) != 0 and len(test_feat_suburbs) != 0:
                        pred_suburbs = train_from_data(method, train_feat_suburb,
                                                       train_label_suburb,
                                                       test_feat_suburbs, save_model, 'suburbs')
                    else:
                        print("Warning: training and testing data do not both contain " +\
                              "suburban/rural data!")

                    if len(train_cbds) != 0 and len(test_feat_cbds) != 0:
                        pred_cbds = train_from_data(method, train_feat_cbd, train_label_cbd,
                                                    test_feat_cbds, save_model, 'CBD')
                    else:
                        print("Warning: training and testing data do not both contain CBD data!")

            endtime = time()
            duration = endtime - starttime
            predict_time.append(duration)

            # Labels are present: print statistics for the height predictions.
            if labels:
                if method == "RFR":
                    if len(pred_suburbs) != 0:
                        ml_funcs.get_statistics(test_labels_suburbs, pred_suburbs, "split",
                                                feature_subset, imp_suburbs)
                        generate_plots.plot_cumulative_errors(test_labels_suburbs, pred_suburbs,
                                                              'suburbs')
                    if len(pred_cbds) != 0:
                        ml_funcs.get_statistics(test_labels_cbds, pred_cbds, "split",
                                                feature_subset, imp_cbds)
                        generate_plots.plot_cumulative_errors(test_labels_cbds, pred_cbds, 'CBDs')
                else:
                    if len(pred_suburbs) != 0:
                        ml_funcs.get_statistics(test_labels_suburbs, pred_suburbs, "split",
                                                feature_subset)
                        generate_plots.plot_cumulative_errors(test_labels_suburbs, pred_suburbs,
                                                              'suburbs')
                    if len(pred_cbds) != 0:
                        ml_funcs.get_statistics(test_labels_cbds, pred_cbds, "split",
                                                feature_subset)
                        generate_plots.plot_cumulative_errors(test_labels_cbds, pred_cbds, 'CBD')

            # Store predictions in database.
            if save_predictions:
                if len(pred_suburbs) != 0:
                    height_values = list(zip(test_suburbs.id, pred_suburbs))
                    db_funcs.store_predictions(cursor, height_values, table, method, 'split')

                if len(pred_cbds) != 0:
                    height_values = list(zip(test_cbds.id, pred_cbds))
                    db_funcs.store_predictions(cursor, height_values, table, method, 'split')

        db_funcs.close_connection(connection, cursor)

        print("\n>> Total duration (s) of reading data " + \
              "into dataframes: {0} ({1})".format(sum(reading_time),
                                                  timedelta(seconds=sum(reading_time))))
        print("\n>> Total duration (s) of the building " + \
              " height predictions: {0} ({1})".format(sum(predict_time),
                                                      timedelta(seconds=sum(predict_time))))

    # No test database is specified, only train the model based on the training data.
    # Useful when training and storing a model to a file.
    else:
        if len(train_suburbs) != 0:
            train_from_data(method, train_feat_suburb, train_label_suburb, np.array([]),
                            save_model, 'suburbs')
        if len(train_cbds) != 0:
            train_from_data(method, train_feat_cbd, train_label_cbd, np.array([]),
                            save_model, 'CBD')
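

# -- Illustrative usage sketch (not part of the original module) ---------------
# A minimal, hypothetical call of split_network() in prediction-only mode: the
# training frames are empty, so the pre-trained models and scalers are used for
# every table in the test database. The pickle file names, the database name and
# the None values for test_subsets/feature_subset are assumptions made purely
# for illustration.
def _example_split_network_prediction_only():
    import pickle
    import pandas as pd

    # Pre-trained models/scalers, assumed to have been pickled earlier.
    with open("model_suburbs.pkl", "rb") as f:
        model_suburbs = pickle.load(f)
    with open("scaler_suburbs.pkl", "rb") as f:
        scaler_suburbs = pickle.load(f)
    with open("model_cbds.pkl", "rb") as f:
        model_cbds = pickle.load(f)
    with open("scaler_cbds.pkl", "rb") as f:
        scaler_cbds = pickle.load(f)

    # Empty training frames -> the "use model" branch is taken for every table.
    split_network(train_suburbs=pd.DataFrame(), train_cbds=pd.DataFrame(),
                  test_db="heights_test", tables_test=None,
                  model_suburbs=model_suburbs, scaler_suburbs=scaler_suburbs,
                  model_cbds=model_cbds, scaler_cbds=scaler_cbds,
                  method="RFR", save_predictions=True, labels=False,
                  save_model=False, test_subsets=None, feature_subset=None)

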
def single_network(train_data, test_db, tables_test, model, scaler, method,
                   save_predictions, labels, save_model, test_subsets, feature_subset):
    """
    Perform the machine learning based on a single training network
    that combines the CBDs with the suburban and rural areas.
    """

    reading_time = []
    predict_time = []

    if len(train_data) != 0:
        train_features, train_labels = ml_funcs.get_features_and_labels(train_data, "single",
                                                                        test_subsets,
                                                                        feature_subset,
                                                                        labels=True)

    # A database to perform the tests/predictions on is specified.
    if test_db:
        connection = db_funcs.setup_connection(test_db)
        connection.autocommit = True
        cursor = connection.cursor()

        # If no specific tables are selected, perform predictions
        # for all tables in the specified testing database.
        if not tables_test:
            tables_test = db_funcs.unique_tables(cursor)

        for table in tables_test:
            if table == 'cbds':
                continue

            print(80*'-')
            print(80*'-')

            starttime = time()

            _, _, test_data = db_funcs.read_data(connection, table, training=labels)

            endtime = time()
            duration = endtime - starttime
            reading_time.append(duration)

            if labels:
                test_features, test_labels = ml_funcs.get_features_and_labels(test_data, "single",
                                                                              test_subsets,
                                                                              feature_subset,
                                                                              labels=labels)
            else:
                test_features = ml_funcs.get_features_and_labels(test_data, "single", test_subsets,
                                                                 feature_subset, labels=labels)

            starttime = time()

            if len(train_data) == 0:
                if method == "RFR":
                    predictions, importances = predict_from_model(method, test_features,
                                                                  model, scaler, 'combined')
                else:
                    predictions = predict_from_model(method, test_features, model, scaler,
                                                     'combined')
            else:
                if method == "RFR":
                    predictions, importances = train_from_data(method, train_features,
                                                               train_labels, test_features,
                                                               save_model, 'combined')
                else:
                    predictions = train_from_data(method, train_features, train_labels,
                                                  test_features, save_model, 'combined')

            endtime = time()
            duration = endtime - starttime
            predict_time.append(duration)

            # Labels are present: print statistics for the height predictions.
            if labels:
                if method == "RFR":
                    ml_funcs.get_statistics(test_labels, predictions, "single", feature_subset,
                                            importances)
                else:
                    ml_funcs.get_statistics(test_labels, predictions, "single", feature_subset)
                generate_plots.plot_cumulative_errors(test_labels, predictions, 'combined')

            # Store predictions in database.
            if save_predictions:
                height_values = list(zip(test_data.id, predictions))
                db_funcs.store_predictions(cursor, height_values, table, method, 'combined')

        db_funcs.close_connection(connection, cursor)

        print("\n>> Total duration (s) of reading data " + \
              "into dataframes: {0} ({1})".format(sum(reading_time),
                                                  timedelta(seconds=sum(reading_time))))
        print("\n>> Total duration (s) of the building " + \
              " height predictions: {0} ({1})".format(sum(predict_time),
                                                      timedelta(seconds=sum(predict_time))))

    # No test database is specified, only train the model based on the training data.
    # Useful when training and storing a model to a file.
    else:
        if len(train_data) != 0:
            train_from_data(method, train_features, train_labels, np.array([]),
                            save_model, 'combined')
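

# -- Illustrative usage sketch (not part of the original module) ---------------
# A minimal, hypothetical call of single_network() in train-and-save-only mode:
# test_db is None, so the function skips prediction and only trains (and, with
# save_model=True, stores) the combined model. "train_df" and the None values
# for test_subsets/feature_subset are assumptions made purely for illustration.
def _example_single_network_train_only(train_df):
    single_network(train_data=train_df, test_db=None, tables_test=None,
                   model=None, scaler=None, method="RFR",
                   save_predictions=False, labels=True, save_model=True,
                   test_subsets=None, feature_subset=None)

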
def test_geom_features_single(data, cursor, table, store_results, method):
    """
    Only include geometric features during the training and
    prediction process. Based on a single training network.
    """

    features = [
        "area", "compactness", "num_neighbours", "num_adjacent_blds",
        "num_vertices", "length", "width", "slimness", "complexity", "cbd"
    ]
    labels = ["rel_height"]
    dummies = []

    X_train, X_test, y_train, y_test = train_test_split(data[features],
                                                        data[labels],
                                                        test_size=0.75,
                                                        random_state=42)

    y_test = y_test.to_numpy().T[0]

    if method == "RFR":
        predictions, importances = randomforest(X_train,
                                                y_train,
                                                X_test,
                                                features,
                                                dummies,
                                                "combined",
                                                extra_features=False)

        print_statistics(features, importances, predictions, y_test,
                         "combined", method)
        # generate_plots.plot_cumulative_errors(y_test, predictions, 'combined')

    elif method == "MLR":
        predictions = mlr(X_train,
                          y_train,
                          X_test,
                          features,
                          dummies,
                          "combined",
                          extra_features=False)

        print_statistics(features, None, predictions, y_test, "combined",
                         method)
        # generate_plots.plot_cumulative_errors(y_test, predictions, 'combined')

    elif method == "SVR":
        predictions = svr(X_train,
                          y_train,
                          X_test,
                          features,
                          dummies,
                          "combined",
                          extra_features=False)

        print_statistics(features, None, predictions, y_test, "combined",
                         method)
        # generate_plots.plot_cumulative_errors(y_test, predictions, 'combined')

    else:
        print("Not a valid method.")
        return

    if store_results:
        name = method + "_geometric_single"
        height_vals = list(zip(data.loc[X_test.index].id, predictions))
        db_funcs.store_predictions(cursor, height_vals, table, name,
                                   'combined')

        # Negative: underestimation, positive: overestimation
        # Store relative error and the percentage error in the database.
        rel_errors = (predictions - y_test)
        perc_error = ((predictions - y_test) / y_test) * 100
        error_val = list(zip(data.loc[X_test.index].id, rel_errors,
                             perc_error))
        db_funcs.store_errors(cursor, error_val, table, name, 'combined')
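

# -- Illustrative example (not part of the original module) --------------------
# A tiny worked example of the error convention stored above: negative values
# mean the predicted height underestimates the reference height, positive values
# mean it overestimates. The numbers are made up purely for illustration.
def _example_error_convention():
    import numpy as np

    predictions = np.array([9.0, 12.0])   # predicted relative heights
    y_true = np.array([10.0, 10.0])       # reference relative heights
    rel_errors = predictions - y_true                     # [-1.0,  2.0]
    perc_error = ((predictions - y_true) / y_true) * 100  # [-10.0, 20.0]
    return rel_errors, perc_error

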
def test_all_features_split(data_suburb, data_cbd, cursor, table,
                            store_results, method):
    """
    Include both geometric and non-geometric features during
    the training and prediction process. Based on a split training network of suburbs/
    rural areas and CBDs.

    Source: https://blog.cambridgespark.com/robust-one-hot-encoding-in-python-3e29bfcec77e
    """

    # Create the dummy columns (one hot encoding) for the categorical data.
    cat_columns = ['bldg_type']
    suburb_processed = pd.get_dummies(data_suburb,
                                      prefix_sep="__",
                                      columns=cat_columns)
    cbd_processed = pd.get_dummies(data_cbd,
                                   prefix_sep="__",
                                   columns=cat_columns)

    # Extract the names from the dummy columns for later use.
    cat_dummies_suburb = [col for col in suburb_processed if "__" in col \
                          and col.split("__")[0] in cat_columns]
    cat_dummies_cbd = [col for col in cbd_processed if "__" in col \
                       and col.split("__")[0] in cat_columns]

    # Create the list of features so we can extract the data from the dataframe. The CBD and
    # suburbs may have separate categorical features present.
    features_general = [
        "area", "compactness", "num_neighbours", "num_adjacent_blds",
        "num_vertices", "length", "width", "slimness", "complexity",
        "avg_hh_income", "avg_hh_size", "pop_density", "h_mean",
        "num_amenities"
    ]
    features_suburb = features_general + cat_dummies_suburb
    features_cbd = features_general + cat_dummies_cbd

    labels = ["rel_height"]

    # Split the data into a training and testing set.
    X_sub_train, X_sub_test, y_sub_train, y_sub_test = \
      train_test_split(suburb_processed[features_suburb], suburb_processed[labels],
                       test_size=0.75, random_state=42)

    X_cbd_train, X_cbd_test, y_cbd_train, y_cbd_test = \
      train_test_split(cbd_processed[features_cbd], cbd_processed[labels],
                       test_size=0.75, random_state=42)

    y_sub_test = y_sub_test.to_numpy().T[0]
    y_cbd_test = y_cbd_test.to_numpy().T[0]

    if method == "RFR":
        # Run the random forest regressor for the suburban data and print the results.
        pred_suburbs, imp_suburbs = randomforest(X_sub_train,
                                                 y_sub_train,
                                                 X_sub_test,
                                                 features_general,
                                                 cat_dummies_suburb,
                                                 "suburbs",
                                                 extra_features=True)

        print_statistics(features_suburb, imp_suburbs, pred_suburbs,
                         y_sub_test, "suburbs", method)
        # generate_plots.plot_cumulative_errors(y_sub_test, pred_suburbs, 'suburbs')

        # Run the random forest regressor for the CBD data and print the results.
        pred_cbd, imp_cbd = randomforest(X_cbd_train,
                                         y_cbd_train,
                                         X_cbd_test,
                                         features_general,
                                         cat_dummies_cbd,
                                         "CBD",
                                         extra_features=True)

        print_statistics(features_cbd, imp_cbd, pred_cbd, y_cbd_test, "CBD",
                         method)
        # generate_plots.plot_cumulative_errors(y_cbd_test, pred_cbd, 'CBD')

    elif method == "MLR":
        # Run the multiple linear regressor for the suburban data and print the results.
        pred_suburbs = mlr(X_sub_train,
                           y_sub_train,
                           X_sub_test,
                           features_general,
                           cat_dummies_suburb,
                           "suburbs",
                           extra_features=True)

        print_statistics(features_suburb, None, pred_suburbs, y_sub_test,
                         "suburbs", method)
        # generate_plots.plot_cumulative_errors(y_sub_test, pred_suburbs, 'suburbs')

        # Run the multiple linear regressor for the CBD data and print the results.
        pred_cbd = mlr(X_cbd_train,
                       y_cbd_train,
                       X_cbd_test,
                       features_general,
                       cat_dummies_cbd,
                       "CBD",
                       extra_features=True)

        print_statistics(features_cbd, None, pred_cbd, y_cbd_test, "CBD",
                         method)
        # generate_plots.plot_cumulative_errors(y_cbd_test, pred_cbd, 'CBD')

    elif method == "SVR":
        # Run the support vector regressor for the suburban data and print the results.
        pred_suburbs = svr(X_sub_train,
                           y_sub_train,
                           X_sub_test,
                           features_general,
                           cat_dummies_suburb,
                           "suburbs",
                           extra_features=True)

        print_statistics(features_suburb, None, pred_suburbs, y_sub_test,
                         "suburbs", method)
        # generate_plots.plot_cumulative_errors(y_sub_test, pred_suburbs, 'suburbs')

        # Run the support vector regressor for the CBD data and print the results.
        pred_cbd = svr(X_cbd_train,
                       y_cbd_train,
                       X_cbd_test,
                       features_general,
                       cat_dummies_cbd,
                       "CBD",
                       extra_features=True)

        print_statistics(features_cbd, None, pred_cbd, y_cbd_test, "CBD",
                         method)
        # generate_plots.plot_cumulative_errors(y_cbd_test, pred_cbd, 'CBD')

    else:
        print("Not a valid method.")
        return

    if store_results:
        name = method + "_all_split"
        height_vals_suburb = list(
            zip(data_suburb.loc[X_sub_test.index].id, pred_suburbs))
        db_funcs.store_predictions(cursor, height_vals_suburb, table, name,
                                   'suburbs')

        height_vals_cbd = list(zip(data_cbd.loc[X_cbd_test.index].id,
                                   pred_cbd))
        db_funcs.store_predictions(cursor, height_vals_cbd, table, name,
                                   'CBDs')

        # Negative: underestimation, positive: overestimation
        # Store relative error and the percentage error in the database.
        rel_errors_suburbs = (pred_suburbs - y_sub_test)
        perc_error_suburbs = ((pred_suburbs - y_sub_test) / y_sub_test) * 100
        error_val_suburb = list(
            zip(data_suburb.loc[X_sub_test.index].id, rel_errors_suburbs,
                perc_error_suburbs))
        db_funcs.store_errors(cursor, error_val_suburb, table, name, 'suburbs')

        rel_errors_cbd = (pred_cbd - y_cbd_test)
        perc_error_cbd = ((pred_cbd - y_cbd_test) / y_cbd_test) * 100
        error_vals_cbd = list(
            zip(data_cbd.loc[X_cbd_test.index].id, rel_errors_cbd,
                perc_error_cbd))
        db_funcs.store_errors(cursor, error_vals_cbd, table, name, 'CBD')
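

# -- Illustrative example (not part of the original module) --------------------
# Stand-alone sketch of the one-hot-encoding pattern used in
# test_all_features_split() above: pd.get_dummies() with prefix_sep="__",
# followed by recovering the generated dummy column names so they can be
# appended to the feature list. The tiny frame is made up for illustration.
def _example_dummy_columns():
    import pandas as pd

    df = pd.DataFrame({"area": [120.0, 80.0],
                       "bldg_type": ["residential", "office"]})
    cat_columns = ["bldg_type"]
    processed = pd.get_dummies(df, prefix_sep="__", columns=cat_columns)
    dummies = [col for col in processed
               if "__" in col and col.split("__")[0] in cat_columns]
    # dummies == ["bldg_type__office", "bldg_type__residential"]
    return processed, dummies

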
def test_all_features_single(data, cursor, table, store_results, method):
    """
    Include both geometric and non-geometric features during
    the training and prediction process. Based on a single training network.
    """

    # Create the dummy columns (one hot encoding) for the categorical data.
    cat_columns = ['bldg_type']
    data_processed = pd.get_dummies(data, prefix_sep="__", columns=cat_columns)

    # Extract the names from the dummy columns for later use.
    cat_dummies = [col for col in data_processed if "__" in col \
                   and col.split("__")[0] in cat_columns]

    # Create list of features so we can extract the data from the dataframe.
    features_general = [
        "area", "compactness", "num_neighbours", "num_adjacent_blds",
        "num_vertices", "length", "width", "slimness", "complexity", "cbd",
        "avg_hh_income", "avg_hh_size", "pop_density", "h_mean",
        "num_amenities"
    ]

    features_all = features_general + cat_dummies
    labels = ["rel_height"]

    X_train, X_test, y_train, y_test = train_test_split(
        data_processed[features_all],
        data_processed[labels],
        test_size=0.75,
        random_state=42)

    y_test = y_test.to_numpy().T[0]

    if method == "RFR":
        predictions, importances = randomforest(X_train,
                                                y_train,
                                                X_test,
                                                features_all,
                                                cat_dummies,
                                                "combined",
                                                extra_features=False)

        print_statistics(features_all, importances, predictions, y_test,
                         "combined", method)
        # generate_plots.plot_cumulative_errors(y_test, predictions, 'combined')

    elif method == "MLR":
        predictions = mlr(X_train,
                          y_train,
                          X_test,
                          features_all,
                          cat_dummies,
                          "combined",
                          extra_features=False)

        print_statistics(features_all, None, predictions, y_test, "combined",
                         method)
        # generate_plots.plot_cumulative_errors(y_test, predictions, 'combined')

    elif method == "SVR":
        predictions = svr(X_train,
                          y_train,
                          X_test,
                          features_all,
                          cat_dummies,
                          "combined",
                          extra_features=False)

        print_statistics(features_all, None, predictions, y_test, "combined",
                         method)
        # generate_plots.plot_cumulative_errors(y_test, predictions, 'combined')

    else:
        print("Not a valid method.")
        return

    if store_results:
        name = method + "_all_single"
        height_vals = list(zip(data.loc[X_test.index].id, predictions))
        db_funcs.store_predictions(cursor, height_vals, table, name,
                                   'combined')

        # Negative: underestimation, positive: overestimation
        # Store relative error and the percentage error in the database.
        rel_errors = (predictions - y_test)
        perc_error = ((predictions - y_test) / y_test) * 100
        error_val = list(zip(data.loc[X_test.index].id, rel_errors,
                             perc_error))
        db_funcs.store_errors(cursor, error_val, table, name, 'combined')
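

# -- Illustrative example (not part of the original module) --------------------
# Stand-alone sketch of the splitting pattern used above: train_test_split()
# with test_size=0.75 keeps only 25% of the rows for training, and the
# single-column label frame is flattened to a 1-D array with .to_numpy().T[0].
# The tiny frame is made up purely for illustration.
def _example_split_and_flatten():
    import pandas as pd
    from sklearn.model_selection import train_test_split

    df = pd.DataFrame({"area": [120.0, 80.0, 60.0, 200.0],
                       "rel_height": [9.0, 6.0, 3.0, 12.0]})
    X_train, X_test, y_train, y_test = train_test_split(
        df[["area"]], df[["rel_height"]], test_size=0.75, random_state=42)
    y_test = y_test.to_numpy().T[0]   # 1-D array of reference heights
    return X_train, X_test, y_train, y_test

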
def test_geom_features_split(data_suburb, data_cbd, cursor, table,
                             store_results, method):
    """
    Only include geometric features during the training and
    prediction process. Based on a split training network of suburbs/
    rural areas and CBDs.
    """
    features = [
        "area", "compactness", "num_neighbours", "num_adjacent_blds",
        "num_vertices", "length", "width", "slimness", "complexity"
    ]
    labels = ["rel_height"]
    dummies = []

    # Split the data into a training and testing set.
    X_sub_train, X_sub_test, y_sub_train, y_sub_test = train_test_split(
        data_suburb[features],
        data_suburb[labels],
        test_size=0.75,
        random_state=42)

    X_cbd_train, X_cbd_test, y_cbd_train, y_cbd_test = train_test_split(
        data_cbd[features], data_cbd[labels], test_size=0.75, random_state=42)

    y_sub_test = y_sub_test.to_numpy().T[0]
    y_cbd_test = y_cbd_test.to_numpy().T[0]

    if method == "RFR":
        # Run the random forest regressor for the suburban data and print the results.
        pred_suburbs, imp_suburbs = randomforest(X_sub_train,
                                                 y_sub_train,
                                                 X_sub_test,
                                                 features,
                                                 dummies,
                                                 "suburbs",
                                                 extra_features=False)

        print_statistics(features, imp_suburbs, pred_suburbs, y_sub_test,
                         "suburbs", method)
        # generate_plots.plot_cumulative_errors(y_sub_test, pred_suburbs, 'suburbs')

        # Run the random forest regressor for the CBD data and print the results.
        pred_cbd, imp_cbd = randomforest(X_cbd_train,
                                         y_cbd_train,
                                         X_cbd_test,
                                         features,
                                         dummies,
                                         "CBD",
                                         extra_features=False)

        print_statistics(features, imp_cbd, pred_cbd, y_cbd_test, "CBD",
                         method)
        # generate_plots.plot_cumulative_errors(y_cbd_test, pred_cbd, 'CBD')

    elif method == "MLR":
        # Run the multiple linear regressor for the suburban data and print the results.
        pred_suburbs = mlr(X_sub_train,
                           y_sub_train,
                           X_sub_test,
                           features,
                           dummies,
                           "suburbs",
                           extra_features=False)

        print_statistics(features, None, pred_suburbs, y_sub_test, "suburbs",
                         method)
        # generate_plots.plot_cumulative_errors(y_sub_test, pred_suburbs, 'suburbs')

        # Run the multiple linear regressor for the CBD data and print the results.
        pred_cbd = mlr(X_cbd_train,
                       y_cbd_train,
                       X_cbd_test,
                       features,
                       dummies,
                       "CBD",
                       extra_features=False)

        print_statistics(features, None, pred_cbd, y_cbd_test, "CBD", method)
        # generate_plots.plot_cumulative_errors(y_cbd_test, pred_cbd, 'CBD')

    elif method == "SVR":
        # Run the support vector regressor for the suburban data and print the results.
        pred_suburbs = svr(X_sub_train,
                           y_sub_train,
                           X_sub_test,
                           features,
                           dummies,
                           "suburbs",
                           extra_features=False)

        print_statistics(features, None, pred_suburbs, y_sub_test, "suburbs",
                         method)
        # generate_plots.plot_cumulative_errors(y_sub_test, pred_suburbs, 'suburbs')

        # Run the support vector regressor for the CBD data and print the results.
        pred_cbd = svr(X_cbd_train,
                       y_cbd_train,
                       X_cbd_test,
                       features,
                       dummies,
                       "CBD",
                       extra_features=False)

        print_statistics(features, None, pred_cbd, y_cbd_test, "CBD", method)
        # generate_plots.plot_cumulative_errors(y_cbd_test, pred_cbd, 'CBD')

    else:
        print("Not a valid method.")
        return

    if store_results:
        name = method + "_geometric_split"
        height_vals_suburb = list(
            zip(data_suburb.loc[X_sub_test.index].id, pred_suburbs))
        db_funcs.store_predictions(cursor, height_vals_suburb, table, name,
                                   'suburbs')

        height_vals_cbd = list(zip(data_cbd.loc[X_cbd_test.index].id,
                                   pred_cbd))
        db_funcs.store_predictions(cursor, height_vals_cbd, table, name,
                                   'CBDs')

        # Negative: underestimation, positive: overestimation
        # Store relative error and the percentage error in the database.
        rel_errors_suburbs = (pred_suburbs - y_sub_test)
        perc_error_suburbs = ((pred_suburbs - y_sub_test) / y_sub_test) * 100
        error_val_suburb = list(
            zip(data_suburb.loc[X_sub_test.index].id, rel_errors_suburbs,
                perc_error_suburbs))
        db_funcs.store_errors(cursor, error_val_suburb, table, name, 'suburbs')

        rel_errors_cbd = (pred_cbd - y_cbd_test)
        perc_error_cbd = ((pred_cbd - y_cbd_test) / y_cbd_test) * 100
        error_vals_cbd = list(
            zip(data_cbd.loc[X_cbd_test.index].id, rel_errors_cbd,
                perc_error_cbd))
        db_funcs.store_errors(cursor, error_vals_cbd, table, name, 'CBD')
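

# -- Illustrative usage sketch (not part of the original module) ---------------
# A minimal, hypothetical call of test_geom_features_split() with
# store_results=False, so the statistics are printed but nothing is written to
# the database. The data frames are assumed to already contain the geometric
# feature columns and the "rel_height" label; "cursor" and "table" are only
# used when store_results is True.
def _example_geom_split(data_suburb, data_cbd, cursor, table):
    test_geom_features_split(data_suburb, data_cbd, cursor, table,
                             store_results=False, method="RFR")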