def cv_svr_models(stockmodel, option_type, random_state):
    """
    Run a randomized hyper-parameter search for an SVR on one stockmodel/option combination.

    50 parameter sets are sampled from the module-level ``distributions`` and each one is
    evaluated with 3-fold cross validation, scored on negative MSE and R2. The input
    features are standardized before the search. No final model is refitted
    (``refit=False``); the object returned by the search's ``fit`` is stored through
    ``modelsaver`` under the name "SVR-random_search_{stockmodel}_{option_type}".

    :param stockmodel: str, "BS", "VG" or "H"
    :param option_type: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or "opt_lookbackmax"
    :param random_state: int, random state handed to the randomized search
    """
    features, targets = dc.DataManager(stockmodel=stockmodel,
                                       option_type=option_type).get_training_data()

    # The SVR trains much faster and performs much better when the data is rescaled.
    standardizer = preprocessing.StandardScaler()
    standardizer.fit(features, targets)
    features = standardizer.transform(features)

    search_options = {
        "random_state": random_state,
        "cv": 3,
        "n_iter": 50,
        "verbose": 10,
        "n_jobs": 6,
        "scoring": ['neg_mean_squared_error', 'r2'],
        "refit": False,
    }
    search = RandomizedSearchCV(SVR(cache_size=1000), distributions, **search_options)

    fitted_search = search.fit(features, targets)

    modelsaver.save_model(fitted_search, f"SVR-random_search_{stockmodel}_{option_type}")
Exemplo n.º 2
0
def main_cv():
    """
    Run ``cv_layers`` for every stockmodel/option combination and save each result.

    The exact Black-Scholes option ("BS", "opt_exact_standard") is processed first,
    followed by every combination of the three stockmodels with the four option types.
    Each run is called with n_tests=50, epochs=50, cv=3 and scale=True, gets its own
    random state derived from the start value 594, and is stored through ``modelsaver``
    under "NN-random_search_{stockmodel}_{option}" plus "_scaled" when scaling is on.
    """
    start_random_state = 594
    n_tests = 50
    scale = True
    string_scaled = '_scaled' if scale else ""

    stockmodels = ("BS", "VG", "H")
    option_columns = ("opt_standard", "opt_asianmean", "opt_lookbackmin",
                      "opt_lookbackmax")

    # CV of the Black-Scholes formula goes first, then the full stockmodel/option grid.
    runs = [("BS", "opt_exact_standard", start_random_state + 30)]
    runs.extend(
        (stockmodel, option, start_random_state + 10 * i + j * 2)
        for i, stockmodel in enumerate(stockmodels)
        for j, option in enumerate(option_columns))

    for stockmodel, option, random_state in runs:
        print(f"Start cv for {stockmodel}-{option}")
        results = cv_layers(n_tests,
                            stockmodel,
                            option,
                            random_state=random_state,
                            scale=scale,
                            epochs=50,
                            cv=3)
        modelsaver.save_model(
            results, f"NN-random_search_{stockmodel}_{option}{string_scaled}")

    print("End")
Exemplo n.º 3
0
def cv_gpr_models(stockmodel, option, random_state=None, scale=False):
    """
    Run a randomized hyper-parameter search for a Gaussian process regressor.

    50 parameter sets (kernel, ``normalize_y`` and a uniformly drawn ``alpha``) are each
    evaluated with 3-fold cross validation, scored on negative MSE and R2. The data comes
    from ``get_random_training_data(10000)`` of the data manager. No final model is
    refitted (``refit=False``); the object returned by the search's ``fit`` is stored
    through ``modelsaver`` under "GPR-random_search_{stockmodel}_{option}", with
    "_scaled" appended when ``scale`` is true.

    :param stockmodel: str, "BS", "VG" or "H"
    :param option: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or "opt_lookbackmax"
    :param random_state: int or None, random state handed to the randomized search
    :param scale: bool, whether the input features are standardized before the search
    """
    search_space = {
        "normalize_y": [True, False],
        'kernel': [RBF(), Matern(), DotProduct(), RationalQuadratic()],
        "alpha": uniform(loc=0.000000001, scale=0.001)
    }

    features, targets = dc.DataManager(
        stockmodel=stockmodel,
        option_type=option).get_random_training_data(10000)

    if scale:
        standardizer = preprocessing.StandardScaler()
        standardizer.fit(features, targets)
        features = standardizer.transform(features)

    search_options = {
        "random_state": random_state,
        "cv": 3,
        "n_iter": 50,
        "verbose": 10,
        "n_jobs": 2,
        "scoring": ['neg_mean_squared_error', 'r2'],
        "refit": False,
    }
    search = RandomizedSearchCV(
        gaussian_process.GaussianProcessRegressor(optimizer="fmin_l_bfgs_b"),
        search_space,
        **search_options)

    fitted_search = search.fit(features, targets)

    string_scaled = '_scaled' if scale else ""
    modelsaver.save_model(
        fitted_search, f"GPR-random_search_{stockmodel}_{option}{string_scaled}")
Exemplo n.º 4
0
def rf_n_estimators(stockmodel="BS",
                    option_type="opt_exact_standard",
                    range_n_estimators=range(50, 751, 50),
                    save_mse=True,
                    max_features="auto",
                    scale=True):
    """
    Fit a random forest for each number of estimators and collect its train/test errors.

    For every value in ``range_n_estimators`` a ``RandomForestRegressor`` is fitted on the
    training data; the mse on the training data, the mse on the test data and the
    out-of-bag score of the fitted forest are printed and collected.

    :param stockmodel: str, "BS", "VG" or "H"
    :param option_type: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or "opt_lookbackmax"
            If stockmodel = "BS" -> "opt_exact_standard" is also possible
    :param range_n_estimators: iterable with the number of estimators for each run
    :param save_mse: bool, whether to save all the values in a file.
    :param max_features: "auto", "log2" or an integer, for the splits in the trees.
            "auto" is translated to 1.0 (all features) before it is passed to the forest,
            because newer scikit-learn versions no longer accept the string "auto".
    :param scale: bool, if the data needs to be scaled or not
    :return: dict, with keys "Train", "Test", "oob_score", "n_estimators".
            Train = mse of the Training data, one value per run
            Test = mse of the Test data, one value per run
            oob_score = ``oob_score_`` of each fitted forest; for a regressor this is the
                        R2 of the out-of-bag predictions, not an mse
            n_estimators = ``range_n_estimators`` as it was passed in
    :raises KeyError: if ``save_mse`` is true and ``option_type`` has no abbreviation in
            the file-name table; raised before any forest is trained.
    """

    # Abbreviations of the option types used in the name of the saved file.
    dict_option_types = {
        "opt_exact_standard": "SE",
        "opt_standard": "S",
        "opt_asianmean": "A",
        "opt_lookbackmin": "Lmin",
        "opt_lookbackmax": "Lmax"
    }

    datamanager = dc.DataManager(stockmodel=stockmodel,
                                 option_type=option_type)

    X, y = datamanager.get_training_data()
    X_test, y_test = datamanager.get_test_data()

    if scale:
        # The scaler is fitted on the training data only and reused for the test data.
        scaler = preprocessing.StandardScaler().fit(X, y)
        X = scaler.transform(X)
        X_test = scaler.transform(X_test)

    # Materialise once so the values can be both summarised (min/max) and iterated.
    n_estimators_values = list(range_n_estimators)

    # Build the file name before the (long) training loop, so that an unknown option
    # type or an empty range fails immediately instead of after all forests are fitted.
    result_name = None
    if save_mse:
        string_scaled = "_scaled" if scale else ""
        result_name = (
            f"rf_{min(n_estimators_values)}-{max(n_estimators_values)}"
            f"-results_train_test-{stockmodel}-{dict_option_types[option_type]}"
            f"-{max_features}{string_scaled}")

    # For a regressor "auto" meant "use all features"; 1.0 expresses the same thing and
    # is accepted by both old and new scikit-learn versions. The file name above keeps
    # the value the caller passed.
    forest_max_features = 1.0 if max_features == "auto" else max_features

    list_results_train = []
    list_results_test = []
    list_oob_score = []

    for n_estimator in n_estimators_values:
        rf_model = RandomForestRegressor(n_estimators=n_estimator,
                                         verbose=1,
                                         n_jobs=7,
                                         random_state=2458 + n_estimator,
                                         max_features=forest_max_features,
                                         oob_score=True)
        rf_model.fit(X, y)

        mse_train = mean_squared_error(y, rf_model.predict(X))
        mse_test = mean_squared_error(y_test, rf_model.predict(X_test))
        oob_score = rf_model.oob_score_

        print(f'Train {mse_train}')
        print(f'Test {mse_test}')
        print(f'OOB score: {oob_score}')

        list_results_train.append(mse_train)
        list_results_test.append(mse_test)
        list_oob_score.append(oob_score)

    dict_result = {
        "Train": list_results_train,
        "Test": list_results_test,
        "oob_score": list_oob_score,
        "n_estimators": range_n_estimators
    }

    if save_mse:
        modelsaver.save_model(dict_result, result_name)

    return dict_result