Example #1
def full_data_training(stockmodel,
                       option_type,
                       only_call=False,
                       with_percentage=False):
    """
    print the results of the performance over the part of the dataset(*) for the given stock stockmodel and option type

    (*) hardware problems when full dataset is given.

    :param stockmodel: str, "BS", "VG" or "H"
    :param option_type: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or "opt_lookbackmax"
    :param only_call: bool (default=False), if the dataset only contains the call options
    :param with_percentage: bool (default=False),
            if the dataset needs to contain the percentage of the stock price and the strike
    """
    n_samples = 10000
    random_state = 9943

    base_file_name = "GPR-random_search_{0}_{1}_scaled.p".format(
        stockmodel, option_type)

    full_file_name = pkg_resources.open_text(random_search_gpr,
                                             base_file_name).name
    dict_cv_results = modelsaver.get_model(full_file_name).cv_results_
    best_position = np.where(
        dict_cv_results['rank_test_neg_mean_squared_error'] == 1)
    best_model_parameters = np.array(
        dict_cv_results['params'])[best_position][0]

    dm = dc.DataManager(stockmodel=stockmodel,
                        option_type=option_type,
                        only_call=only_call,
                        with_percent=with_percentage)
    X_train, y_train, x_not_selected, y_not_selected = dm.get_random_training_data(
        n_samples=n_samples,
        random_state=random_state,
        get_not_selected_data=True)

    # fit the scaler on the training inputs only; it is reused for the test data below
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)

    gpr_model = gaussian_process.GaussianProcessRegressor(
        kernel=best_model_parameters["kernel"],
        normalize_y=best_model_parameters["normalize_y"],
        alpha=best_model_parameters["alpha"])

    gpr_model.fit(X_train, y_train)

    X_test, y_test = dm.get_test_data()
    X_test = scaler.transform(X_test)
    x_not_selected = scaler.transform(x_not_selected)

    y_pred = gpr_model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_pred=y_pred)

    y_pred_not_selected = gpr_model.predict(x_not_selected)
    mse_not_selected = mean_squared_error(y_not_selected, y_pred_not_selected)

    print(f"MSE(test data): {mse_test}")
    print(f"MSE(not selected): {mse_not_selected}")
def part_dataset_like_gpr(stockmodel, option_type, only_call=False):
    """
   Do the testings with a smaller set of datapoints, the same as the test for the Gaussian Process Regressor
   Print the mse of the Test data and the part of the training data which are not used

   :param stockmodel: str, "BS", "VG" or "H"
   :param option_type: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or
   :param only_call: bool (default=False), if the dataset only contains the call options
   :param with_percentage: bool (default=False),
           if the dataset needs to contain the percentage of the stock price and the strike
   :param scale: bool (default=False), whenever to scale the data
   """
    n_samples = 10000
    random_state = 9943

    base_file_name = "SVR-random_search_{0}_{1}_scaled.p".format(stockmodel, option_type)

    # get the best parameters from the cross validation
    full_file_name = pkg_resources.open_text(random_search_svr, base_file_name).name
    dict_cv_results = modelsaver.get_model(full_file_name).cv_results_
    best_position = np.where(dict_cv_results['rank_test_neg_mean_squared_error'] == 1)
    best_model_parameters = np.array(dict_cv_results['params'])[best_position][0]

    # get the training and test data
    dm = dc.DataManager(stockmodel=stockmodel, option_type=option_type, only_call=only_call)
    X_train, y_train, x_not_selected, y_not_selected = dm.get_random_training_data(n_samples=n_samples,
                                                                                   random_state=random_state,
                                                                                   get_not_selected_data=True)

    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)

    svr_model = SVR(cache_size=2000,
                    C=best_model_parameters['C'],
                    degree=best_model_parameters['degree'],
                    epsilon=best_model_parameters['epsilon'],
                    gamma=best_model_parameters['gamma'],
                    kernel=best_model_parameters['kernel'])

    svr_model.fit(X_train, y_train)

    X_test, y_test = dm.get_test_data()
    X_test = scaler.transform(X_test)
    x_not_selected = scaler.transform(x_not_selected)

    y_pred = svr_model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_pred=y_pred)

    y_pred_not_selected = svr_model.predict(x_not_selected)
    mse_not_selected = mean_squared_error(y_not_selected, y_pred_not_selected)

    print(f"MSE(test data): {mse_test:4.3f}")
    print(f"MSE(not selected): {mse_not_selected:4.3f}")
Example #3
def plotting_results_cv_svr():
    # todo: work on this further
    dict_cv_results = modelsaver.get_model("SVR-random_search.p").cv_results_

    ranks = dict_cv_results['rank_test_neg_mean_squared_error']
    best_positions = np.where(ranks <= 1)

    # print(best_positions)

    print(np.array(dict_cv_results['params'])[best_positions])
    print(dict_cv_results['mean_test_neg_mean_squared_error'][best_positions])
    # best results: a poly kernel of degree 2 (mse=125), followed by rbf (mse=254)

    print(dict_cv_results)
    return None
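
One possible way to finish the inspection above (a sketch only; modelsaver.get_model is the project's own loader, used exactly as in the function above, and inspect_cv_results is an illustrative name):

import pandas as pd

def inspect_cv_results(file_name="SVR-random_search.p", n_best=5):
    # load the pickled search, turn its cv_results_ into a table and sort by rank
    cv_table = pd.DataFrame(modelsaver.get_model(file_name).cv_results_)
    columns = ["params",
               "mean_test_neg_mean_squared_error",
               "rank_test_neg_mean_squared_error"]
    return cv_table.sort_values("rank_test_neg_mean_squared_error")[columns].head(n_best)
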
Example #4
def get_best_model(stockmodel, option_type):
    """
    Return the architecture (layer sizes and activations) of the best NN model from the cross validations.
    :param stockmodel: str, "BS", "VG" or "H"
    :param option_type: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or "opt_lookbackmax"
    :return: tuple of lists
            (size_layers, activations)
    """
    base_file_name = f"NN-random_search_{stockmodel}_{option_type}_scaled.p"

    full_file_name = pkg_resources.open_text(random_search_nn,
                                             base_file_name).name

    results = modelsaver.get_model(full_file_name)
    results.sort(key=lambda x: x["cv_result"]["Mean"])

    return results[0]['size_layers'], results[0]['activations']
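
The returned architecture can then be turned into a network. The project's own builder is not shown here, so the following is only a sketch under the assumption that the model is a plain dense feed-forward Keras network with a single price output; build_nn is an illustrative name:

from tensorflow import keras

def build_nn(size_layers, activations):
    # one Dense layer per entry of the selected architecture; input shape is inferred on first fit
    model = keras.Sequential()
    for units, activation in zip(size_layers, activations):
        model.add(keras.layers.Dense(units, activation=activation))
    model.add(keras.layers.Dense(1))          # single output: the option price
    model.compile(optimizer="adam", loss="mse")
    return model

size_layers, activations = get_best_model("BS", "opt_standard")
nn_model = build_nn(size_layers, activations)
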
def full_data_training(stockmodel, option_type, only_call=False, with_percentage=False):
    """
    Print the performance results over the full dataset for the given stock model and option type.

    :param stockmodel: str, "BS", "VG" or "H"
    :param option_type: str, "opt_standard", "opt_asianmean", "opt_lookbackmin" or "opt_lookbackmax"
    :param only_call: bool (default=False), if the dataset only contains the call options
    :param with_percentage: bool (default=False),
            if the dataset needs to contain the percentage of the stock price and the strike
    """
    base_file_name = "SVR-random_search_{0}_{1}_scaled.p".format(stockmodel, option_type)

    # get the best parameters from the cross validation
    full_file_name = pkg_resources.open_text(random_search_svr, base_file_name).name
    dict_cv_results = modelsaver.get_model(full_file_name).cv_results_
    best_position = np.where(dict_cv_results['rank_test_neg_mean_squared_error'] == 1)
    best_model_parameters = np.array(dict_cv_results['params'])[best_position][0]

    dm = dc.DataManager(stockmodel=stockmodel,
                        option_type=option_type,
                        only_call=only_call,
                        with_percent=with_percentage)
    X_train, y_train = dm.get_training_data()

    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)

    svr_model = SVR(cache_size=2000,
                    C=best_model_parameters['C'],
                    degree=best_model_parameters['degree'],
                    epsilon=best_model_parameters['epsilon'],
                    gamma=best_model_parameters['gamma'],
                    kernel=best_model_parameters['kernel'])

    svr_model.fit(X_train, y_train)

    X_test, y_test = dm.get_test_data()
    X_test = scaler.transform(X_test)

    y_pred = svr_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred=y_pred)

    print(f"MSE: {mse:4.3f}")
Example #6
def rf_plot_train_test(model, column_fitting, save_plot=False):
    """

    :param model:
    :param column_fitting:
    :param save_plot:
    :return:
    """
    # todo: comments
    max_features = ["auto", "log2", 5]
    dict_codes = {
        "opt_standard": "S",
        "opt_asianmean": "A",
        "opt_lookbackmin": "Lmin",
        "opt_lookbackmax": "Lmax",
        "opt_exact_standard": "SE"
    }

    opt_type_code = dict_codes[column_fitting]

    base_file_name = "rf_50-1000-results_train_test-{0}-{1}-{2}.p"

    pickle_files = [
        base_file_name.format(model, opt_type_code, feature)
        for feature in max_features
    ]
    file_names = [
        pkg_resources.open_text(train_test_rf, pickle_file).name
        for pickle_file in pickle_files
    ]

    dict_values = [modelsaver.get_model(file_name) for file_name in file_names]

    dict_plotting = {}
    for feature, results in zip(max_features, dict_values):
        dict_plotting[feature] = results['Test']

    plot_results("RF",
                 model,
                 column_fitting,
                 dict_plotting,
                 range(50, 1001, 50),
                 save_plot=save_plot)
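
A hypothetical call of the plotting helper above (it assumes the three pickled result files for the chosen model and option-type code, one per max_features value, exist in the train_test_rf package):

# plot, but do not save, the random-forest test results for the Heston model on Asian options
rf_plot_train_test("H", "opt_asianmean", save_plot=False)
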