Example #1
    def model_gaussian_process(self):
        # kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
        kernel = DotProduct() + WhiteKernel()
        # gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
        model = GaussianProcessRegressor(kernel=kernel)  # , random_state=0)
        model.fit(self.train_x, self.train_y)
        model.score(self.train_x, self.train_y)
        self.y_pred, sigma = model.predict(self.test_x, return_std=True)
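For context, a minimal self-contained sketch of the same pattern, assuming synthetic 1-D data in place of the class attributes self.train_x / self.train_y / self.test_x:

# Minimal, self-contained sketch of the pattern above (synthetic data; illustrative only).
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

rng = np.random.RandomState(0)
train_x = rng.uniform(0, 5, size=(30, 1))
train_y = np.sin(train_x).ravel() + 0.1 * rng.randn(30)
test_x = np.linspace(0, 5, 100).reshape(-1, 1)

kernel = DotProduct() + WhiteKernel()
model = GaussianProcessRegressor(kernel=kernel)
model.fit(train_x, train_y)
print(model.score(train_x, train_y))                     # R^2 on the training data
y_pred, sigma = model.predict(test_x, return_std=True)   # predictive mean and per-point std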
Example #2
    def estimate_map(self, sampled_map, mask, meta_data):
        """
        :param sampled_map: the sampled map with incomplete entries, 2D array with shape
                            (n_grid_points_x, n_grid_points_y)
        :param mask: binary array of the same size as the sampled map (1 where a measurement is available)
        :param meta_data: binary array of the same size as the sampled map
        :return: the reconstructed map, 2D array with the same shape as the sampled map
        """
        mask_comb_meta = mask - meta_data

        # collect the available measurements
        sampled_map = sampled_map[:, :, 0]
        n_grid_points_x = sampled_map.shape[0]
        n_grid_points_y = sampled_map.shape[1]
        avail_measur_indices = np.where(mask_comb_meta == 1)

        x_array = np.linspace(0, self.x_length, n_grid_points_x)
        y_array = np.linspace(0, self.y_length, n_grid_points_y)

        n_measurements = len(avail_measur_indices[0])
        power_meas_vec = np.zeros(n_measurements)

        all_avail_points = np.array([
            x_array[avail_measur_indices[1]], y_array[avail_measur_indices[0]]
        ])
        all_avail_points_pro = np.transpose(all_avail_points)

        for ind_1 in range(n_measurements):
            power_meas_vec[ind_1] = sampled_map[
                avail_measur_indices[0][ind_1]][avail_measur_indices[1][ind_1]]

        # Fit the data
        kernel = RBF()
        gpr = GaussianProcessRegressor(kernel=kernel,
                                       random_state=1,
                                       alpha=3e-1,
                                       n_restarts_optimizer=2,
                                       normalize_y=True).fit(
                                           all_avail_points_pro,
                                           power_meas_vec)
        gpr.score(all_avail_points_pro, power_meas_vec)

        # Estimate the map
        estimated_map = np.zeros(sampled_map.shape)
        for ind_y in range(len(y_array)):
            for ind_x in range(len(x_array)):
                if mask_comb_meta[
                        ind_y, ind_x] == 1 and self.estimate_missing_val_only:
                    estimated_map[ind_y, ind_x] = sampled_map[ind_y, ind_x]
                else:
                    query_point = np.array([x_array[ind_x],
                                            y_array[ind_y]]).reshape(1, -1)
                    estimated_map[ind_y, ind_x] = gpr.predict(query_point,
                                                              return_std=False)
        return np.expand_dims(estimated_map, axis=2)
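The method above boils down to fitting a GP on the observed grid entries and predicting over the full grid; a compact self-contained sketch of that idea, assuming a toy 20x20 grid with roughly 30% of entries observed:

# Toy sketch of the same reconstruction idea on a 2-D grid (all inputs are made up).
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

rng = np.random.RandomState(1)
nx, ny = 20, 20
grid_x, grid_y = np.meshgrid(np.linspace(0, 1, nx), np.linspace(0, 1, ny))  # shape (ny, nx)
true_map = np.sin(3 * grid_x) * np.cos(2 * grid_y)                          # ground-truth "power map"
mask = rng.rand(ny, nx) < 0.3                                               # ~30% of entries observed

obs_points = np.column_stack([grid_x[mask], grid_y[mask]])
obs_values = true_map[mask]

gpr = GaussianProcessRegressor(kernel=RBF(), alpha=3e-1, normalize_y=True).fit(obs_points, obs_values)
all_points = np.column_stack([grid_x.ravel(), grid_y.ravel()])
estimated_map = gpr.predict(all_points).reshape(ny, nx)                     # reconstructed map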
Example #3
def gpr(x_train, y_train, x_test, y_test, x_val, y_val):
    gpr = GaussianProcessRegressor(kernel=None, normalize_y=True)
    gpr.fit(x_train, y_train)

    print("Predicted: ", gpr.predict(x_test))
    print("Actual: ", y_test)

    print("Score on Test: ", gpr.score(x_test, y_test))
    print("Score on Validation: ", gpr.score(x_val, y_val))

    return gpr
Example #4
def plot_prediction_color(filename, material):
    df = pd.read_json(filename)
    df_filtered = df.loc[df['breeder_material_name'] == material]

    for k in range(1, 100):
        # improve the dataset: remove the worst TBR value and add a better
        # enrichment configuration to replace it
        X = list(df_filtered['enrichment_value'])
        y = list(df_filtered['value'])

        kernel = DotProduct() + WhiteKernel()
        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0).fit(X, y)
        gpr.score(X, y)
        row_max_tbr = df_filtered.loc[df_filtered['value'].idxmax()]
        row_min_tbr = df_filtered.loc[df_filtered['value'].idxmin()]

        bounds = [(0, 1), (0, 1)]
        GP = GpOptimiser(X, y, bounds=bounds)

        new_enrichment_value = list(GP.search_for_maximum())

        X.remove(row_min_tbr['enrichment_value'])
        y.remove(row_min_tbr['value'])

        print('new enrichment fraction', new_enrichment_value)
        append_to_json = find_tbr_dict(new_enrichment_value, material, True,
                                       500000)
        #adjust the number of batches with the experiment
        X.append(new_enrichment_value)
        y.append(append_to_json['value'])

        with open(
                'results_new_neutron_source/added_' + str(k) +
                '_result_2_layers_halton_first_wall_neural_network.json',
                'w') as file_object:
            json.dump([append_to_json], file_object, indent=2)

        print('file created')
        df_append = pd.read_json(
            'results_new_neutron_source/added_' + str(k) +
            '_result_2_layers_halton_first_wall_neural_network.json')
        df_filtered = df_filtered.append(df_append,
                                         ignore_index=True,
                                         sort=True)

        idx = df_filtered.index[df_filtered['value'] == row_min_tbr['value']]
        df_filtered = df_filtered.drop(idx[0])

    TBR = y
    print(
        'The max TBR for ' + str(len(X[0])) + ' layers and ' + str(material) +
        ' is', max(TBR))
Example #5
def test_only_score_contains_sample_weight():
    mlflow.sklearn.autolog()

    from sklearn.gaussian_process import GaussianProcessRegressor

    assert "sample_weight" not in _get_arg_names(GaussianProcessRegressor.fit)
    assert "sample_weight" in _get_arg_names(GaussianProcessRegressor.score)

    mock_obj = mock.Mock()

    def mock_score(self, X, y, sample_weight=None):  # pylint: disable=unused-argument
        mock_obj(X, y, sample_weight)
        return 0

    assert inspect.signature(
        GaussianProcessRegressor.score) == inspect.signature(mock_score)

    GaussianProcessRegressor.score = mock_score
    model = GaussianProcessRegressor()
    X, y = get_iris()

    with mlflow.start_run() as run:
        model.fit(X, y)
        mock_obj.assert_called_once_with(X, y, None)

    run_id = run.info.run_id
    params, metrics, tags, artifacts = get_run_data(run_id)
    assert params == truncate_dict(
        stringify_dict_values(model.get_params(deep=True)))
    assert {TRAINING_SCORE: model.score(X, y)}.items() <= metrics.items()
    assert tags == get_expected_class_tags(model)
    assert MODEL_DIR in artifacts
    assert_predict_equal(load_model_by_run_id(run_id), model, X)
Example #6
def GPR_model(kernelf, X, Y, grid, noise_var=1e-10, optimizerf='fmin_l_bfgs_b',
              optimize_restarts=5):
    """
    model_type : GPR is the default (rfr, random forest regression, or other methods could be added)
    params : parameters of the surrogate model
    data : should be a class of continuous data with data.x as variables and data.y as features
    cross_validate : whether to do cross-validation for the training
    grid : the unknown points for the prediction

    normalize_y : this parameter should be set to True if the target values' mean
    is expected to differ considerably from zero. When enabled, the normalization effectively modifies
    the GP's prior based on the data, which contradicts the likelihood principle;
    normalization is thus disabled by default.

    fitted_model : the output
    """

    gpr = GaussianProcessRegressor(kernel= kernelf, alpha=noise_var, optimizer=optimizerf,\
                                   n_restarts_optimizer= optimize_restarts, normalize_y=False).fit(X, Y)
    score = gpr.score(X, Y)
    m, s = gpr.predict(grid, return_std=True, return_cov=False)
    s = np.array(s)
    m = np.array(m)
    return score, m.reshape(-1, 1), s.reshape(-1, 1), gpr
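The docstring's remark about normalize_y can be seen on toy data: far from the training points a zero-mean GP prior pulls predictions toward zero, while normalize_y=True makes the model revert to the training mean instead. A minimal sketch, assuming synthetic data, a fixed length scale, and a small alpha jitter for numerical stability:

# Sketch of the normalize_y remark above (synthetic data; illustrative only).
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

rng = np.random.RandomState(0)
X = rng.uniform(0, 1, size=(40, 1))
Y = 100.0 + np.sin(6 * X).ravel()            # target mean far from zero

far_query = np.array([[5.0]])                # query far outside the training range
for norm in (False, True):
    gpr = GaussianProcessRegressor(kernel=RBF(0.1, length_scale_bounds="fixed"),
                                   alpha=1e-2, normalize_y=norm).fit(X, Y)
    # Far from the data the GP reverts to its prior: ~0 without normalization,
    # ~mean(Y) when normalize_y=True rescales the targets.
    print(norm, gpr.predict(far_query))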
Example #7
def perform_Surrogate_Prediction(next_gen_conc, conc_array_actual,
                                 spectra_array_actual):
    """
    Fit a surrogate model.

    Fits a surrogate model to conc_array_actual and
    spectra_array_actual and predicts the spectra of next_gen_conc

    Inputs:
        - next_gen_conc: A 2d array of the concentrations from
        the current iteration
        - conc_array_actual: A 2d array of the concentrations from
        all the previous iterations
        - spectra_array_actual: A 2d array of all the spectra
        from all the previous iterations.

    Outputs:
        - spectra_prediction: A 2d array of the predicted spectra of
        next_gen_conc
        - score: The score of the surrogate model
    """
    gpr = GaussianProcessRegressor().fit(conc_array_actual,
                                         spectra_array_actual)
    score = gpr.score(conc_array_actual, spectra_array_actual)
    spectra_prediction = gpr.predict(next_gen_conc)
    return spectra_prediction, score
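A hedged usage sketch of perform_Surrogate_Prediction; the shapes (3 concentration variables, 100-point spectra, 50 past samples) and the toy data are assumptions, not from the source:

# Illustrative call with placeholder data (shapes and values are assumptions).
import numpy as np

rng = np.random.RandomState(0)
conc_array_actual = rng.rand(50, 3)                      # 50 past samples, 3 concentrations each
wavelengths = np.linspace(0, 1, 100)
spectra_array_actual = np.sin(3 * conc_array_actual.sum(axis=1, keepdims=True) + wavelengths)  # toy spectra
next_gen_conc = rng.rand(8, 3)                           # 8 new candidate concentrations

spectra_prediction, score = perform_Surrogate_Prediction(
    next_gen_conc, conc_array_actual, spectra_array_actual)
print(spectra_prediction.shape, score)                   # -> (8, 100) and the training R^2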
Example #8
def train_regressor(dist_frame_path,
                    out_path='model_checkpoints/regressor.joblib'):
    """
    Train Linear Regression model for mapping pixel distance into miters.
    :param dist_frame_path:
    :return:
    """
    dist_frame = pd.read_csv(dist_frame_path)

    # filter frame
    dist_frame = dist_frame[dist_frame.true_dist >= 0]

    dist_frame = dist_frame.reset_index(drop=True)
    train = dist_frame[['true_dist', 'hausdorff_dists', 'dist_line_len']]

    # delete outliers
    # train = train[(np.abs(stats.zscore(train)) < 3).all(axis=1)]

    X, y = np.array(train[['hausdorff_dists',
                           'dist_line_len']]), np.array(train['true_dist'])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    kernel = DotProduct() + WhiteKernel()
    gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
    gpr.fit(X_train, y_train)
    print('###NOTE###____ Regressor R2-score = ', gpr.score(X_test, y_test))

    dump(gpr, out_path)

    return gpr
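Once dumped, the regressor can be reloaded and queried; a minimal sketch, assuming the checkpoint path above and a single [hausdorff_dists, dist_line_len] feature row with made-up values:

# Reloading the dumped model and mapping a pixel distance to meters (feature values are illustrative).
import numpy as np
from joblib import load

reg = load('model_checkpoints/regressor.joblib')
# feature order matches training above: [hausdorff_dists, dist_line_len]
pixel_features = np.array([[120.0, 340.0]])
meters, std = reg.predict(pixel_features, return_std=True)
print(meters, std)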
Example #9
    def fnGaussianProcessRegressor(self, year, avgTemp, predictYear):
        feature_train, feature_test, target_train, target_test = train_test_split(
            year, avgTemp, test_size=0.1, random_state=42)
        gp = GaussianProcessRegressor(kernel=1.0 * RBF([1.0]))
        gp.fit(feature_train[:, np.newaxis], target_train)
        return (gp.score(feature_test[:, np.newaxis],
                         target_test), gp.predict(predictYear))
Example #10
    def plot_RECa124_tc(self):
        self.get_args()
        data = pd.read_csv(self.file_list[0])
        data = data.dropna(how="any")
        print(data)

        fig, ax = plt.subplots(figsize=(7, 5))

        norm = Normalize(vmin=data['ionic_radius'].min(), vmax=1.08)
        cmap = get_cmap('seismic')
        mappable = ScalarMappable(cmap=cmap, norm=norm)
        mappable._A = []

        re = ['Y', "Dy", "Gd", "Eu", "Sm"]
        re_ir = [1.019, 1.027, 1.053, 1.066, 1.079]  # ionic radii in Å (immediately overridden below)
        re_ir = [0, 0.2, 0.6, 0.7, 0.9]  # normalized colormap positions used for the line colors

        for r, rir in zip(re, re_ir):
            print(rir)
            data2 = data[data['RE'] == r]
            print(data2)
            x = data2['Ca_x'].values.reshape(-1, 1)
            y = data2["Tc_onset"].values.reshape(-1, 1)
            # lr = LinearRegression()
            lr = GaussianProcessRegressor()
            lr.fit(x, y)
            # print(lr.coef_)
            # print(lr.intercept_)
            print(lr.score(x, y))
            lrx = np.linspace(0, 0.15, 1000).reshape(-1, 1)
            ax.plot(lrx,
                    lr.predict(lrx),
                    color=cmap(rir),
                    zorder=-1,
                    alpha=0.5,
                    lw=5)
        # =======================================================

        ax.scatter(data['Ca_x'],
                   data["Tc_onset"],
                   marker='o',
                   s=200,
                   c=data['ionic_radius'],
                   cmap=cmap)
        fig.colorbar(mappable).set_label(r"$RE^{3+}$ ionic radius / $\rm\AA$")
        ax.set_xlim(0, 0.1)
        ax.set_ylim(70, 95)
        ax.set_yticks(range(70, 96, 5))
        ax.set_xticks([0, 0.05, 0.10])
        ax.tick_params(length=5, pad=8)
        ax.tick_params(right=True, top=True)
        ax.set_xlabel(r"$\it{x}$")
        ax.set_ylabel(r"$\it{T}_{\mathsf{c}}$ / K")
        # plt.gca().spines['left'].set_visible(False)
        # plt.gca().spines['right'].set_visible(False)
        # plt.tick_range()
        fig.tight_layout()
        plt.show()
Example #11
def perform_Surrogate_Prediction(next_gen_conc, conc_array_actual,
                                 spectra_array_actual):
    #lr = LinearRegression().fit(conc_array_actual, spectra_array_actual)
    #score = lr.score(conc_array_actual, spectra_array_actual)
    #spectra_prediction = lr.predict(next_gen_conc)
    gpr = GaussianProcessRegressor().fit(conc_array_actual,
                                         spectra_array_actual)
    score = gpr.score(conc_array_actual, spectra_array_actual)
    spectra_prediction = gpr.predict(next_gen_conc)
    return spectra_prediction, score
Example #12
    def plot_RE124_tc(self):
        """
        mt_.*\.ini => tc
        """
        self.get_args()
        data = pd.read_csv(self.file_list[0])
        data = data.dropna(how="any")
        print(data)

        fig, ax = plt.subplots(figsize=(7, 3))

        x = data['ionic_radius'].values.reshape(-1, 1)
        y = data["tc"].values.reshape(-1, 1)
        # lr = LinearRegression()
        lr = GaussianProcessRegressor()
        lr.fit(x, y)
        # print(lr.coef_)
        # print(lr.intercept_)
        print(lr.score(x, y))

        lrx = np.linspace(0, 1.3, 1000).reshape(-1, 1)
        ax.plot(lrx, lr.predict(lrx), color="m", zorder=-1, alpha=0.3, lw=5)

        # ax.errorbar(data['ionic_radius'], data["a"], yerr=data["da"], fmt='o', color="red", markersize=10, capsize=5, markeredgecolor="black")
        # ax.errorbar(data['ionic_radius'], data["b"], yerr=data["db"], fmt='^', color="blue", markersize=10, capsize=5, markeredgecolor="black")
        # ax.errorbar(data['ionic_radius'], data["c"], yerr=data["dc"], fmt='s', color='m',ecolor="m",markersize=10, capsize=5, markeredgecolor="black")
        # ax.errorbar(data['ionic_radius'], data["o"], yerr=data["do"], fmt='o', color="m",markersize=10, capsize=5, markeredgecolor="black")
        ax.scatter(data['ionic_radius'],
                   data["tc"],
                   marker='o',
                   color="m",
                   s=100)

        ax.tick_params(labelbottom=False)
        # ax.set_xlim(1.0, 1.12)
        # ax.set_ylim(3.84, 3.90)
        # ax.set_yticks([3.84, 3.87, 3.90])
        # ax.set_xlabel(r"$RE^{3+}$ ionic radius / $\rm\AA$")
        # ax.set_ylabel(r"lattice parameter / $\rm\AA$")
        ax.set_ylim(60, 90)
        ax.set_ylabel(r"$\it{T}_{\mathsf{c}}$ / K")
        # ax.set_ylabel("orthorhombicity / -")

        # ax.set_ylabel(r"$\it{T}_{\mathsf{c}}$ / K")
        # ax.errorbar(data['ionic_radius'], data["c"], yerr=data["dc"], fmt='s', color='m',ecolor="m",markersize=10, capsize=5, markeredgecolor="black")
        ax.set_xlim(1.0, 1.12)
        # ax.set_ylim(27.2, 27.4)
        # ax.set_yticks(range(70, 96, 5))
        # ax.set_xticks([0, 0.05, 0.10])
        ax.tick_params(length=5, pad=8)
        ax.tick_params(right=True, top=True)

        fig.tight_layout()
        plt.savefig("tc_RE124.png", transparent=True, dpi=500)
        plt.show()
Example #13
    def trainModel(self):
        for referenceVector in self.data.rvecs:
            allBetas = []
            for iVector in self.data.ivecs:
                gpr = GaussianProcessRegressor(random_state=0).fit(
                    np.array(iVector).reshape(-1, 1), np.array(referenceVector))
                allBetas.append(gpr.score(
                    np.array(iVector).reshape(-1, 1), np.array(referenceVector)))
            print("Betas:", len(allBetas), "TFIDF", len(self.data.tfidf))
            if len(allBetas) == len(self.data.tfidf):
                self.finalscores.append(np.dot(allBetas, self.data.tfidf))
            elif len(allBetas) > len(self.data.tfidf):
                self.finalscores.append(
                    np.dot(allBetas[0:len(self.data.tfidf)], self.data.tfidf))
            else:
                self.finalscores.append(
                    np.dot(allBetas, self.data.tfidf[0:len(allBetas)]))
Example #14
def GP(x, y, x_eval):
    kernel = C(1.0, (1e-2, 1e2)) * RBF(100, (1e-2, 1e2))
    #kernel = K.RationalQuadratic(length_scale=1, alpha=0.5)
    #kernel = K.Matern(length_scale=1.0, nu=2.0)
    #kernel = RBF(length_scale=100)
    gp = GaussianProcessRegressor(kernel=kernel,
                                  n_restarts_optimizer=9,
                                  normalize_y=True)
    X = x.reshape(len(x), 1)
    gp.fit(X, y)
    X_eval = x_eval.reshape(len(x_eval), 1)
    y_pred, sigma = gp.predict(X_eval, return_std=True)
    score = gp.score(X, y)
    return y_pred, sigma, score
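A hedged usage sketch of GP() on synthetic 1-D data (x, y and x_eval below are placeholders, not from the source):

# Calling GP() on synthetic 1-D data (placeholder inputs; illustrative only).
import numpy as np

rng = np.random.RandomState(0)
x = np.linspace(0, 10, 40)
y = np.sin(x) + 0.1 * rng.randn(40)
x_eval = np.linspace(0, 12, 200)

y_pred, sigma, score = GP(x, y, x_eval)
print(score)                       # training R^2
print(y_pred.shape, sigma.shape)   # both (200,)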
Example #15
    def do_this():

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.3,
                                                            random_state=None)

        kernel = 1 * RBF([10] * X_train.shape[1], (1e-5, 1e5)) + WhiteKernel(
            noise_level=1, noise_level_bounds=(1e-10, 1e+1))

        gpr = GaussianProcessRegressor(kernel=kernel,
                                       alpha=1.0e-5,
                                       n_restarts_optimizer=RESTARTS,
                                       normalize_y=True,
                                       optimizer='fmin_l_bfgs_b')

        gpr.fit(X_train, y_train)

        y_pred = gpr.predict(X)

        mse_all = mse(y, y_pred)
        R_test = gpr.score(X_test, y_test)
        R_tr = gpr.score(X_train, y_train)
        return mse_all, R_test, R_tr, gpr, X_train, X_test, y_train, y_test
Example #16
def gaussian_reg(df):
    df = df.iloc[:1000, :]
    X = df[[
        'promotion_type1', 'promotion_type4', 'promotion_type6',
        'promotion_type10', 'date_month'
    ]]
    y = df['quantity']
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)
    kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

    gp.fit(X_train, y_train)
    print(gp.score(X_test, y_test))
Example #17
def makeGaussianProcess():
    global y_t_pred, result
    prefix = "%s_GP_FULL" % (name)
    #kernel = RBF(1e1,(1e-5,1e7))
    kernel = RationalQuadratic()  #(1e1,(1e-5,1e7))
    #kernel = ExpSineSquared()#(1e1,(1e-5,1e7))
    model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
    x1 = x[:, 3:6:2]
    x_t1 = x_t[:, 3:6:2]
    y_t_pred = model.fit(x1, y).predict(x_t1)
    r = model.score(x1, y)
    print("score r = %s" % r)
    print "Coefficients: %s" % model.get_params()
    #print "Highest Coefficients: %s" % str(sorted(model.get_params(),key=lambda x:-x))
    print str(
        (model.kernel_, model.log_marginal_likelihood(model.kernel_.theta)))
    return prefix, model
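The commented-out lines above name alternative kernels; a small self-contained sketch, assuming toy data and an added alpha jitter, that compares those choices by training R^2:

# Comparing the kernel choices mentioned above on toy data (illustrative only).
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, RationalQuadratic, ExpSineSquared

rng = np.random.RandomState(0)
x1 = rng.uniform(0, 10, size=(60, 2))
y = np.sin(x1[:, 0]) + 0.5 * x1[:, 1] + 0.1 * rng.randn(60)

for kernel in (RBF(), RationalQuadratic(), ExpSineSquared()):
    model = GaussianProcessRegressor(kernel=kernel, alpha=1e-2,
                                     n_restarts_optimizer=2).fit(x1, y)
    print(type(kernel).__name__, model.score(x1, y))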
Example #18
def main():
    X, y = make_friedman2(
        n_samples=500, noise=0,
        random_state=0)  # This is the test data we are fitting
    print(type(X))
    print(np.shape(X))  # 500 samples each with 4 variables
    print(type(y))
    print(np.shape(y))  # 500 outputs

    kernel = DotProduct() + WhiteKernel()
    gpr = GaussianProcessRegressor(kernel=kernel,
                                   optimizer='fmin_l_bfgs_b',
                                   random_state=0).fit(X, y)
    gpr_score = gpr.score(X, y)  # R^2 of the prediction
    print('R^2 of the prediction: %f' % gpr_score)

    print("Shape of thing ", np.shape(X[:2, :]))
    print("Thing ", X[:2, :])
    gpr_predict = gpr.predict(X[:2, :], return_std=True)
    print(gpr_predict)
Example #19
class GaussianPrRegressor():

    def __init__(self, dataset):
        self.dataset = dataset
        self.gpr = GaussianProcessRegressor(**DEFAULTS[dataset]['gaussian_pr']['defaults'])
        print("""
			**********************
			Gaussian Process Regressor
			**********************
		""")

    def train_and_predict(self, X, y, X_test):
        '''
        fit training dataset and predict values for test dataset
        '''
        self.gpr.fit(X, y)
        self.gpr.predict(X_test)

    def score(self, X, X_test, y, y_test):
        '''
        Returns the score of the GPR after fitting the training data
        '''
        self.train_and_predict(X, y, X_test)
        return self.gpr.score(X_test, y_test)

    def create_new_instance(self, values):
        return GaussianProcessRegressor(**values)

    def param_grid(self, is_random=False):
        '''
        dictionary of hyper-parameters to get good values for each one of them
        '''
        # random search only accepts a dict for params, whereas grid search can take either a dict or a list of dicts
        return DEFAULTS[self.dataset]['gaussian_pr']['param_grid']

    def get_sklearn_model_class(self):
        return self.gpr

    def __str__(self):
        return "GaussianProcessRegressor"
Example #20
class GPR(ContinuousModel):
    """ Gaussian Process Regression """
    def __init__(self, *args, **kwargs):
        self.model = GaussianProcessRegressor(*args, **kwargs)

    def train(self, dataset, *args, **kwargs):
        self.model.fit(*(dataset.format_sklearn() + args), **kwargs)
        return self.model

    def predict(self, feature, *args, **kwargs):
        feature = np.array(feature)
        f = self.model.predict(feature, *args, **kwargs) - np.finfo(float).eps
        return np.sign(f)

    def predict_real(self, feature, *args, **kwargs):
        feature = np.array(feature)
        dvalue = self.model.predict(feature, *args, **kwargs)
        return np.vstack((-dvalue, dvalue)).T

    def predict_mean_var(self, feature, sigma_n=.01, *args, **kwargs):
        feature = np.array(feature)
        t_mean, y_std = self.model.predict(feature,
                                           return_std=True,
                                           *args,
                                           **kwargs)
        t_var = y_std**2 + sigma_n**2  # Gaussian noise model
        return t_mean.flatten(), t_var.flatten()

    def score(self, testing_dataset, *args, **kwargs):
        return self.model.score(*(testing_dataset.format_sklearn() + args),
                                **kwargs)

    def get_kernel(self):
        return self.model.kernel_

    def get_log_marginal_likelihood(self):
        return self.model.log_marginal_likelihood_value_
Example #21
def eval_manual():

    '''
    This should do much the same thing as the code above, but it seems to
    give a slightly better score. The results are still negative, though. This
    might just mean that when the predicted values are subtracted from the actual
    values, one set is consistently higher than the other, and that is where the
    negative values come from.
    '''

    # Create the kernel and the GP
    kernel = CustomKernel()
    gp = GaussianProcessRegressor(kernel=kernel, optimizer=None, alpha=1)  # optimizer=None keeps the custom kernel's parameters fixed

    # Get all of the available questions
    questions = np.atleast_2d(range(1, 100)).T

    # Get answers to all of the questions
    answers = np.atleast_2d(ask_user(questions).ravel()).T

    scores = []
    number_of_folds = 10
    for i in range(number_of_folds):

        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(questions, answers, 
                                                            test_size=0.1, random_state=randint(1, 1000000))
        # Fit the data
        gp.fit(X_train, y_train)

        # Add the scores to the list
        scores.append(gp.score(X_test, y_test))

    scores = np.array(scores)
    print(scores)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
Example #22
model = GaussianProcessRegressor(
    kernel=kern,  # kernel instance, default=None
    alpha=0.01,  # float or array-like of shape (n_samples,), default=1e-10
    optimizer="fmin_l_bfgs_b",  # "fmin_l_bfgs_b" or callable, default="fmin_l_bfgs_b"
    n_restarts_optimizer=0,  # int, default=0
    normalize_y=False,  # boolean, optional (default: False)
    copy_X_train=True,  # bool, default=True
    random_state=None,  # int or RandomState, default=None
)
model.fit(train_x, train_y)
y_pred, y_std = model.predict(x.reshape(-1, 1), return_std=True)
log_marginal_likelihood = model.log_marginal_likelihood()  # log marginal likelihood
params = model.get_params()  # get the configured parameters (dict)
scores = model.score(train_x, train_y)  # coefficient of determination R^2
# params = model.set_params()  # set parameters (dict)
k_samples = model.sample_y(train_x, n_samples=5)  # draw 5 samples from the posterior at train_x

X_train = model.X_train_
y_train = model.y_train_
kernel = model.kernel_  # kernel used for prediction (optimized; differs from the initially set parameters)
L = model.L_
alpha = model.alpha_
log_marginal_likelihood_value = model.log_marginal_likelihood_value_  # log marginal likelihood

# plot
fig = plt.figure(figsize=(6, 4))
ax1 = fig.add_subplot(111)
for i in range(k_samples.shape[1]):
    ax1.plot(train_x, k_samples[:, i])
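The return_std output is typically visualized as a mean curve with a ±2σ band; a self-contained toy version of that plot (data and kernel here are assumptions, not from the snippet above):

# Predictive mean with a ±2σ band from return_std (toy data; illustrative only).
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

rng = np.random.RandomState(0)
train_x = rng.uniform(0, 10, size=(25, 1))
train_y = np.sin(train_x).ravel() + 0.1 * rng.randn(25)
x = np.linspace(0, 10, 200)

model = GaussianProcessRegressor(kernel=RBF(), alpha=1e-2).fit(train_x, train_y)
y_pred, y_std = model.predict(x.reshape(-1, 1), return_std=True)

fig2 = plt.figure(figsize=(6, 4))
ax2 = fig2.add_subplot(111)
ax2.plot(x, y_pred, color="k", label="predictive mean")
ax2.fill_between(x, y_pred - 2 * y_std, y_pred + 2 * y_std, color="k", alpha=0.2, label="±2 std. dev.")
ax2.scatter(train_x.ravel(), train_y, color="r", label="training data")
ax2.legend()
plt.show()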
Example #23
#the size of my test set

X_train, X_test, Y_train, Y_test, X_lately = prepare_data(
    df, forecast_col, forecast_out, test_size)
#calling the method where the cross-validation and data preparation are done

#initializing  regression model
#learner = KNeighborsRegressor(); #initializing  regression model
#learner = DecisionTreeRegressor()
#learner = MLPRegressor()
kernel = DotProduct() + WhiteKernel()
learner = GaussianProcessRegressor(kernel=kernel)

learner.fit(X_train, Y_train)
#training the  model
score = learner.score(X_train, Y_train)
score2 = learner.score(X_test, Y_test)
#testing  model
predictions_test = learner.predict(X_test)
predictions_train = learner.predict(X_train)

forecast = learner.predict(X_lately)
#set that will contain the forecasted data
response = {}
#creating json object
response['train_score'] = score * 100
response['test_score'] = score2 * 100
response['forecast_set'] = forecast

print(response)
Example #24
################################
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.path as path

import matplotlib.patches as patches

from scipy import stats
import statsmodels.api as sm

import pylab as pl
# this allows plots to appear directly in the notebook

from sklearn.gaussian_process import GaussianProcessRegressor

# Import Data

shanghaiTable = pd.read_csv("../input/shanghaiData.csv")
shanghaiData = shanghaiTable.loc[shanghaiTable['year'] == 2015]
shanghaiData = shanghaiData.head(n=100)
feature = ['alumni', 'award', 'hici', 'ns', 'pub', 'pcp']
x = shanghaiData[feature]
y = shanghaiData['total_score']

model = GaussianProcessRegressor()
model.fit(x, y)
print(model.score(x, y))
Example #25

#Sklearn GPR model
#Reworked code from: 
#https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
kernel = DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(kernel=kernel, random_state=13)

#train model
gpr.fit(np_xTrain, np_yTrain)

#test model
y,_ = gpr.predict(np_xTest, return_std=True) 
score = gpr.score(np_xTest, np_yTest)

y = y.astype(int)

#Print out predicted players from best to worst followed by actual players from best to worst
sort_predict = sorted(y, reverse=True)
sort_players = [x for _,x in sorted(zip(y,test_players), reverse=True)]

for player, predict in zip(sort_players, sort_predict):
    print(player + ": " + str(predict))


y_sort = sorted(np_yTest.ravel(), reverse=True)
real_players = [x for _, x in sorted(zip(np_yTest, test_players), reverse=True)]

for player, true in zip(real_players, y_sort):
    print(player + ": " + str(true))
Example #26
nop = t_d_inp.shape[0]
indices = np.random.RandomState(seed=42).permutation(nop)
bp = int(nop*0.9)
train_idx, dev_idx = indices[:bp], indices[bp:]
train_inp, dev_inp = t_d_inp[train_idx,:], t_d_inp[dev_idx,:]
train_oup, dev_oup = t_d_oup[train_idx,:], t_d_oup[dev_idx,:]
print ("finished loading and separating the data!")

# GPR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
#kernel = ConstantKernel(1.0) * RBF(length_scale=1.0)
kernel = 1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 10.0),
                        nu=1.5)
gpr = GaussianProcessRegressor(kernel=kernel).fit(train_inp, train_oup)
print (gpr.score(train_inp, train_oup))
predicted_gp1 = gpr.predict(train_inp) 
predicted_gp2 = gpr.predict(dev_inp) 
predicted_gp3 = gpr.predict(test_inp) 
#plot 1
plt.figure(figsize=(10,7))
s = 3
for i in range(t_d_oup.shape[1]):
    plt.subplot(2,3,i+1)
    plt.scatter(train_oup[:,i],predicted_gp1[:,i],s=s,label="training set")
    plt.scatter(  dev_oup[:,i],predicted_gp2[:,i],s=s,label="development set")
    plt.scatter( test_oup[:,i],predicted_gp3[:,i],s=s,label="test set")
    train_error = ((predicted_gp1[:,i] - train_oup[:,i])**2).mean()
    dev_error = ((predicted_gp2[:,i] - dev_oup[:,i])**2).mean()
    test_error = ((predicted_gp3[:,i] - test_oup[:,i])**2).mean()
plt.legend()
Example #27
    X_training_similarity = np.dot(X_training, np.transpose(X_training))
    X_validation_similarity = np.dot(X_validation, np.transpose(X_training))
    X_all_similarity = np.append(X_training_similarity,
                                 X_validation_similarity,
                                 axis=0)
    X_all_similarity = stats.zscore(X_all_similarity)
    X_training_similarity = X_all_similarity[:500, :]
    X_validation_similarity = X_all_similarity[500:, :]
    #tqdm.write(str(X_training_similarity.shape)+','+ str(X_validation_similarity.shape))

    gpr = GaussianProcessRegressor(kernel=kernel,
                                   random_state=0).fit(X_training_similarity,
                                                       Y_training)
    #gpr = SVR(kernel='rbf',C=1e3, gamma='auto').fit(X_training_similarity, Y_training)
    train_R2 = gpr.score(X_training_similarity, Y_training)
    test_R2 = gpr.score(X_validation_similarity, Y_validation)
    train_mae = mae(Y_training, gpr.predict(X_training_similarity))
    test_mae = mae(Y_validation, gpr.predict(X_validation_similarity))
    #print("Training R^2 score:",gpr.score(X_training,Y_training))
    #print("Validation R^2 score:",gpr.score(X_validation,Y_validation))
    #print("Training MAE:",mae(Y_training,gpr.predict(X_training)))
    #print("Validation MAE:",mae(Y_validation,gpr.predict(X_validation)))
    train_mae_arr.append(train_mae)
    train_R2_arr.append(train_R2)
    test_mae_arr.append(test_mae)
    test_R2_arr.append(test_R2)

train_mae_arr = np.array(train_mae_arr)
train_R2_arr = np.array(train_R2_arr)
test_mae_arr = np.array(test_mae_arr)
Example #28
def main():
    X = pd.read_csv(
        '../data/BlackFriday.csv'
    )  # names =("User_ID", "Product_ID", "Gender", "Age", "Occupation", "City_Category", "Stay_In_Current_City_Years", "Marital_Status,", "Product_Category_1","Product_Category_2","Product_Category_3", "Purchase" ))
    N, d = X.shape
    print(N, d)
    # fill missing values with 0
    # (?) need to calculate percentage of missing value?
    X = X.fillna(0)
    # change gender to 0 and 1
    X['Gender'] = X['Gender'].apply(change_gender)
    # change age to 0 to 6
    X['Age'] = X['Age'].apply(change_age)
    # change city categories to 0 to 2
    X['City_Category'] = X['City_Category'].apply(change_city)
    # change the year to integer
    X['Stay_In_Current_City_Years'] = X['Stay_In_Current_City_Years'].apply(
        change_year)

    #predict gender
    y = np.zeros((N, 1))
    y = X.values[:, 2]
    y = y.astype('int')
    X1 = X
    ID = ['User_ID', 'Product_ID', 'Gender']
    X1 = X1.drop(ID, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(X1,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=42)
    model = LogisticRegression(C=1,
                               fit_intercept=False,
                               solver='lbfgs',
                               multi_class='multinomial')
    model.fit(X_train, y_train)
    print("LogisticRegression(softmax) Training error %.3f" %
          utils.classification_error(model.predict(X_train), y_train))
    print("LogisticRegression(softmax) Validation error %.3f" %
          utils.classification_error(model.predict(X_test), y_test))

    model = linear_model.SGDClassifier(max_iter=1000, tol=1e-3)
    model.fit(X_train, y_train)

    print("logLinearClassifier Training error %.3f" %
          utils.classification_error(model.predict(X_train), y_train))
    print("logLinearClassifier Validation error %.3f" %
          utils.classification_error(model.predict(X_test), y_test))

    #predict the product category1  based on other information.
    y2 = np.zeros((N, 1))
    y2 = X.values[:, 8]
    y2 = y2.astype('int')
    X2 = X
    ID = [
        'User_ID', 'Product_ID', 'Product_Category_1', 'Product_Category_2',
        'Product_Category_3'
    ]
    X2 = X2.drop(ID, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(X2,
                                                        y2,
                                                        test_size=0.2,
                                                        random_state=42)

    model = KNeighborsClassifier(n_neighbors=5, metric='cosine')
    model.fit(X_train, y_train)

    y_pred = model.predict(X_train)
    tr_error = np.mean(y_pred != y_train)

    y_pred = model.predict(X_test)
    te_error = np.mean(y_pred != y_test)
    print("Training error of KNN to predict age: %.3f" % tr_error)
    print("Testing error of KNN to predict age: %.3f" % te_error)
    # Training error of KNN to predict age: 0.363
    #Testing error of KNN to predict age: 0.496

    # Use decision tree to predict
    e_depth = 20
    s_depth = 1

    train_errors = np.zeros(e_depth - s_depth)
    test_errors = np.zeros(e_depth - s_depth)

    for i, d in enumerate(range(s_depth, e_depth)):
        print("\nDepth: %d" % d)

        model = DecisionTreeClassifier(max_depth=d,
                                       criterion='entropy',
                                       random_state=1)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_train)
        tr_error = np.mean(y_pred != y_train)

        y_pred = model.predict(X_test)
        te_error = np.mean(y_pred != y_test)
        print("Training error: %.3f" % tr_error)
        print("Testing error: %.3f" % te_error)

        train_errors[i] = tr_error
        test_errors[i] = te_error

    x_vals = np.arange(s_depth, e_depth)
    plt.title("The effect of tree depth on testing/training error")
    plt.plot(x_vals, train_errors, label="training error")
    plt.plot(x_vals, test_errors, label="testing error")
    plt.xlabel("Depth")
    plt.ylabel("Error")
    plt.legend()

    fname = os.path.join("..", "figs", "trainTest_category1.pdf")
    plt.savefig(fname)
    print("\nFigure saved as '%s'" % fname)

    model = RandomForestClassifier(criterion="entropy",
                                   n_estimators=5,
                                   max_features=5)
    model.fit(X_train, y_train)
    print("RandomForest Training error %.3f" %
          utils.classification_error(model.predict(X_train), y_train))
    print("RandomForest Validation error %.3f" %
          utils.classification_error(model.predict(X_test), y_test))
    #RandomForest Training error 0.027
    #RandomForest Validation error 0.157
    tree = DecisionTreeClassifier(max_depth=13,
                                  criterion='entropy',
                                  random_state=1)
    tree.fit(X_train, y_train)
    y_pred = tree.predict(X_train)
    tr_error = np.mean(y_pred != y_train)

    y_pred = tree.predict(X_test)
    te_error = np.mean(y_pred != y_test)
    print("Decision Tree Training error : %.3f" % tr_error)
    print("Decision Tree Validation error: %.3f" % te_error)
    #Depth: 11
    #Training error: 0.127
    #Testing error: 0.131

    #use softmaxClassifier to predict occupation
    model = LogisticRegression(C=10000,
                               fit_intercept=False,
                               solver='lbfgs',
                               multi_class='multinomial')
    model.fit(X_train, y_train)
    print("LogisticRegression(softmax) Training error %.3f" %
          utils.classification_error(model.predict(X_train), y_train))
    print("LogisticRegression(softmax) Validation error %.3f" %
          utils.classification_error(model.predict(X_test), y_test))
    #LogisticRegression(softmax) Training error 0.651
    #LogisticRegression(softmax) Validation error 0.652

    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import LinearRegression
    from sklearn.gaussian_process.kernels import ConstantKernel, RBF
    from sklearn.kernel_ridge import KernelRidge
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
    from sklearn.metrics import mean_squared_error
    poly = PolynomialFeatures(degree=4)
    X_train_sub = X_train[:1000]
    y_train_sub = y_train[:1000]
    X_train_ = poly.fit_transform(X_train_sub)
    model = LinearRegression()
    model.fit(X_train_, y_train_sub)
    model.score(X_train_, y_train_sub, sample_weight=None)
    y_pred = model.predict(X_train_)
    tr_error = mean_squared_error(y_pred, y_train_sub)

    y_pred = model.predict(poly.transform(X_test))
    te_error = mean_squared_error(y_pred, y_test)
    print("Training error : %.3f" % tr_error)
    print("Validation error: %.3f" % te_error)

    #kernel = DotProduct() + WhiteKernel()
    y2 = np.zeros((N, 1))
    y2 = X.values[:, 8]
    y2 = y2.astype('int')
    X2 = X
    ID = [
        'User_ID', 'Product_ID', 'Product_Category_1', 'Product_Category_2',
        'Product_Category_3'
    ]
    X2 = X2.drop(ID, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(X2,
                                                        y2,
                                                        test_size=0.02,
                                                        random_state=42)
    gpr = GaussianProcessRegressor(kernel=None,
                                   random_state=0).fit(X_train, y_train)
    gpr.score(X_train, y_train)
    y_pred = gpr.predict(X_train)
    tr_error = mean_squared_error(y_pred, y_train)
    y_pred = gpr.predict(X_test)
    te_error = mean_squared_error(y_pred, y_test)
    clf = KernelRidge(alpha=0.5)
    clf.fit(X_train_sub, y_train_sub)
    clf.score(X_train_sub, y_train_sub, sample_weight=None)
Example #29
def run_auto_lr_range(train_loader, bae_model, mode="mu", sigma_train="separate",
                      min_lr_range=0.0000001, max_lr_range=10,
                      reset_params=False, plot=True, verbose=True, save_mecha="copy", run_full=False, savefile="", savefolder="plots", supervised=False, window_size=10):
    #helper function
    def round_sig(x, sig=2):
        return round(x, sig-int(floor(log10(abs(x))))-1)

    #get number of iterations for a half cycle based on train loader
    total_iterations = len(train_loader)
    half_iterations = int(total_iterations/2)

    #save temporary model state
    #depending on chosen mechanism
    if save_mecha == "file":
        bae_model.save_model_state()
    elif save_mecha == "copy":
        temp_autoencoder = copy.deepcopy(bae_model.autoencoder)

    #reset it before anything
    if reset_params:
        bae_model.reset_parameters()

    bae_model.scheduler_enabled = False
    #learning range list
    lr_list = []
    train_batch_number = len(train_loader) #num iterations
    for i in range(train_batch_number):
        q= (max_lr_range/min_lr_range)**(1/train_batch_number)
        lr_i = min_lr_range*(q**i)
        lr_list.append(lr_i)


    #forward propagate model to get loss vs learning rate
    sigma_train = sigma_train
    mode = mode
    loss_list = []
    current_minimum_loss = 0
    smoothen_loss_list = []

    if verbose:
        print("Starting auto learning rate range finder")

    try:
        for batch_idx, (data, target) in tqdm(enumerate(train_loader)):
            bae_model.learning_rate = lr_list[batch_idx]
            bae_model.learning_rate_sig = lr_list[batch_idx]
            bae_model.set_optimisers(bae_model.autoencoder, mode=mode,sigma_train=sigma_train)
            if supervised:
                loss = bae_model.fit_one(x=data,y=target, mode=mode)
            else:
                loss = bae_model.fit_one(x=data,y=data, mode=mode)
            loss_list.append(loss)
            if (batch_idx+1)>=window_size:
                #first time, fill up with mean
                if len(smoothen_loss_list) == 0:
                    smoothen_loss_list.append(np.mean(copy.copy(loss_list[0:window_size])))
                    current_minimum_loss = copy.copy(smoothen_loss_list[0])
                else:
                #calculate exponential moving average
                    k = 2/(window_size+1)
                    smoothen_loss = (loss * k) + smoothen_loss_list[-1]*(1-k)
                    if smoothen_loss <= current_minimum_loss:
                        current_minimum_loss = smoothen_loss

                    if verbose:
                        print("LRTest-Loss:"+str(smoothen_loss))

                    #break if loss is nan
                    if np.isnan(smoothen_loss):
                        break

                    #append to list
                    smoothen_loss_list.append(smoothen_loss)

                    #stopping criteria
                    if run_full == False:
                        if (batch_idx+1)>=(window_size+10):
                            #more robust early stopping criteria for lr search
                            #by checking on signage of first loss
                            #instead of relying purely on magnitude (i.e np.abs)
                            #which can be spurious when the signs are negative
                            if np.sign(loss_list[0]) >= 0:
                                if smoothen_loss>=(loss_list[0]*2):
                                    break
                            elif smoothen_loss>=(loss_list[0]/2):
                                break

            #prevent nan
            if np.isnan(loss):
                print("LRTest-Loss:"+str(smoothen_loss))
                break
    except Exception as e:
        print(e)

    smoothen_loss_list_scaled = (np.array(smoothen_loss_list)-np.min(smoothen_loss_list))/(smoothen_loss_list[0]-np.min(smoothen_loss_list))
    smoothen_loss_list_scaled = np.clip((smoothen_loss_list_scaled),a_min=-100,a_max=2)
    lr_list_plot = (lr_list)[window_size-1:(len(smoothen_loss_list_scaled)+window_size-1)]

    #fit gaussian process to the loss/lr to get a smoothen shape
    X, y = np.array(lr_list_plot).reshape(-1,1), np.array(smoothen_loss_list_scaled).reshape(-1,1)
    X_log10 = np.log10(X)
    kernel = RBF(10, (0.5, 10))
    gpr = GaussianProcessRegressor(kernel=kernel, random_state=0).fit(X_log10,y)
    gpr.score(X_log10,y)

    gp_mean, gp_sigma = gpr.predict(X_log10, return_std=True)
    gp_mean = gp_mean.flatten()
    gp_sigma = gp_sigma.flatten()
    negative_peaks, _ = find_peaks(-gp_mean)

    #get minimum lr
    residuals = np.abs(gp_mean[negative_peaks] - gp_mean[negative_peaks].min())
    minimum_loss_arg = np.argwhere(residuals<=0.05).flatten()[0]
    minimum_loss_arg = negative_peaks[minimum_loss_arg]
    minimum_loss = gp_mean[minimum_loss_arg]
    maximum_lr = lr_list[minimum_loss_arg+(window_size-1)]
    minimum_lr = lr_list[np.argwhere(gp_mean<=0.9)[0][0]+(window_size-1)]/2

    #round up to 3 significant figures
    maximum_lr = round_sig(maximum_lr,3)
    minimum_lr = round_sig(minimum_lr,3)
    if maximum_lr <= minimum_lr:
        temp_minimum_lr = copy.copy(maximum_lr)
        maximum_lr = copy.copy(minimum_lr)
        minimum_lr = temp_minimum_lr
    min_max_lr_text = "Min lr:{} , Max lr: {}".format(minimum_lr,maximum_lr)

    if verbose:
        print(min_max_lr_text)
    if plot:
        plot_learning_rate_finder(X,y,gp_mean,negative_peaks, minimum_lr,maximum_lr)

        #option to save plot
        if '.png' in savefile:
            create_dir(savefolder)
            plt.savefig(savefolder+"/"+savefile)


    #reset the model again after training
    if reset_params:
        bae_model.reset_parameters()

    #set parameters necessary for the scheduler
    bae_model.init_scheduler(half_iterations,minimum_lr,maximum_lr)
    bae_model.scheduler_enabled = True

    #load model state
    if save_mecha == "file":
        bae_model.load_model_state()
    if save_mecha == "copy":
        bae_model.autoencoder = temp_autoencoder
    return minimum_lr, maximum_lr, half_iterations
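The learning-rate search above reduces to: collect (lr, loss) pairs on a log-spaced grid, smooth them with a GP in log10(lr), and take the dip found by find_peaks. A stripped-down sketch of just that smoothing step, assuming a synthetic loss curve and a small alpha jitter:

# GP-smoothing step of the lr-range search (synthetic loss curve; illustrative only).
import numpy as np
from scipy.signal import find_peaks
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

rng = np.random.RandomState(0)
lr_list = np.logspace(-7, 1, 100)                                   # candidate learning rates
loss = (np.log10(lr_list) + 2) ** 2 / 10 + 0.02 * rng.randn(100)    # toy loss with a dip near lr=1e-2

X_log10 = np.log10(lr_list).reshape(-1, 1)
y = ((loss - loss.min()) / (loss[0] - loss.min())).reshape(-1, 1)   # scaled as in the function above

gpr = GaussianProcessRegressor(kernel=RBF(10, (0.5, 10)), alpha=1e-3,
                               random_state=0).fit(X_log10, y)
gp_mean = gpr.predict(X_log10).flatten()
negative_peaks, _ = find_peaks(-gp_mean)                            # local minima of the smoothed curve
best = negative_peaks[np.argmin(gp_mean[negative_peaks])] if len(negative_peaks) else int(np.argmin(gp_mean))
print("suggested max lr:", lr_list[best])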
Example #30
def plot_prediction_color(filename, material, method):
    df = pd.read_json(filename)
    df_filtered = df.loc[df['breeder_material_name'] == material]

    for k in range(1, 500):
        # improve the dataset: remove the worst TBR value and add a better
        # enrichment configuration to replace it
        X = list(df_filtered['enrichment_value'])
        y = list(df_filtered['value'])
        kernel = DotProduct() + WhiteKernel()
        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0).fit(X, y)
        gpr.score(X, y)
        row_max_tbr = df_filtered.loc[df_filtered['value'].idxmax()]
        row_min_tbr = df_filtered.loc[df_filtered['value'].idxmin()]

        X.remove(row_min_tbr['enrichment_value'])
        y.remove(row_min_tbr['value'])

        #new_enrichment_value = [e+((-1)**k)*k/(100) for e in row_max_tbr['enrichment_value']]
        sequencer = ghalton.Halton(len(X[0]))

        if method == 'random':
            new_enrichment_value = []
            new_enrichment_value.append(
                row_max_tbr['enrichment_value'][0] +
                random.uniform(0, 1 - row_max_tbr['enrichment_value'][0]))
            new_enrichment_value.append(
                row_max_tbr['enrichment_value'][1] +
                random.uniform(0, 1 - row_max_tbr['enrichment_value'][1]))
            new_enrichment_value.append(
                row_max_tbr['enrichment_value'][2] +
                random.uniform(0, 1 - row_max_tbr['enrichment_value'][2]))
            print('new enrichment fraction', new_enrichment_value)
            append_to_json = find_tbr_dict(new_enrichment_value, material,
                                           True, 500000)
            #adjust the number of batches with the experiment
            X.append(new_enrichment_value)
            y.append(append_to_json['value'])

            with open(
                    'results_new_neutron_source/added_' + str(k) +
                    '_result_3_layers_halton_first_wall_neural_network.json',
                    'w') as file_object:
                json.dump([append_to_json], file_object, indent=2)

            print('file created')
            df_append = pd.read_json(
                'results_new_neutron_source/added_' + str(k) +
                '_result_3_layers_halton_first_wall_neural_network.json')
            df_filtered = df_filtered.append(df_append,
                                             ignore_index=True,
                                             sort=True)

            idx = df_filtered.index[df_filtered['value'] ==
                                    row_min_tbr['value']]
            df_filtered = df_filtered.drop(idx[0])

        elif method == 'halton':
            new_enrichment_value = []
            add_enrichment = sequencer.get(1)[0]
            new_enrichment_value.append(row_max_tbr['enrichment_value'][0] +
                                        add_enrichment[0])
            new_enrichment_value.append(row_max_tbr['enrichment_value'][1] +
                                        add_enrichment[1])
            new_enrichment_value.append(row_max_tbr['enrichment_value'][2] +
                                        add_enrichment[2])
            print('new enrichment fraction', new_enrichment_value)

            if new_enrichment_value[0] > 1:
                new_enrichment_value[0] = 1

            if new_enrichment_value[1] > 1:
                new_enrichment_value[1] = 1

            if new_enrichment_value[2] > 1:
                new_enrichment_value[2] = 1

            append_to_json = find_tbr_dict(new_enrichment_value, material,
                                           True, 500000)
            #adjust the number of batches with the experiment
            X.append(new_enrichment_value)
            y.append(append_to_json['value'])

            with open(
                    'results_new_neutron_source/added_' + str(k) +
                    '_result_3_layers_halton_first_wall_neural_network.json',
                    'w') as file_object:
                json.dump([append_to_json], file_object, indent=2)

            print('file created')
            df_append = pd.read_json(
                'results_new_neutron_source/added_' + str(k) +
                '_result_3_layers_halton_first_wall_neural_network.json')
            df_filtered = df_filtered.append(df_append,
                                             ignore_index=True,
                                             sort=True)

            idx = df_filtered.index[df_filtered['value'] ==
                                    row_min_tbr['value']]
            df_filtered = df_filtered.drop(idx[0])

    x_axis = [item[0] for item in X]
    y_axis = [item[1] for item in X]
    z_axis = [item[2] for item in X]

    y_predicted = gpr.predict(X)
    TBR = y_predicted
    text_list = []

    for x, y, z, t in zip(x_axis, y_axis, z_axis, TBR):
        text_list.append('TBR=' + str(round(t, 5)) + '<br>' +
                         'Enrichment first layer=' + str(round(x, 5)) +
                         '<br>' + 'Enrichment second layer=' +
                         str(round(y, 5)) + '<br>' +
                         'Enrichment third layer=' + str(round(z, 5)))

    trace = go.Scatter3d(
        x=x_axis,
        y=y_axis,
        z=z_axis,
        hoverinfo='text',
        text=text_list,
        mode='markers',
        marker=dict(
            size=2,
            color=TBR,  # set color to an array/list of desired values
            colorscale='Viridis',  # choose a colorscale
            colorbar=dict(title='TBR'),
            opacity=0.8))
    return (trace)