Python Data示例

	activation_function_hidd takes values "tanh", "relu", "leaky relu", "sigmoid"
'''

# Run configuration for the Franke-function neural-network script.
# poly_deg is later passed to design_matrix(), N is used for both grid
# arguments of set_grid_franke_function(), and alpha/seed are passed to
# add_noise().
# NOTE(review): lamb is not used in the visible part of the script;
# presumably a regularization strength -- confirm against the code below.
poly_deg = 5
N = 30
seed = 2021
alpha = 0.001
lamb = 0.0

# Optimizer settings (not consumed in the visible part of the script).
min_size = 5 # size of each minibatch
n_epochs = 1000 # number of epochs
eta = 0.001 # learning rate

# Build the data set: grid first, then the function values, then noise.
# NOTE(review): the meaning of the third positional argument (False) is not
# visible here -- check Data.set_grid_franke_function.
franke_data = Data()
franke_data.set_grid_franke_function(N,N,False)
franke_data.set_franke_function()
franke_data.add_noise(alpha, seed)

# Scaling data (done before the design matrix is built below).
franke_data.data_scaling()

# Activation for the hidden layers; the header of this script lists
# "tanh", "relu", "leaky relu" and "sigmoid" as the accepted values.
activation_function_hidd = "tanh"
# -----------------------------------------------NN---------------------------------------------

# Design matrix from poly_deg, then the split.
# NOTE(review): 0.2 is presumably the test fraction -- confirm in
# Data.test_train_split.
franke_data.design_matrix(poly_deg)
franke_data.test_train_split(0.2)

# Network layout: the list holds one node count per hidden layer, so its
# length should agree with n_hidd_layers (here 1 layer, 50 nodes).
n_hidd_layers = 1
nodes_in_hidd_layers = [50]

示例#2

显示文件

文件： Task_e.py 项目： jvevik/ML-projects

    plt.subplot(1, 5, i + 1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title("Label: %d" % digits.target[random_indices[i]])
#plt.show()

# Splitting into the test and train datasets (80 % train / 20 % test).
# No random_state is passed here, so the split depends on the global RNG state.
X_train, X_test, Y_train, Y_test = train_test_split(inputs,
                                                    labels,
                                                    train_size=0.8,
                                                    test_size=0.2)

# X_train.shape is unpacked into exactly two values, so X_train must be 2-D.
n_inputs, n_features = X_train.shape

# Converting into the one-hot representation
# NOTE(review): this rebinds `digits`, which earlier in the script was the
# object providing `digits.target`; after this line `digits` is a Data
# instance. Verify nothing below still expects the original object.
digits = Data()
Y_train_onehot, Y_test_onehot = digits.to_categorical_numpy(
    Y_train), digits.to_categorical_numpy(Y_test)

# For SGD
# NOTE(review): none of these are consumed in the visible part of the script;
# lamb is presumably a regularization strength -- confirm.
epochs = 100
batch_size = 50
eta = 0.01
lamb = 0.0
n_categories = 10

# For the GD
n_iterations = 100000

# User-defined option; "SGD" is the only value visible here, the "For the GD"
# settings above suggest a gradient-descent alternative -- confirm the
# accepted strings where `option` is read.
option = "SGD"

示例#3

显示文件

文件： decisiontree_article_analysis.py 项目： jvevik/ML-projects

The output of the program is two grid search files for the test and training MSE and R2,
as well as a print of the optimal values.
"""

# Fix NumPy's global RNG seed so repeated runs draw the same random numbers.
np.random.seed(2020)

# Values to perform the grid search on.
# depth_values: 15 evenly spaced floats 1.0, 2.0, ..., 15.0.
# NOTE(review): these are floats, not ints; if they are passed on as a tree
# depth, check that the consumer accepts floats or casts them.
# lambda_values: 0.0 followed by the 9 log-spaced values 1e-6 ... 1e2.
depth_values = np.linspace(1,15,15)
lambda_values = np.hstack((np.array([0.0]), np.logspace(-6,2,9)))

# Values to extract from the grid search.
# NOTE(review): only min_mse_test starts at a large sentinel (1000);
# min_mse_train starts at 0. If it is tracked as a running minimum it would
# never be updated -- verify against the loop that uses it.
min_mse_test, min_r2_test, min_mse_train, min_r2_train = 1000, 0, 0, 0


# Setting up the dataset
bind_eng = Data()
bind_eng.set_binding_energies("mass16.txt")

# Tracing the article nuclei
bind_eng.find_indeces()

# Getting the total binding energies: z_flat is multiplied element-wise by
# A_numpy. Presumably z_flat holds the binding energy per nucleon and A_numpy
# the mass numbers -- confirm in Data.set_binding_energies.
bind_eng.z_flat = bind_eng.z_flat*bind_eng.A_numpy

# Scaling the data (after z_flat has been replaced by the total energies).
bind_eng.data_scaling()


# Creating the design matrix. Workaround to create the XGB and DT design matrices
poly_deg = 1
bind_eng.design_matrix(poly_deg)

示例#4

显示文件

from random import random, seed

from src.data_processing import Data
from src.regression_methods import Fitting
from src.statistical_functions import *
# Fix NumPy's global RNG seed so repeated runs draw the same random numbers.
# NOTE(review): `np` is not imported explicitly above; presumably it arrives
# through the star import from src.statistical_functions -- confirm.
np.random.seed(2020)
"""
The following program performs the Decision Tree analysis.
The user may change the depth_values to change the maximum depth of the tree,
as well as changing the lambda_values to change the pruning parameter.
The output of the program is two grid search files for the test and training MSE and R2,
as well as a print of the optimal values.
"""

# Setting up the dataset
bind_eng = Data()
bind_eng.set_binding_energies("mass16.txt")

# Scaling the data
bind_eng.data_scaling()

# Creating the design matrix
poly_deg = 1
bind_eng.design_matrix(poly_deg)
# np.delete(..., 0, 1) removes index 0 along axis 1, i.e. the first column of
# bind_eng.X. Presumably that column is the constant/intercept term produced
# by design_matrix -- confirm.
deleted_matrix = np.delete(bind_eng.X, 0, 1)
bind_eng.X = deleted_matrix

# Split after the column has been dropped, so the split sees the reduced X.
# NOTE(review): 0.2 is presumably the test fraction -- confirm in
# Data.test_train_split.
bind_eng.test_train_split(0.2)

# All we can use now is bind_eng.X_test, bind_eng.X_train, bind_eng.z_test, bind_eng.z_train

示例#5

显示文件

The following program performs the linear regression analysis with three options - OLS, Ridge and LASSO.
The user might select one of the options by varying the regression_method variable taking values "OLS", "Ridge", "LASSO"
and switch one of the resampling options on by choosing between "cv" and "no resampling" for the resampling_method variable.
The output of the program is two grid search files for the test and training MSE and R2.

"""

# Seed value for the analysis (not consumed in the visible part of the script).
seed = 2020

# Analysis options; the accepted strings are listed next to each variable.
minimization_method = "matrix_inv" # this option is kept as "matrix_inv" for the whole analysis
regression_method = "Ridge" # "OLS", "Ridge", "LASSO"
resampling_method = "no resampling" # "cv" or "no resampling"
# NOTE(review): lamb is a single value while lambda_array below holds the
# grid; how the two interact is not visible here -- confirm.
lamb = 0.001

# Setting up the dataset
bind_eng = Data()
bind_eng.set_binding_energies("mass16.txt")

# Scaling the data
bind_eng.data_scaling()

# Setting matrices and arrays for the grid search
# poly_deg_array holds the integers 1 .. num_poly; lambda_array holds the
# 8 lambda values to scan.
num_poly = 23
poly_deg_array = np.linspace(1, num_poly, num_poly, dtype = int)
lambda_array = [ 0, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0]
num_lambda = len(lambda_array)

# Zero-initialised result grids of shape (num_poly, num_lambda).
# Presumably row i corresponds to poly_deg_array[i] and column j to
# lambda_array[j] -- confirm against the loop that fills them.
MSE_test = np.zeros((num_poly, num_lambda))
MSE_train = np.zeros((num_poly, num_lambda))
R2_test = np.zeros((num_poly, num_lambda))
R2_train = np.zeros((num_poly, num_lambda))

示例#6

显示文件

文件： XGB_analysis.py 项目： jvevik/ML-projects

from src.data_processing import Data
from src.regression_methods import Fitting
from src.statistical_functions import *
"""
Program to find the optimal parameters for the XGBoost method, by doing a gridsearch over the three included parameters;
lambda, learning rate and max. depth.

Last loop is commented out, but is used to find all three optimal parameters. Then the optimal learning rate is used to obtain
files for plotting.
"""

# Fix NumPy's global RNG seed so repeated runs draw the same random numbers.
np.random.seed(2020)

# Setting up the dataset
bind_eng = Data()
bind_eng.set_binding_energies("mass16.txt")

# Element-wise product of z_flat and A_numpy, done before scaling so that
# data_scaling() operates on the total energies.
bind_eng.z_flat = bind_eng.z_flat * bind_eng.A_numpy  # to get the total binding energy

# Scaling the data
bind_eng.data_scaling()

# Creating the design matrix
poly_deg = 1
bind_eng.design_matrix(poly_deg)
# np.delete(..., 0, 1) removes index 0 along axis 1, i.e. the first column of
# bind_eng.X. Presumably that column is the constant/intercept term produced
# by design_matrix -- confirm.
deleted_matrix = np.delete(bind_eng.X, 0, 1)
bind_eng.X = deleted_matrix

# Split after the column has been dropped, so the split sees the reduced X.
# NOTE(review): 0.2 is presumably the test fraction -- confirm in
# Data.test_train_split.
bind_eng.test_train_split(0.2)