# Example #1
0
import statistics

import numpy as np

from package import gpr, io, rf, testhelper as th


def checkAlreadyDone(element, alreadylist):
    """Return True if any entry of ``alreadylist`` compares equal to ``element``.

    Entries are compared one at a time with ``==``, in iteration order, and
    the scan stops at the first comparison that is truthy. An empty
    ``alreadylist`` yields False.
    """
    return any(element == candidate for candidate in alreadylist)


# Data Collection
data = io.importdata('../data/Diffusion_Data_allfeatures.csv')
# data = io.importdata('../data/temp.csv')

# Grab the 'Material compositions 1' column before the frame is sanitized.
# NOTE(review): presumably sanitizedata drops this column — confirm.
groups = data['Material compositions 1'].values
data = io.sanitizedata(data)
# Reuse the kernel from a previously saved GPR model object.
gprsavedkernel = io.loadmodelobj(
    '../models/GPR_data_Diffusion_Data_allfeatures_csv_02-24-20_18-32-12'
).getGPRkernel()

# Y is the first column of the sanitized frame; X is every remaining column.
X = data.iloc[:, 1:]
Y = data.iloc[:, 0]
# NOTE(review): hard-coded slope/intercept; presumably from an earlier
# random-forest calibration fit — confirm where these values come from.
rfslope = 0.919216
rfintercept = -0.025370
# Sample standard deviation of the Y column.
y_std = statistics.stdev(Y.to_numpy(dtype=float))

# Setup thresholds

# Use np.round rather than the builtin round(): the builtin raises TypeError
# on an ndarray because ndarray does not define __round__. Rounding to one
# decimal removes the floating-point noise that np.arange accumulates.
gpr_thresholds_range = np.round(np.arange(0.5, 1.2, 0.1), 1)
rf_thresholds_range = np.round(np.arange(0.5, 1.2, 0.1), 1)
normalityTests = ['RMSE']
defaults = {'RMSE': 1, 'Shapiro-Wilk': 0, 'DAgostino-Pearson': 0}
# Example #2
0
# This script imports the PV data set from its CSV file, removes unnecessary columns, and saves the x- and y-values as np arrays.
from package import io
import numpy as np

# Load the perovskite stability CSV and pass it through the package sanitizer.
# Alternative data set (disabled):
#data = io.importdata('perovskite_data/PVstability_Weipaper_alldata_featureselected.csv')
#data = io.sanitizedata(data, user_list=['is_testdata', 'Material Composition'])
data = io.sanitizedata(
    io.importdata('perovskite_data/Perovskite_stability_Wei_updated.csv'),
    user_list=['Compositions'],
)

# Column 0 holds the y-values and all remaining columns hold the x-values;
# each part is converted directly to a float numpy array.
X_values = data.iloc[:, 1:].to_numpy(dtype=float)
y_values = data.iloc[:, 0].to_numpy(dtype=float)

# Write both arrays to .npy files so later scripts can load them.
np.save('perovskite_data/all_x_values.npy', X_values)
np.save('perovskite_data/all_y_values.npy', y_values)