import statistics

import numpy as np

from package import gpr, io, rf, testhelper as th


def checkAlreadyDone(element, alreadylist):
    """Return True if *element* compares equal to any item of *alreadylist*.

    Items are compared with ``==`` in iteration order and the search stops
    at the first match. An empty *alreadylist* yields False.
    """
    return any(element == x for x in alreadylist)


# Data Collection
data = io.importdata('../data/Diffusion_Data_allfeatures.csv')
# Alternative input kept from the original script:
# data = io.importdata('../data/temp.csv')

# The group labels are read BEFORE sanitizing, because they come from the
# raw 'Material compositions 1' column of the imported table.
groups = data['Material compositions 1'].values
data = io.sanitizedata(data)

# Kernel taken from a previously saved GPR model object.
gprsavedkernel = io.loadmodelobj(
    '../models/GPR_data_Diffusion_Data_allfeatures_csv_02-24-20_18-32-12'
).getGPRkernel()

# After sanitizing, column 0 is used as the target and all remaining
# columns as the features.
X = data.iloc[:, 1:]
Y = data.iloc[:, 0]

# NOTE(review): presumably linear calibration coefficients for the RF
# model's error estimates -- confirm where these values were fitted.
rfslope = 0.919216
rfintercept = -0.025370

# Sample standard deviation of the target values.
y_std = statistics.stdev(Y.to_numpy(dtype=float))

# Setup thresholds
# np.round is required here: the builtin round() raises TypeError on a
# multi-element ndarray because ndarray does not implement __round__.
# Rounding to one decimal removes the floating-point noise np.arange
# accumulates with a 0.1 step.
gpr_thresholds_range = np.round(np.arange(0.5, 1.2, 0.1), 1)
rf_thresholds_range = np.round(np.arange(0.5, 1.2, 0.1), 1)

normalityTests = ['RMSE']
defaults = {'RMSE': 1, 'Shapiro-Wilk': 0, 'DAgostino-Pearson': 0}
# Load the perovskite (PV) stability data set from CSV, drop the columns that
# are not needed, and store the feature matrix and target vector as .npy files.
from package import io
import numpy as np

# Read and clean the table in one step.
# Previously used input, kept for reference:
#   data = io.importdata('perovskite_data/PVstability_Weipaper_alldata_featureselected.csv')
#   data = io.sanitizedata(data, user_list=['is_testdata', 'Material Composition'])
data = io.sanitizedata(
    io.importdata('perovskite_data/Perovskite_stability_Wei_updated.csv'),
    user_list=['Compositions'],
)

# Column 0 holds the y-values; every remaining column is an x-value.
# Both are converted straight to float numpy arrays.
X_values = data.iloc[:, 1:].to_numpy(dtype=float)
y_values = data.iloc[:, 0].to_numpy(dtype=float)

# Persist both arrays so later scripts can load them directly.
for target_path, array in (
    ('perovskite_data/all_x_values.npy', X_values),
    ('perovskite_data/all_y_values.npy', y_values),
):
    np.save(target_path, array)