import os
import glob

import numpy as np

# Import path assumed: ARMP lives in the aglaia module of the qml package
from qml.aglaia.aglaia import ARMP


def test_fit_3():
    """
    This function tests the third way of fitting the descriptor:
    the data is passed directly to the fit function.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator = ARMP()
    estimator.fit(x=descriptor, y=energies, classes=classes)
def test_predict_3():
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator = ARMP()
    estimator.fit(x=descriptor, y=energies, classes=classes)
    energies_pred = estimator.predict(x=descriptor, classes=classes)

    assert energies.shape == energies_pred.shape
def test_fit_2():
    """
    This function tests the second way of fitting the descriptor:
    the representations, classes and properties are stored in the class
    and the fit function is given the indices of the samples to use.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator = ARMP()
    estimator.set_representations(representations=descriptor)
    estimator.set_classes(classes=classes)
    estimator.set_properties(energies)

    idx = np.arange(0, 100)
    estimator.fit(idx)
def test_fit_1():
    """
    This function tests the first way of fitting the descriptor:
    compounds are created from the xyz files first and the descriptors
    are then generated from those compounds.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    filenames = glob.glob(test_dir + "/CN_isobutane/*.xyz")
    energies = np.loadtxt(test_dir + '/CN_isobutane/prop_kjmol_training.txt', usecols=[1])
    filenames.sort()

    estimator = ARMP(representation="acsf")
    estimator.generate_compounds(filenames[:50])
    estimator.set_properties(energies[:50])
    estimator.generate_representation()

    idx = np.arange(0, 50)
    estimator.fit(idx)
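# Illustration only (not one of the original tests): a minimal sketch of how the
# index-based fit used above extends to a train/test split. scikit-learn is
# assumed to be available (it is used as `modsel` in the driver scripts below);
# the function name is hypothetical.
def example_fit_with_split():
    from sklearn import model_selection as modsel

    test_dir = os.path.dirname(os.path.realpath(__file__))

    filenames = sorted(glob.glob(test_dir + "/CN_isobutane/*.xyz"))
    energies = np.loadtxt(test_dir + '/CN_isobutane/prop_kjmol_training.txt', usecols=[1])

    estimator = ARMP(representation="acsf")
    estimator.generate_compounds(filenames[:50])
    estimator.set_properties(energies[:50])
    estimator.generate_representation()

    # Split the sample indices and fit/score on disjoint sets
    idx_train, idx_test = modsel.train_test_split(np.arange(0, 50), random_state=42, shuffle=True)

    estimator.fit(idx_train)
    estimator.score(idx_test)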
def test_score_3():
    """
    This function tests that all the scoring functions work.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator_1 = ARMP(scoring_function='mae')
    estimator_1.fit(x=descriptor, y=energies, classes=classes)
    estimator_1.score(x=descriptor, y=energies, classes=classes)

    estimator_2 = ARMP(scoring_function='r2')
    estimator_2.fit(x=descriptor, y=energies, classes=classes)
    estimator_2.score(x=descriptor, y=energies, classes=classes)

    estimator_3 = ARMP(scoring_function='rmse')
    estimator_3.fit(x=descriptor, y=energies, classes=classes)
    estimator_3.score(x=descriptor, y=energies, classes=classes)
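# Illustration only (not one of the original tests): a hypothetical cross-check
# of the scoring functions against the corresponding scikit-learn metrics.
# Whether ARMP.score returns e.g. the negated MAE (as the -score prints in the
# example scripts below suggest) should be verified against the installed version.
def example_score_cross_check():
    from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator = ARMP(scoring_function='mae')
    estimator.fit(x=descriptor, y=energies, classes=classes)
    energies_pred = estimator.predict(x=descriptor, classes=classes)

    # Metrics computed directly from the predictions
    mae = mean_absolute_error(energies, energies_pred)
    r2 = r2_score(energies, energies_pred)
    rmse = np.sqrt(mean_squared_error(energies, energies_pred))
    print("MAE: %.4f  R2: %.4f  RMSE: %.4f" % (mae, r2, rmse))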
import os

import numpy as np

from qml.aglaia.aglaia import ARMP

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/local_slatm_ch4cn_light.npz')

representation = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=150, l2_reg=0.0, learning_rate=0.005, hidden_layer_sizes=(40, 20, 10))

## ------------- ** Fitting to the data ** ---------------

estimator.fit(x=representation, y=energies, classes=zs)

## ------------- ** Predicting and scoring ** ---------------

score = estimator.score(x=representation, y=energies, classes=zs)

print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(x=representation, classes=zs)
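# Sketch only (not part of the original example): a quick way to inspect the
# quality of the fit is to plot the predicted energies against the reference
# ones. matplotlib is assumed to be available; the output filename is arbitrary.
import matplotlib.pyplot as plt

plt.scatter(energies, energies_predict, s=5)
# Diagonal reference line: perfect predictions would fall on it
plt.plot([energies.min(), energies.max()], [energies.min(), energies.max()], "k--")
plt.xlabel("Reference energy (kJ/mol)")
plt.ylabel("Predicted energy (kJ/mol)")
plt.savefig("correlation_plot.png")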
import os

import numpy as np

from qml.aglaia.aglaia import ARMP

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/local_slatm_ch4cn_light.npz')

descriptor = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=100, l2_reg=0.0)

estimator.set_representations(representations=descriptor)
estimator.set_classes(zs)
estimator.set_properties(energies)

## ------------- ** Fitting to the data ** ---------------

idx = np.arange(0, 100)
estimator.fit(idx)

## ------------- ** Predicting and scoring ** ---------------

score = estimator.score(idx)

print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(idx)
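# Sketch only: the printed value can be cross-checked by computing the mean
# absolute error by hand from the stored energies and the index-based
# predictions. That score() returns minus the MAE for the default scoring
# function is an assumption, mirrored from the -score used in the print above.
mae_by_hand = np.mean(np.abs(energies[idx] - energies_predict))
print("MAE computed by hand: %.4f kJ/mol" % mae_by_hand)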
# (Continuation of a larger script: the lines below are the tail end of an
# ARMP(...) constructor call; `pad_xyz`, `pad_zs`, `n_samples`, `ene_isopent`,
# `ref_ene` and the imports are defined earlier in the script.)
                 store_frequency=2, hidden_layer_sizes=(50, 30, 10), batch_size=200)

estimator.set_properties(ene_isopent)
estimator.generate_representation(pad_xyz, pad_zs, method='fortran')
print("Generated the representations")
print(estimator.representation.shape)

idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx, random_state=42, shuffle=True)

estimator.fit(idx_train)

data_squal = h5py.File("/Volumes/Transcend/data_sets/CN_squalane/dft/squalane_cn_dft.hdf5", "r")

xyz_squal = np.array(data_squal.get("xyz")[:10])
zs_squal = np.array(data_squal.get("zs")[:10], dtype=np.int32)
ene_squal = np.array(data_squal.get("ene")[:10]) * 2625.50
ene_squal = ene_squal - ref_ene

estimator.score(idx_test)

pred1 = estimator.predict_from_xyz(xyz_squal, zs_squal)
print("Done squal pred")
pred2 = estimator.predict(idx_test)
# (Continuation of a larger script: `n_samples`, `idx_train`, `idx_test`,
# `estimator`, `modsel`, `tf` and `np` are defined earlier.)

# Per-training-set-size results (assumed to start empty here)
scores = []
traj_scores = []

# Training the model on 3 folds of n data points
for n in n_samples:
    cv_idx = idx_train[:n]
    splitter = modsel.KFold(n_splits=3, random_state=42, shuffle=True)
    indices = splitter.split(cv_idx)

    scores_per_fold = []
    traj_scores_per_fold = []

    for item in indices:
        idx_train_fold = cv_idx[item[0]]
        idx_test_fold = cv_idx[item[1]]

        estimator.fit(idx_train_fold)

        score = estimator.score(idx_test_fold)
        traj_score = estimator.score(idx_test)

        scores_per_fold.append(score)
        traj_scores_per_fold.append(traj_score)

        tf.reset_default_graph()

    scores.append(scores_per_fold)
    traj_scores.append(traj_scores_per_fold)

# Saving the data to an .npz file
np.savez("./plot/scores_vr.npz", np.asarray(n_samples), np.asarray(scores), np.asarray(traj_scores))
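# Sketch only (not in the original script): the saved .npz file can be read
# back to build a learning curve, averaging the three folds for each
# training-set size. The array names arr_0/arr_1/arr_2 follow from the
# positional np.savez call above.
saved = np.load("./plot/scores_vr.npz")
n_samples_saved = saved["arr_0"]
mean_scores = np.mean(saved["arr_1"], axis=1)
mean_traj_scores = np.mean(saved["arr_2"], axis=1)

for n, s, ts in zip(n_samples_saved, mean_scores, mean_traj_scores):
    print("n = %d: CV score = %.4f, trajectory score = %.4f" % (n, s, ts))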
# (Continuation of a larger script: the lines below are the tail end of an
# ARMP(...) constructor call; `X`, `Y`, `Z` and `n_train` are defined earlier.)
                 l2_reg=0.0,
                 hidden_layer_sizes=(40, 20, 10),
                 tensorboard=True,
                 store_frequency=10,
                 # batch_size=400,
                 batch_size=n_train,
                 learning_rate=0.1,
                 # scoring_function="mae",
                 )

estimator.set_representations(representations=X)
estimator.set_classes(Z)
estimator.set_properties(Y)

# idx = np.arange(0,100)
# estimator.fit(idx)
# score = estimator.score(idx)

# estimator.fit(x=representation, y=energies, classes=zs)
estimator.fit(x=X, y=Y, classes=Z)

## ------------- ** Predicting and scoring ** ---------------

score = estimator.score(x=X, y=Y, classes=Z)

print("The mean absolute error is %s kJ/mol." % (str(-score)))

# energies_predict = estimator.predict(idx)
# print(energies_predict)
# (Continuation of a test: the lines below close an array literal, presumably
# the nuclear charges `zs`; `xyz` is defined just above in the original file,
# and the fragment is cut off mid-way through the second constructor call.)
               [1, 2, 3]])
ene_true = np.array([0.5, 0.9, 1.0])

estimator = ARMP(iterations=10, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation='acsf',
                 representation_params={"radial_rs": np.arange(0, 10, 5), "angular_rs": np.arange(0, 10, 5),
                                        "theta_s": np.arange(0, 3.14, 3)},
                 tensorboard=True, store_frequency=10)

estimator.set_properties(ene_true)
estimator.generate_representation(xyz, zs)

idx = list(range(xyz.shape[0]))

estimator.fit(idx)
estimator.save_nn(save_dir="temp")

pred1 = estimator.predict(idx)

estimator.loaded_model = True

estimator.fit(idx)

pred2 = estimator.predict(idx)

estimator.session.close()
tf.reset_default_graph()

new_estimator = ARMP(iterations=10, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation='acsf',
                     representation_params={"radial_rs": np.arange(0, 10, 5), "angular_rs": np.arange(0, 10, 5),
                                            "theta_s": np.arange(0, 3.14, 3)},
# (Continuation of a larger script: `data`, `ene_isopent`, `ref_ene` and
# `xyz_isopent` are loaded/defined earlier, and `shuffle` comes from an
# earlier import.)
ene_isopent = ene_isopent - ref_ene
zs_isopent = np.array(data.get("zs"), dtype=np.int32)
traj_idx = np.array(data.get("traj_idx"))
file_idx = np.array(data.get("Filenumber"))

# Finding the indices of the last trajectory so that it can be used as a test set
idx_test = np.where(traj_idx == 22)[0]
idx_train = np.where(traj_idx != 22)[0]
shuffle(idx_train)

# Making sure that the model is trained on the same number of samples as the MD-NN
idx_train_half = idx_train[:7621]

# Creating the estimator
acsf_params = {"nRs2": 14, "nRs3": 14, "nTs": 14, "rcut": 3.29, "acut": 3.29,
               "zeta": 100.06564927139748, "eta": 39.81824764370754}
estimator = ARMP(iterations=2633, batch_size=22, l1_reg=1.46e-05, l2_reg=0.0001, learning_rate=0.0013,
                 representation_name='acsf', representation_params=acsf_params, tensorboard=True,
                 store_frequency=25, hidden_layer_sizes=(185,))

estimator.set_properties(ene_isopent)
estimator.generate_representation(xyz_isopent, zs_isopent, method="fortran")

# Fitting the estimator and scoring it
estimator.fit(idx_train_half)
score = estimator.score(idx_test)

# Saving the model for later reuse
model_name = "vr-nn"
estimator.save_nn(model_name)

print(score)
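# Sketch only: reloading the model saved above for later predictions. It
# assumes ARMP provides a load_nn() counterpart to save_nn() (the loading test
# earlier in this section points in that direction) and that the
# representations and properties are set up the same way before loading.
# Resetting the TensorFlow graph first, as done in the other scripts, may be
# required when this runs in the same process.
new_estimator = ARMP(iterations=2633, batch_size=22, l1_reg=1.46e-05, l2_reg=0.0001, learning_rate=0.0013,
                     representation_name='acsf', representation_params=acsf_params, tensorboard=False,
                     hidden_layer_sizes=(185,))
new_estimator.set_properties(ene_isopent)
new_estimator.generate_representation(xyz_isopent, zs_isopent, method="fortran")

new_estimator.load_nn(model_name)
pred_test = new_estimator.predict(idx_test)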