def test_fit_3():
    """
    This function tests the third way of fitting the descriptor:
    the data is passed directly to the fit function.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator = ARMP()
    estimator.fit(x=descriptor, y=energies, classes=classes)
def test_predict_3():
    """
    This function tests that the predictions have the same shape as the properties
    when the data is passed directly to the fit and predict functions.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator = ARMP()
    estimator.fit(x=descriptor, y=energies, classes=classes)
    energies_pred = estimator.predict(x=descriptor, classes=classes)

    assert energies.shape == energies_pred.shape
def test_set_properties():
    """
    This test checks that the set_properties function sets the correct properties.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    energies = np.loadtxt(test_dir + '/CN_isobutane/prop_kjmol_training.txt', usecols=[1])

    estimator = ARMP(representation='slatm')

    assert estimator.properties is None

    estimator.set_properties(energies)

    assert np.all(estimator.properties == energies)
def test_fit_1():
    """
    This function tests the first way of fitting the descriptor:
    the data is passed by first creating compounds and then the descriptors are created from the compounds.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    filenames = glob.glob(test_dir + "/CN_isobutane/*.xyz")
    energies = np.loadtxt(test_dir + '/CN_isobutane/prop_kjmol_training.txt', usecols=[1])
    filenames.sort()

    estimator = ARMP(representation="acsf")
    estimator.generate_compounds(filenames[:50])
    estimator.set_properties(energies[:50])
    estimator.generate_representation()

    idx = np.arange(0, 50)
    estimator.fit(idx)
def test_fit_2():
    """
    This function tests the second way of fitting the descriptor:
    the data is passed by storing the compounds in the class.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator = ARMP()
    estimator.set_representations(representations=descriptor)
    estimator.set_classes(classes=classes)
    estimator.set_properties(energies)

    idx = np.arange(0, 100)
    estimator.fit(idx)
def test_set_descriptor():
    """
    This test checks that the set_descriptor function works as expected.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data_incorrect = np.load(test_dir + "/data/CN_isopent_light_UCM.npz")
    data_correct = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor_correct = data_correct["arr_0"]
    descriptor_incorrect = data_incorrect["arr_0"]

    estimator = ARMP()

    assert estimator.representation is None

    estimator.set_representations(representations=descriptor_correct)

    assert np.all(estimator.representation == descriptor_correct)

    # Passing a descriptor with the wrong shape should raise an InputError
    try:
        estimator.set_representations(representations=descriptor_incorrect)
        raise Exception
    except InputError:
        pass
def test_set_representation():
    """
    This function tests the function _set_representation.
    """
    try:
        ARMP(representation='slatm', representation_params={'slatm_sigma12': 0.05})
        raise Exception
    except InputError:
        pass

    try:
        ARMP(representation='coulomb_matrix')
        raise Exception
    except InputError:
        pass

    try:
        ARMP(representation='slatm', representation_params={'slatm_alchemy': 0.05})
        raise Exception
    except InputError:
        pass

    parameters = {'slatm_sigma1': 0.07, 'slatm_sigma2': 0.04, 'slatm_dgrid1': 0.02, 'slatm_dgrid2': 0.06,
                  'slatm_rcut': 5.0, 'slatm_rpower': 7, 'slatm_alchemy': True}

    estimator = ARMP(representation='slatm', representation_params=parameters)

    assert estimator.representation_name == 'slatm'
    assert estimator.slatm_parameters == parameters
zs = np.array(data["zs"][-n_samples:], dtype=np.int32)

# Creating the estimator
acsf_params = {"nRs2": 5, "nRs3": 5, "nTs": 5, "rcut": 5, "acut": 5, "zeta": 220.127, "eta": 30.8065}
estimator = ARMP(iterations=200, representation_name='acsf', representation_params=acsf_params,
                 tensorboard=True, store_frequency=10, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005)

estimator.set_properties(ene)
estimator.generate_representation(xyz, zs)

# Restoring the previously saved model and continuing the training from it
saved_dir = "saved_model"
estimator.load_nn(saved_dir)

idx = list(range(n_samples))
estimator.fit(idx)
def test_score_3():
    """
    This function tests that all the scoring functions work.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator_1 = ARMP(scoring_function='mae')
    estimator_1.fit(x=descriptor, y=energies, classes=classes)
    estimator_1.score(x=descriptor, y=energies, classes=classes)

    estimator_2 = ARMP(scoring_function='r2')
    estimator_2.fit(x=descriptor, y=energies, classes=classes)
    estimator_2.score(x=descriptor, y=energies, classes=classes)

    estimator_3 = ARMP(scoring_function='rmse')
    estimator_3.fit(x=descriptor, y=energies, classes=classes)
    estimator_3.score(x=descriptor, y=energies, classes=classes)
import shutil
import numpy as np
from qml.aglaia.aglaia import ARMP
import tensorflow as tf

xyz = np.array([[[0, 1, 0], [0, 1, 1], [1, 0, 1]],
                [[1, 2, 2], [3, 1, 2], [1, 3, 4]],
                [[4, 1, 2], [0.5, 5, 6], [-1, 2, 3]]])
zs = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
ene_true = np.array([0.5, 0.9, 1.0])

estimator = ARMP(iterations=10, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation='acsf',
                 representation_params={"radial_rs": np.arange(0, 10, 5), "angular_rs": np.arange(0, 10, 5),
                                        "theta_s": np.arange(0, 3.14, 3)},
                 tensorboard=True, store_frequency=10)

estimator.set_properties(ene_true)
estimator.generate_representation(xyz, zs)

idx = list(range(xyz.shape[0]))

estimator.fit(idx)
estimator.save_nn(save_dir="temp")

pred1 = estimator.predict(idx)

estimator.loaded_model = True
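# Illustrative follow-up (an assumption, not part of the original script): the model written
# to "temp" by save_nn above can be restored with load_nn and should reproduce the same
# predictions. This is only a minimal sketch of such a round-trip check.
estimator.load_nn("temp")
pred2 = estimator.predict(idx)
assert np.all(np.isclose(pred1, pred2))

# Remove the temporary model directory created by save_nn (also an assumption; shutil is
# imported above, so a clean-up step like this is the likely intent).
shutil.rmtree("temp")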
## ------------- ** Setting up the estimator ** ---------------

acsf_params = {"nRs2": 5, "nRs3": 5, "nTs": 5, "rcut": 5, "acut": 5, "zeta": 220.127, "eta": 30.8065}
estimator = ARMP(iterations=5000, representation_name='acsf', representation_params=acsf_params,
                 tensorboard=True, learning_rate=0.075, l1_reg=0.0, l2_reg=0.0)

estimator.generate_compounds(filenames)
estimator.set_properties(energies)
estimator.generate_representation(method="fortran")

print("The shape of the representation is: %s" % (str(estimator.representation.shape)))

## ------------- ** Fitting to the data ** ---------------

idx = np.arange(0, 100)
idx_train, idx_test = modsel.train_test_split(idx,
from qml.aglaia.aglaia import ARMP
import numpy as np
import os

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/local_slatm_ch4cn_light.npz')

descriptor = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=100, l2_reg=0.0)

estimator.set_representations(representations=descriptor)
estimator.set_classes(zs)
estimator.set_properties(energies)

## ------------- ** Fitting to the data ** ---------------

idx = np.arange(0, 100)
estimator.fit(idx)

## ------------- ** Predicting and scoring ** ---------------

score = estimator.score(idx)
print("%i files were loaded." % (n_samples)) acsf_params = { "nRs2": 5, "nRs3": 5, "nTs": 5, "rcut": 5, "acut": 5, "zeta": 220.127, "eta": 30.8065 } estimator = ARMP(iterations=6000, representation_name='acsf', representation_params=acsf_params, l1_reg=0.0, l2_reg=0.0, scoring_function="rmse", tensorboard=False, store_frequency=10, learning_rate=0.075) estimator.set_properties(energies[:100]) estimator.generate_compounds(filenames[:100]) estimator.generate_representation(method="tf") print(estimator.representation.shape) idx = list(range(100)) idx_train, idx_test = modsel.train_test_split(idx, test_size=0, random_state=42,
# Creating the estimator acsf_params = { "nRs2": 14, "nRs3": 14, "nTs": 14, "rcut": 3.29, "acut": 3.29, "zeta": 100.06564927139748, "eta": 39.81824764370754 } estimator = ARMP(iterations=2633, batch_size=22, l1_reg=1.46e-05, l2_reg=0.0001, learning_rate=0.0013, representation_name='acsf', representation_params=acsf_params, tensorboard=True, store_frequency=25, hidden_layer_sizes=(185, )) # Loading the model previously trained estimator.load_nn("../trained_nn/vr-nn") estimator.set_properties(ene_surface) # Generating the representation start = time.time() estimator.generate_representation(xyz_surface, zs_surface, method="fortran") end = time.time() print("The time taken to generate the representations is %s s" % (str(end - start)))
               ],
               'radial_cutoff': 5,
               'angular_cutoff': 5,
               'zeta': 17.8630,
               'eta': 2.5148}

# Generate estimator
estimator = ARMP(iterations=1, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation_name='acsf',
                 representation_params=acsf_params, tensorboard=False, store_frequency=10)

estimator.load_nn()

data_squal = h5py.File("/Volumes/Transcend/data_sets/CN_squalane/dft/squalane_cn_dft.hdf5", "r")

xyz_squal = np.array(data_squal.get("xyz")[:10])
zs_squal = np.array(data_squal.get("zs")[:10], dtype=np.int32)
ene_squal = np.array(data_squal.get("ene")[:10]) * 2625.50

pred1 = estimator.predict_from_xyz(xyz_squal, zs_squal)
print(pred1)
from qml.aglaia.aglaia import ARMP
import glob
import numpy as np
import os

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
filenames = glob.glob(current_dir + '/../test/CN_isobutane/*.xyz')
energies = np.loadtxt(current_dir + '/../test/CN_isobutane/prop_kjmol_training.txt', usecols=[1])
filenames.sort()

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=10, representation='acsf',
                 representation_params={"radial_rs": np.arange(0, 10, 1), "angular_rs": np.arange(0.5, 10.5, 1),
                                        "theta_s": np.arange(0, 5, 1)},
                 tensorboard=False)

estimator.generate_compounds(filenames)
estimator.set_properties(energies)
estimator.generate_representation()

## ------------- ** Fitting to the data ** ---------------

idx = np.arange(0, 100)
estimator.fit(idx)

## ------------- ** Predicting and scoring ** ---------------
## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load("/Volumes/Transcend/repositories/my_qml_fork/qml/test/data/local_slatm_ch4cn_light.npz")

descriptor = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=3000, learning_rate=0.075, l1_reg=0.0, l2_reg=0.0, tensorboard=True,
                 store_frequency=50)

estimator.set_representations(representations=descriptor)
estimator.set_classes(zs)
estimator.set_properties(energies)

## ------------- ** Fitting to the data ** ---------------

idx = np.arange(0, 100)
estimator.fit(idx)

## ------------- ** Predicting and scoring ** ---------------
import h5py
import numpy as np
from qml.aglaia.aglaia import ARMP
from sklearn import model_selection as modsel
import tensorflow as tf

# Getting the dataset
data = h5py.File("/Volumes/Transcend/data_sets/CN_isopentane/pruned_dft_with_forces/pruned_isopentane_cn_dft.hdf5", "r")

n_samples = 500

xyz = np.array(data.get("xyz")[-n_samples:])
ene = np.array(data.get("ene")[-n_samples:]) * 2625.50
ene = ene - data.get("ene")[0] * 2625.50
zs = np.array(data["zs"][-n_samples:], dtype=np.int32)

# Creating the estimator
acsf_param = {"nRs2": 5, "nRs3": 5, "nTs": 5, "rcut": 5, "acut": 5, "zeta": 220.127, "eta": 30.8065}
estimator = ARMP(iterations=1000, batch_size=512, l1_reg=0.0, l2_reg=0.0, learning_rate=0.001,
                 representation_name='acsf', representation_params=acsf_param, tensorboard=False,
                 store_frequency=50)

estimator.set_properties(ene)
estimator.generate_representation(xyz, zs, method='fortran')
print(estimator.g.shape)

# Doing cross validation
idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx, test_size=0.15, random_state=42, shuffle=False)

print("Starting the fitting...")

estimator.fit(idx_train)

# estimator.save_nn("saved_model")

pred1 = estimator.predict(idx_train)
pred2 = estimator.predict_from_xyz(xyz[idx_train], zs[idx_train])
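# Hypothetical consistency check (not in the original script): pred1 and pred2 above are
# predictions for the same geometries, obtained from the stored representations and from
# the raw xyz/zs respectively, so their mean absolute difference is a quick sanity check.
mad = np.mean(np.abs(pred1 - pred2))
print("Mean absolute difference between predict and predict_from_xyz: %s kJ/mol" % str(mad))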
n_samples = 300

xyz = np.array(data.get("xyz")[-n_samples:])
ene = np.array(data.get("ene")[-n_samples:]) * 2625.50
ene = ene - data.get("ene")[0] * 2625.50
zs = np.array(data["zs"][-n_samples:], dtype=np.int32)

# Creating the estimator
estimator = ARMP(iterations=100, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation='acsf',
                 representation_params={"radial_rs": np.arange(0, 10, 3), "angular_rs": np.arange(0, 10, 3),
                                        "theta_s": np.arange(0, 3.14, 3)},
                 tensorboard=True, store_frequency=10, tensorboard_subdir="tb")

estimator.set_properties(ene)
estimator.generate_representation(xyz, zs)

saved_dir = "saved_model"
estimator.load_nn(saved_dir)

idx = list(range(n_samples))
# List of properties
Y = np.array([mol.properties for mol in training])
Ys = np.array([mol.properties for mol in test])

## ------------- ** Setting up the estimator ** ---------------

print(Z)
print(Z.shape)

estimator = ARMP(iterations=10,
                 l1_reg=0.0,
                 l2_reg=0.0,
                 hidden_layer_sizes=(40, 20, 10),
                 tensorboard=True,
                 store_frequency=10,
                 # batch_size=400,
                 batch_size=n_train,
                 learning_rate=0.1,
                 # scoring_function="mae",
                 )

estimator.set_representations(representations=X)
estimator.set_classes(Z)
estimator.set_properties(Y)

# idx = np.arange(0, 100)
# estimator.fit(idx)
# score = estimator.score(idx)
ene_isopent = ene_isopent - ref_ene
zs_isopent = np.array(data["zs"], dtype=np.int32)

# Shuffling the indices of the data and then selecting the first 7621 data points
idx = list(range(len(ene_isopent)))
shuffle(idx)
idx = idx[:7621]

# Appending the true energies to a list
predictions = [ene_isopent[idx]]

# Creating the estimator
acsf_params = {"nRs2": 10, "nRs3": 10, "nTs": 10, "rcut": 3.18, "acut": 3.18,
               "zeta": 52.779232035094125, "eta": 1.4954812022150898}
estimator = ARMP(iterations=5283, batch_size=37, l1_reg=8.931599068573057e-06, l2_reg=3.535679697949907e-05,
                 learning_rate=0.0008170485394812195, representation_name='acsf',
                 representation_params=acsf_params, tensorboard=True, store_frequency=25,
                 hidden_layer_sizes=(15, 88))

# Putting the data into the model
estimator.set_properties(ene_isopent)
estimator.generate_representation(xyz_isopent, zs_isopent, method="fortran")
estimator.load_nn("md-nn")

# Predicting the energies
predictions.append(estimator.predict(idx))

# Scoring the results
score = estimator.score(idx)
print(score)

# Saving the predictions to a npz file
from qml.aglaia.aglaia import ARMP
import glob
import numpy as np
from sklearn import model_selection as modsel

test_dir = "/Volumes/Transcend/repositories/my_qml_fork/qml/test/"

filenames = glob.glob(test_dir + "/qm7/*.xyz")
energies = np.loadtxt(test_dir + '/data/hof_qm7.txt', usecols=[1])
filenames.sort()

n_samples = 500

estimator = ARMP(representation_name="acsf", iterations=100)
estimator.generate_compounds(filenames[:n_samples])
estimator.set_properties(energies[:n_samples])
estimator.generate_representation(method="fortran")

idx = np.arange(0, n_samples)
idx_train, idx_test = modsel.train_test_split(idx, random_state=42, shuffle=True, test_size=0.1)

estimator.fit(idx_train)
estimator.score(idx_train)
# Creating the estimator
acsf_params = {"nRs2": 14, "nRs3": 14, "nTs": 14, "rcut": 3.29, "acut": 3.29,
               "zeta": 100.06564927139748, "eta": 39.81824764370754}
estimator = ARMP(iterations=2633, batch_size=22, l1_reg=1.46e-05, l2_reg=0.0001, learning_rate=0.0013,
                 representation_name='acsf', representation_params=acsf_params, tensorboard=True,
                 store_frequency=25, hidden_layer_sizes=(185,))

estimator.set_properties(ene_isopent)
estimator.generate_representation(xyz_isopent, zs_isopent, method="fortran")

# Training the model on 3 folds of n data points
for n in n_samples:
    cv_idx = idx_train[:n]

    splitter = modsel.KFold(n_splits=3, random_state=42, shuffle=True)
    indices = splitter.split(cv_idx)
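    # Sketch of how the inner cross-validation loop could proceed (an assumption; the
    # original excerpt stops after creating the splitter). KFold.split yields positional
    # indices into cv_idx, which are mapped back to data indices before fitting and scoring.
    # Note that calling fit repeatedly on the same estimator may continue training from the
    # current weights rather than re-initialising them.
    for train_pos, test_pos in indices:
        fold_train = np.asarray(cv_idx)[train_pos]
        fold_test = np.asarray(cv_idx)[test_pos]
        estimator.fit(fold_train)
        print(estimator.score(fold_test))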
acsf_params = { "nRs2": 15, "nRs3": 15, "nTs": 15, "rcut": 5, "acut": 5, "zeta": 220.127, "eta": 30.8065 } # Generate estimator estimator = ARMP(iterations=10, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation_name='acsf', representation_params=acsf_params, tensorboard=True, store_frequency=2, hidden_layer_sizes=(50, 30, 10), batch_size=200) estimator.set_properties(ene_isopent) estimator.generate_representation(pad_xyz, pad_zs, method='fortran') print("Generated the representations") print(estimator.representation.shape) idx = list(range(n_samples)) idx_train, idx_test = modsel.train_test_split(idx, random_state=42, shuffle=True)
print("The l1 regularisation values:") print(l1_reg) print("The l2 regularisation values:") print(l2_reg) acsf_params = { "radial_rs": np.arange(0, 10, 0.5), "angular_rs": np.arange(0, 10, 0.5), "theta_s": np.arange(0, 3.14, 0.25) } estimator = ARMP(iterations=2000, batch_size=256, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.00015, representation='acsf', representation_params=acsf_params, tensorboard=True, store_frequency=50) estimator.set_properties(ene) estimator.generate_representation(xyz, zs) idx = list(range(n_samples)) idx_train, idx_test = modsel.train_test_split(idx, test_size=0.15, random_state=42, shuffle=True) all_scores = []
from qml.aglaia.aglaia import ARMP
import os
import numpy as np

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/local_slatm_ch4cn_light.npz')

representation = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=150, l2_reg=0.0, learning_rate=0.005, hidden_layer_sizes=(40, 20, 10))

## ------------- ** Fitting to the data ** ---------------

estimator.fit(x=representation, y=energies, classes=zs)

## ------------- ** Predicting and scoring ** ---------------

score = estimator.score(x=representation, y=energies, classes=zs)

print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(x=representation, classes=zs)
from qml.aglaia.aglaia import ARMP
import os
import numpy as np

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load("/Volumes/Transcend/repositories/my_qml_fork/qml/test/data/local_slatm_ch4cn_light.npz")

representation = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=3000, learning_rate=0.075, l1_reg=0.0, l2_reg=0.0, tensorboard=True,
                 store_frequency=50)

## ------------- ** Fitting to the data ** ---------------

estimator.fit(x=representation, y=energies, classes=zs)

## ------------- ** Predicting and scoring ** ---------------

score = estimator.score(x=representation, y=energies, classes=zs)

print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(x=representation, classes=zs)