Exemplo n.º 1
0
def test_predict_3():
    """Fit ARMP on pre-computed arrays and check the shape of its predictions.

    The arrays ``arr_0``, ``arr_1`` and ``arr_2`` are read from
    ``data/local_slatm_ch4cn_light.npz`` (relative to this file) and passed to
    the estimator as ``x``, ``classes`` and ``y`` respectively. The test passes
    when the predicted energies have the same shape as the reference energies.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(test_dir, "data", "local_slatm_ch4cn_light.npz")

    # For .npz archives np.load returns an NpzFile that keeps a file handle
    # open; the context manager closes it once the arrays have been read.
    with np.load(data_path) as data:
        descriptor = data["arr_0"]
        classes = data["arr_1"]
        energies = data["arr_2"]

    estimator = ARMP()
    estimator.fit(x=descriptor, y=energies, classes=classes)
    energies_pred = estimator.predict(x=descriptor, classes=classes)

    assert energies.shape == energies_pred.shape
Exemplo n.º 2
0
import os
import numpy as np

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data_path = os.path.join(current_dir, '..', 'test', 'data',
                         'local_slatm_ch4cn_light.npz')

# For .npz archives np.load returns an NpzFile that keeps a file handle open;
# the context manager closes it once the three arrays have been read.
with np.load(data_path) as data:
    representation = data["arr_0"]
    zs = data["arr_1"]
    energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=150,
                 l2_reg=0.0,
                 learning_rate=0.005,
                 hidden_layer_sizes=(40, 20, 10))

##  ------------- ** Fitting to the data ** ---------------

estimator.fit(x=representation, y=energies, classes=zs)

##  ------------- ** Predicting and scoring ** ---------------

# The model is scored on the same data it was fitted on.
score = estimator.score(x=representation, y=energies, classes=zs)

# NOTE(review): the sign flip presumably undoes a negated-MAE scoring
# convention in ARMP.score -- confirm against the estimator.
print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(x=representation, classes=zs)
Exemplo n.º 3
0
## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data_path = os.path.join(current_dir, '..', 'test', 'data',
                         'local_slatm_ch4cn_light.npz')

# For .npz archives np.load returns an NpzFile that keeps a file handle open;
# the context manager closes it once the three arrays have been read.
with np.load(data_path) as data:
    descriptor = data["arr_0"]
    zs = data["arr_1"]
    energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=100, l2_reg=0.0)

# The data is stored on the estimator up front; fit/score/predict below then
# only receive indices.
estimator.set_representations(representations=descriptor)
estimator.set_classes(zs)
estimator.set_properties(energies)

##  ------------- ** Fitting to the data ** ---------------

idx = np.arange(0, 100)

estimator.fit(idx)

##  ------------- ** Predicting and scoring ** ---------------

# The model is scored on the same indices it was fitted on.
score = estimator.score(idx)

# NOTE(review): the sign flip presumably undoes a negated-MAE scoring
# convention in ARMP.score -- confirm against the estimator.
print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(idx)
Exemplo n.º 4
0
print("Generated the representations")
print(estimator.representation.shape)

idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx,
                                              random_state=42,
                                              shuffle=True)

estimator.fit(idx_train)

# Read the first 10 entries of each dataset. Slicing copies the values into
# memory, so the file can be closed as soon as the block ends; previously the
# handle was left open for the rest of the script.
with h5py.File(
        "/Volumes/Transcend/data_sets/CN_squalane/dft/squalane_cn_dft.hdf5",
        "r") as data_squal:
    xyz_squal = np.array(data_squal.get("xyz")[:10])
    zs_squal = np.array(data_squal.get("zs")[:10], dtype=np.int32)
    # NOTE(review): 2625.50 is presumably a Hartree -> kJ/mol conversion
    # factor -- confirm the units stored in the file.
    ene_squal = np.array(data_squal.get("ene")[:10]) * 2625.50
ene_squal = ene_squal - ref_ene

# The returned score is not used here.
estimator.score(idx_test)

# Predict once from raw coordinates of the newly loaded samples and once from
# the held-out indices of the data already stored on the estimator.
pred1 = estimator.predict_from_xyz(xyz_squal, zs_squal)
print("Done squal pred")
pred2 = estimator.predict(idx_test)

# estimator.save_nn()
#
# print(pred1)
# print(pred2)
#
plt.scatter(pred2, pred1)
plt.show()
Exemplo n.º 5
0
# Build an ARMP estimator that uses the 'acsf' representation with the
# parameters in acsf_params. Both regularisation terms are zero and "rmse" is
# selected as the scoring function.
estimator = ARMP(iterations=6000,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 l1_reg=0.0,
                 l2_reg=0.0,
                 scoring_function="rmse",
                 tensorboard=False,
                 store_frequency=10,
                 learning_rate=0.075)

# Only the first 100 energies and the first 100 files are given to the estimator.
estimator.set_properties(energies[:100])
estimator.generate_compounds(filenames[:100])
estimator.generate_representation(method="tf")
print(estimator.representation.shape)

idx = list(range(100))

# idx_test is not used below; only the training indices are consumed.
# NOTE(review): if modsel is sklearn.model_selection, recent releases reject
# test_size=0 with a ValueError -- confirm the intended version / behaviour.
idx_train, idx_test = modsel.train_test_split(idx,
                                              test_size=0,
                                              random_state=42,
                                              shuffle=True)

estimator.fit(idx_train)

# Scoring and prediction use the same indices the model was fitted on.
score = estimator.score(idx_train)
print("The RMSE is %s kcal/mol." % (str(score)))

ene_pred = estimator.predict(idx_train)

# Reference energies against predicted energies.
# assumes energies supports indexing with a list of ints (e.g. a numpy
# array) -- TODO confirm
plt.scatter(energies[idx_train], ene_pred)
plt.show()
Exemplo n.º 6
0
    "zeta": 220.127,
    "eta": 30.8065
}
# Build an ARMP estimator that uses the 'acsf' representation with the
# parameters in acsf_params, with tensorboard enabled.
estimator = ARMP(iterations=200,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=10,
                 l1_reg=0.0001,
                 l2_reg=0.005,
                 learning_rate=0.0005)

estimator.set_properties(ene)
estimator.generate_representation(xyz, zs)

saved_dir = "saved_model"

# A previously saved network is loaded before fit is called.
# NOTE(review): presumably fit then continues training the loaded model rather
# than starting from scratch -- confirm against ARMP.
estimator.load_nn(saved_dir)

idx = list(range(n_samples))

estimator.fit(idx)

# Predict the same samples twice: by index into the data stored on the
# estimator, and directly from the coordinates and nuclear charges.
pred_1 = estimator.predict(idx)
pred_2 = estimator.predict_from_xyz(xyz, zs)

# plt.scatter(pred_1, pred_2)
# plt.show()

# estimator.save_nn()
# Shuffling the indices of the data and then selecting the first 7621 data points
idx = list(range(len(ene_isopent)))
shuffle(idx)
idx = idx[:7621]

# Appending the true energies to a list
# assumes ene_isopent supports indexing with a list of ints (e.g. a numpy
# array) -- TODO confirm
predictions = [ene_isopent[idx]]

# Creating the estimator
acsf_params = {"nRs2":10, "nRs3":10, "nTs":10, "rcut":3.18, "acut":3.18, "zeta":52.779232035094125, "eta":1.4954812022150898}

estimator = ARMP(iterations=5283, batch_size=37, l1_reg=8.931599068573057e-06, l2_reg=3.535679697949907e-05,
                 learning_rate=0.0008170485394812195, representation_name='acsf', representation_params=acsf_params,
                 tensorboard=True, store_frequency=25, hidden_layer_sizes=(15,88))

# Putting the data into the model
estimator.set_properties(ene_isopent)
estimator.generate_representation(xyz_isopent, zs_isopent, method="fortran")
# No fit is called in this snippet; the network comes from the "md-nn" directory.
estimator.load_nn("md-nn")

# Predicting the energies
# After this append, predictions holds [true energies, predicted energies]
# for the selected indices.
predictions.append(estimator.predict(idx))

# Scoring the results
score = estimator.score(idx)
print(score)

# Saving the predictions to a npz file
# The array is passed positionally, so it is stored under the default key "arr_0".
np.savez("cross_pred_md_on_vr.npz", np.asarray(predictions))

# Loading the model previously trained
estimator.load_nn("../trained_nn/vr-nn")
estimator.set_properties(ene_surface)

# Generating the representation (timed with wall-clock time)
start = time.time()
estimator.generate_representation(xyz_surface, zs_surface, method="fortran")
end = time.time()
print("The time taken to generate the representations is %s s" %
      (str(end - start)))
print("The shape of the representations is %s" %
      (str(estimator.representation.shape)))

# Predicting the energies
idx = list(range(n_samples))
predictions = estimator.predict(idx)

# Printing the mean absolute error
mae = mean_absolute_error(ene_surface, predictions)
print("The MAE is %.2f kJ/mol" % mae)

# Saving the results to a HDF5 file
# The context manager guarantees the file is flushed and closed even if one of
# the create_dataset calls raises; a bare f.close() would be skipped then.
with h5py.File("VR-NN_surface_predictions.hdf5", "w") as f:
    f.create_dataset("ch_dist_alk", ch_dist_alk.shape, data=ch_dist_alk)
    f.create_dataset("ch_dist_cn", ch_dist_cn.shape, data=ch_dist_cn)
    f.create_dataset("h_id", h_id.shape, data=h_id)
    f.create_dataset("xyz", xyz_surface.shape, data=xyz_surface)
    # The "ene" dataset stores the predicted energies, not ene_surface.
    f.create_dataset("ene", predictions.shape, data=predictions)
    f.create_dataset("zs", zs_surface.shape, data=zs_surface)
Exemplo n.º 9
0
n_samples = 500

# Read the last n_samples entries of each dataset. Slicing copies the values
# into memory, so the file is closed as soon as the block ends; previously the
# handle was left open for the rest of the script.
with h5py.File("/Volumes/Transcend/data_sets/CN_isopentane/pruned_dft_with_forces/pruned_isopentane_cn_dft.hdf5", "r") as data:
    xyz = np.array(data.get("xyz")[-n_samples:])
    # NOTE(review): 2625.50 is presumably a Hartree -> kJ/mol conversion
    # factor -- confirm the units stored in the file.
    ene = np.array(data.get("ene")[-n_samples:])*2625.50
    # Energies are expressed relative to the first entry of the whole dataset.
    ene = ene - data.get("ene")[0]*2625.50
    zs = np.array(data["zs"][-n_samples:], dtype=np.int32)

# Creating the estimator
acsf_param = {"nRs2": 5, "nRs3": 5, "nTs": 5, "rcut": 5, "acut": 5, "zeta": 220.127, "eta": 30.8065}
estimator = ARMP(iterations=1000, batch_size=512, l1_reg=0.0, l2_reg=0.0, learning_rate=0.001, representation_name='acsf',
                 representation_params=acsf_param, tensorboard=False, store_frequency=50)
estimator.set_properties(ene)
estimator.generate_representation(xyz, zs, method='fortran')
print(estimator.g.shape)

# Doing cross validation
# shuffle=False keeps the indices in order: the last 15% form the test split.
idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx, test_size=0.15, random_state=42, shuffle=False)

print("Starting the fitting...")
estimator.fit(idx_train)

# estimator.save_nn("saved_model")

# Predict the training samples twice: by index into the stored data, and
# directly from their coordinates and nuclear charges.
pred1 = estimator.predict(idx_train)
pred2 = estimator.predict_from_xyz(xyz[idx_train], zs[idx_train])


Exemplo n.º 10
0
                 l2_reg=0.005,
                 learning_rate=0.0005,
                 representation='acsf',
                 representation_params={
                     "radial_rs": np.arange(0, 10, 3),
                     "angular_rs": np.arange(0, 10, 3),
                     "theta_s": np.arange(0, 3.14, 3)
                 },
                 tensorboard=True,
                 store_frequency=10,
                 tensorboard_subdir="tb")

# Hand the reference properties to the estimator and build the representation
# from coordinates (xyz) and nuclear charges (zs).
estimator.set_properties(ene)
estimator.generate_representation(xyz, zs)

saved_dir = "saved_model"

# A previously saved network is loaded from saved_dir.
estimator.load_nn(saved_dir)

idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx,
                                              random_state=42,
                                              shuffle=True)

# Prediction with the loaded network, made before any call to fit in this
# snippet. pred3 is not used again within the visible lines.
pred3 = estimator.predict(idx)

# NOTE(review): presumably fit continues training the loaded network --
# confirm against ARMP.
estimator.fit(idx_train)

print(estimator.predict(idx_test))
# estimator.score(idx_test)
Exemplo n.º 11
0
# First estimator: a short run (10 iterations) using the 'acsf' representation.
# NOTE(review): np.arange(0, 3.14, 3) yields the single value [0.] -- confirm
# that a one-element theta_s grid is intended.
estimator = ARMP(iterations=10, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation='acsf',
                 representation_params={"radial_rs": np.arange(0, 10, 5), "angular_rs": np.arange(0, 10, 5),
                                        "theta_s": np.arange(0, 3.14, 3)},
                 tensorboard=True, store_frequency=10
                 )

estimator.set_properties(ene_true)
estimator.generate_representation(xyz, zs)

idx = list(range(xyz.shape[0]))

# Fit once, save the network to the "temp" directory, and record predictions.
estimator.fit(idx)
estimator.save_nn(save_dir="temp")

pred1 = estimator.predict(idx)

# The loaded_model flag is set by hand before fitting a second time.
# NOTE(review): presumably this makes fit resume from the current network
# instead of re-initialising it -- confirm against ARMP.fit.
estimator.loaded_model = True

estimator.fit(idx)

pred2 = estimator.predict(idx)
# Close the estimator's session and reset the default TensorFlow graph before
# a second estimator is constructed.
estimator.session.close()
tf.reset_default_graph()

# Second estimator, constructed with the same arguments as the first.
new_estimator = ARMP(iterations=10, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation='acsf',
                 representation_params={"radial_rs": np.arange(0, 10, 5), "angular_rs": np.arange(0, 10, 5),
                                        "theta_s": np.arange(0, 3.14, 3)},
                    tensorboard=True, store_frequency=10
                     )
new_estimator.set_properties(ene_true)
Exemplo n.º 12
0
                 representation_params=acsf_params,
                 tensorboard=True,
                 learning_rate=0.075,
                 l1_reg=0.0,
                 l2_reg=0.0)

# Register the input files and the reference energies with the estimator.
estimator.generate_compounds(filenames)
estimator.set_properties(energies)

# No coordinates are passed here; the representation is generated from the
# compounds registered above.
estimator.generate_representation(method="fortran")
print("The shape of the representation is: %s" %
      (str(estimator.representation.shape)))

##  ------------- ** Fitting to the data ** ---------------

idx = np.arange(0, 100)
# idx_test is not used below; only the training indices are consumed.
# NOTE(review): if modsel is sklearn.model_selection, recent releases reject
# test_size=0 with a ValueError -- confirm the intended version / behaviour.
idx_train, idx_test = modsel.train_test_split(idx,
                                              test_size=0,
                                              random_state=42,
                                              shuffle=True)

estimator.fit(idx_train)

##  ------------- ** Predicting and scoring ** ---------------

# Scoring and prediction use the same indices the model was fitted on.
score = estimator.score(idx_train)

# NOTE(review): the sign flip presumably undoes a negated-MAE scoring
# convention in ARMP.score -- confirm against the estimator.
print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(idx_train)