def train_only():
    """Fit the energy+force regression on the training file and save the model.

    Reads data through ``get_data_from_file(FILENAME_TRAIN, n=40)``, centres
    the energies on their mean, builds the energy kernel and the gradient
    kernel of the training set against itself, solves the stacked system with
    ``svd_solve`` and writes the coefficients, charges and representations to
    ``data/training_*.npy``.

    The energy mean is only printed, not saved.
    """
    kernel_width = 10.0

    # Load the training set.
    reps, dreps, charges, energies, forces = get_data_from_file(
        FILENAME_TRAIN, n=40)

    # Centre the energies (in place) and report the shift that was removed.
    energy_shift = energies.mean()
    energies -= energy_shift
    print(energy_shift)

    # Targets: all energies followed by all force components.
    forces = np.concatenate(forces)
    targets = np.concatenate((energies, forces.flatten()))

    print("Kernels ...")
    energy_kernel = get_atomic_local_kernel(
        reps, reps, charges, charges, kernel_width)
    force_kernel = get_atomic_local_gradient_kernel(
        reps, reps, dreps, charges, charges, kernel_width)
    stacked_kernel = np.concatenate((energy_kernel, force_kernel))

    print("Alphas operator ...")
    coefficients = svd_solve(stacked_kernel, targets, rcond=1e-11)

    np.save("data/training_alphas.npy", coefficients)
    np.save("data/training_Q.npy", charges)
    np.save("data/training_X.npy", reps)
def train(dataname, n_train=100):
    """Train an energy+force model for ``dataname`` and save it under ``data/``.

    Args:
        dataname: Dataset stem. Training data is read from
            ``data/<dataname>-train.npz`` and results are written to
            ``data/<dataname>_{offset,sigma,alphas,Q,X}.npy``.
        n_train: Forwarded to ``get_data_from_file`` as ``n``.

    Returns:
        None.
    """
    kernel_width = 10.0
    source_file = "data/" + dataname + "-train.npz"

    # Load the training set.
    reps, dreps, charges, energies, forces = get_data_from_file(
        source_file, n=n_train)

    # Centre the energies in place; the removed mean is saved with the model.
    energy_shift = energies.mean()
    energies -= energy_shift
    print("OFFSET: ", energy_shift)

    # Targets: all energies followed by all force components.
    forces = np.concatenate(forces)
    targets = np.concatenate((energies, forces.flatten()))

    print("Generating Kernels ...")
    energy_kernel = get_atomic_local_kernel(
        reps, reps, charges, charges, kernel_width)
    force_kernel = get_atomic_local_gradient_kernel(
        reps, reps, dreps, charges, charges, kernel_width)
    stacked_kernel = np.concatenate((energy_kernel, force_kernel))

    print("Alphas operator ...")
    coefficients = svd_solve(stacked_kernel, targets, rcond=1e-11)

    out_prefix = "data/" + dataname
    np.save(out_prefix + "_offset.npy", energy_shift)
    np.save(out_prefix + "_sigma.npy", kernel_width)
    np.save(out_prefix + "_alphas.npy", coefficients)
    np.save(out_prefix + "_Q.npy", charges)
    np.save(out_prefix + "_X.npy", reps)
def predict(nuclear_charges, coordinates):
    """Print predicted energy and forces for one query molecule.

    The trained model is loaded from ``FILENAME_ALPHAS``,
    ``FILENAME_REPRESENTATIONS`` and ``FILENAME_CHARGES``. The predictions are
    printed next to hard-coded reference values; nothing is returned.

    Args:
        nuclear_charges: Charges of the query molecule.
        coordinates: Coordinates of the query molecule.

    NOTE(review): ``SIGMA`` is not defined in this function and is presumably
    a module-level constant -- confirm. The hard-coded reference energy/forces
    and offset belong to one specific molecule/model.
    """
    # Load the trained model (reloaded on every call).
    coefficients = np.load(FILENAME_ALPHAS)
    train_reps = np.load(FILENAME_REPRESENTATIONS)
    train_charges = np.load(FILENAME_CHARGES)

    # Representation of the query, padded to the training set's atom count.
    pad_to = train_reps.shape[1]
    rep, drep = generate_fchl_acsf(
        nuclear_charges, coordinates, gradients=True, pad=pad_to)

    # Wrap the single query molecule as a batch of one.
    query_charges = [nuclear_charges]
    query_reps = np.array([rep])
    query_dreps = np.array([drep])

    energy_kernel = get_atomic_local_kernel(
        train_reps, query_reps, train_charges, query_charges, SIGMA)
    force_kernel = get_atomic_local_gradient_kernel(
        train_reps, query_reps, query_dreps, train_charges, query_charges,
        SIGMA)

    # Energy mean that was subtracted during training.
    offset = -97084.83100465109

    energy_predicted = np.dot(energy_kernel, coefficients)[0] + offset
    energy_true = -97086.55524903

    print("True energy %16.4f kcal/mol" % energy_true)
    print("Predicted energy %16.4f kcal/mol" % energy_predicted)

    # One row of three components per atom.
    atom_count = len(nuclear_charges)
    forces_predicted = np.dot(force_kernel, coefficients).reshape(
        (atom_count, 3))
    forces_true = np.array([
        [-66.66673100, 2.45752385, 49.92224945],
        [-17.98600137, 68.72856500, -28.82689294],
        [31.88432927, 8.98739402, -18.11946195],
        [4.19798833, -31.31692744, 8.12825145],
        [16.78395377, -24.76072606, -38.99054658],
        [6.03046276, -7.24928076, -3.88797517],
        [17.44954868, 0.21604968, 8.56118603],
        [11.73901551, -19.38200606, 13.26191987],
        [-3.43256595, 2.31940789, 9.95126984],
    ])

    print("True forces [kcal/mol]")
    print(forces_true)
    print("Predicted forces [kcal/mol]")
    print(forces_predicted)
def query(self, atoms=None, print_time=True):
    """Predict energy and forces for ``atoms`` and store them on ``self``.

    Sets ``self.energy`` and ``self.forces``; returns None.

    Args:
        atoms: Object providing ``get_positions()`` and
            ``get_atomic_numbers()``. Despite the ``None`` default it is
            dereferenced unconditionally.
        print_time: When true, print the elapsed time and the unconverted
            predicted energy.
    """
    started = time.time() if print_time else None

    # kcal/mol -> eV and kcal/mol/angstrom -> eV/angstrom
    energy_factor = 0.0433635093659
    force_factor = 0.0433635093659

    positions = atoms.get_positions()
    numbers = atoms.get_atomic_numbers()
    atom_count = positions.shape[0]

    # Both cutoffs share one value; "nRs2"/"nRs3" overrides are left disabled.
    cutoff = 4.0
    cutoffs = {"rcut": cutoff, "acut": cutoff}

    rep, drep = generate_fchl_acsf(
        numbers,
        positions,
        gradients=True,
        elements=[1, 6, 8],
        pad=self.max_atoms,
        **cutoffs)

    # Wrap the single query as a batch of one (Fortran-ordered arrays).
    query_charges = [numbers]
    query_reps = np.array([rep], order="F")
    query_dreps = np.array([drep], order="F")

    energy_kernel = get_atomic_local_kernel(
        self.repr, query_reps, self.charges, query_charges, self.sigma)
    force_kernel = get_atomic_local_gradient_kernel(
        self.repr, query_reps, query_dreps, self.charges, query_charges,
        self.sigma)

    # Energy: add back the stored offset before converting units.
    energy_predicted = np.dot(energy_kernel, self.alphas)[0] + self.offset
    self.energy = energy_predicted * energy_factor

    # Forces: one row of three components per atom.
    forces_predicted = np.dot(force_kernel, self.alphas).reshape(
        (atom_count, 3))
    self.forces = forces_predicted * force_factor

    if print_time:
        elapsed = time.time() - started
        print("qml query {:7.3f}s {:10.3f} ".format(elapsed, energy_predicted))
def generate_kernel(X1, X2, dX, charges1, charges2, sigma=10.0, **kwargs):
    """Build the energy kernel and the gradient kernel for two sets.

    Args:
        X1: Representations of the first set.
        X2: Representations of the second set.
        dX: Representation derivatives, passed to the gradient kernel.
        charges1: Charges belonging to ``X1``.
        charges2: Charges belonging to ``X2``.
        sigma: Kernel width forwarded to both kernel functions.
        **kwargs: Accepted but ignored.

    Returns:
        Tuple ``(energy_kernel, gradient_kernel)``.
    """
    energy_kernel = get_atomic_local_kernel(X1, X2, charges1, charges2, sigma)
    gradient_kernel = get_atomic_local_gradient_kernel(
        X1, X2, dX, charges1, charges2, sigma)
    return energy_kernel, gradient_kernel
def get_kernel(X1, X2, charges1, charges2, sigma=1, mode="local"):
    """Return a kernel between ``X1`` and ``X2``, chosen by the rank of ``X1``.

    When ``X1`` has more than two dimensions the atomic local kernel is used;
    otherwise a Laplacian kernel is computed (note the argument order
    ``X2, X1`` in that call).

    Args:
        X1: First set of representations; must expose ``.shape``.
        X2: Second set of representations.
        charges1: Charges for ``X1`` (only used by the atomic local kernel).
        charges2: Charges for ``X2`` (only used by the atomic local kernel).
        sigma: Kernel width.
        mode: "local" or "atomic". Currently not consulted; the choice is
            made from ``X1.shape`` alone.
    """
    rank = len(X1.shape)
    if rank <= 2:
        return laplacian_kernel(X2, X1, sigma)
    return get_atomic_local_kernel(X1, X2, charges1, charges2, sigma)
def train(dataname, n_train=100):
    """Train an energy+force model on a random subset and save it under ``data/``.

    All data is loaded with ``csvdir_to_reps(dataname)``; ``n_train`` entries
    are drawn at random (via ``np.random.shuffle``) for training. No energy
    offset is removed: the saved offset is ``0.0``.

    Args:
        dataname: Passed to ``csvdir_to_reps`` and used as the stem of the
            output files ``data/<dataname>_{offset,sigma,alphas,Q,X}.npy``.
        n_train: Number of training entries to draw.

    Returns:
        None.

    Raises:
        SystemExit: With status 1 when fewer than ``n_train`` entries are
            available. (The previous ``exit()`` call is a ``site`` helper
            meant for interactive use and reported status 0 on this failure.)
    """
    SIGMA = 10.0

    # Load every available entry, then sub-sample.
    Xall, dXall, Qall, Eall, Fall = csvdir_to_reps(dataname)

    n_available = len(Eall)
    if n_available < n_train:
        print("Not enough training data for", n_train)
        raise SystemExit(1)

    # Random selection of training indices.
    idx = list(range(n_available))
    np.random.shuffle(idx)
    train_idx = idx[:n_train]
    print(len(train_idx))

    X = Xall[train_idx]
    dX = dXall[train_idx]
    Q = [Qall[i] for i in train_idx]
    E = Eall[train_idx]
    F = [Fall[i] for i in train_idx]

    offset = 0.0
    print("OFFSET: ", offset)

    # Targets: all energies followed by all force components.
    F = np.concatenate(F)
    Y = np.concatenate((E, F.flatten()))

    print("Generating Kernels ...")
    Kte = get_atomic_local_kernel(X, X, Q, Q, SIGMA)
    Kt = get_atomic_local_gradient_kernel(X, X, dX, Q, Q, SIGMA)
    C = np.concatenate((Kte, Kt))

    print("Alphas operator ...")
    alpha = svd_solve(C, Y, rcond=1e-11)

    np.save("data/" + dataname + "_offset.npy", offset)
    np.save("data/" + dataname + "_sigma.npy", SIGMA)
    np.save("data/" + dataname + "_alphas.npy", alpha)
    np.save("data/" + dataname + "_Q.npy", Q, allow_pickle=True)
    np.save("data/" + dataname + "_X.npy", X)
def query(self, atoms=None):
    """Predict energy and forces for ``atoms`` and store them on ``self``.

    Sets ``self.energy`` and ``self.forces``; returns None. When
    ``self.debug`` is true, the elapsed time and the unconverted predicted
    energy are printed.

    Args:
        atoms: Object providing ``get_positions()`` and
            ``get_atomic_numbers()``. Despite the ``None`` default it is
            dereferenced unconditionally.
    """
    started = time.time() if self.debug else None

    # kcal/mol -> eV and kcal/mol/angstrom -> eV/angstrom
    energy_factor = 0.0433635093659
    force_factor = 0.0433635093659

    positions = atoms.get_positions()
    numbers = atoms.get_atomic_numbers()
    atom_count = positions.shape[0]

    # Representation of the query; settings come from self.parameters.
    rep, drep = generate_fchl_acsf(
        numbers, positions, gradients=True, **self.parameters)

    # Wrap the single query as a batch of one (Fortran-ordered arrays).
    query_charges = [numbers]
    query_reps = np.array([rep], order="F")
    query_dreps = np.array([drep], order="F")

    energy_kernel = get_atomic_local_kernel(
        self.repr, query_reps, self.charges, query_charges, self.sigma)
    force_kernel = get_atomic_local_gradient_kernel(
        self.repr, query_reps, query_dreps, self.charges, query_charges,
        self.sigma)

    # Energy: add back the stored offset before converting units.
    energy_predicted = np.dot(energy_kernel, self.alphas)[0] + self.offset
    self.energy = energy_predicted * energy_factor

    # Forces: one row of three components per atom.
    forces_predicted = np.dot(force_kernel, self.alphas).reshape(
        (atom_count, 3))
    self.forces = forces_predicted * force_factor

    if self.debug:
        elapsed = time.time() - started
        print("fchl19 query {:7.3f}s {:10.3f} ".format(
            elapsed, energy_predicted))
def get_potential_energy(self, atoms=None, force_consistent=False):
    """Return the model's energy prediction for ``atoms``.

    Args:
        atoms: Object providing ``get_atomic_numbers()`` and
            ``get_positions()``. Despite the ``None`` default it is
            dereferenced unconditionally.
        force_consistent: Accepted but not used.

    Returns:
        The predicted energy as a Python float, multiplied by the
        module-level factor ``convback_E``.
    """
    charges = atoms.get_atomic_numbers()

    # Representation only (no gradients), padded to the model's atom count.
    # An earlier variant used: pad=9, elements=[1, 6, 7, 9, 17, 35].
    rep = generate_fchl_acsf(
        charges, atoms.get_positions(), gradients=False, pad=self.nAtoms)

    # Wrap the single query as a batch of one.
    Xs = np.array([rep])
    Qs = [charges]

    Kse = get_atomic_local_kernel(self.X, Xs, self.Q, Qs, self.sigmas)

    # The product holds a single value. Extract it with .item(): calling
    # float() directly on an array with ndim > 0 is deprecated in NumPy >= 1.25.
    prediction = np.dot(Kse, self.alphas)
    energy = float(prediction.item()) * convback_E

    return energy
def train():
    """Train an energy+force model on the "train" property set and save it.

    Steps: read properties with ``get_properties("train")``, load one
    ``xyz/<name>.xyz`` per entry, shuffle, build FCHL-ACSF representations
    with gradients, solve the stacked energy/gradient kernel system with
    ``svd_solve`` and write ``X_active_learning.npy``,
    ``alphas_active_learning.npy`` and ``Q_active_learning.npy`` to the
    current directory. Training-set error statistics are printed at the end.

    The force targets are the stored per-molecule values multiplied by -1,
    and the energies are centred on their mean (the mean is not saved).

    Returns:
        None.
    """
    data = get_properties("train")
    SIGMA = 2.5

    mols = []
    for name in sorted(data):
        mol = qml.Compound()
        mol.read_xyz("xyz/" + name + ".xyz")
        # Attach the first property column to the compound.
        mol.properties = data[name][0]
        mols.append(mol)

    shuffle(mols)

    # REPRESENTATIONS
    print("\n -> calculate representations")
    start = time()

    x = []
    disp_x = []
    f = []
    e = []
    q = []

    for mol in mols:
        (x1, dx1) = generate_fchl_acsf(
            mol.nuclear_charges,
            mol.coordinates,
            gradients=True,
            pad=23,
            elements=[1, 6, 7, 8, 16, 17])

        e.append(mol.properties)
        # Look the second property column up again by name.
        # NOTE(review): assumes mol.name is "xyz/<name>.xyz" so that [4:-4]
        # recovers the dictionary key -- confirm.
        f.append(data[(mol.name)[4:-4]][1])
        x.append(x1)
        disp_x.append(dx1)
        q.append(mol.nuclear_charges)

    X_train = np.array(x)
    F_train = np.array(f)
    F_train *= -1
    E_train = np.array(e)
    dX_train = np.array(disp_x)
    Q_train = q

    E_mean = np.mean(E_train)
    E_train -= E_mean

    F_train = np.concatenate(F_train)

    end = time()
    print(end - start)
    print("")

    print(" -> calculating Kernels")
    start = time()

    # Only training-set kernels are built; no held-out evaluation is done here.
    Kte = get_atomic_local_kernel(X_train, X_train, Q_train, Q_train, SIGMA)
    Kt = get_atomic_local_gradient_kernel(
        X_train, X_train, dX_train, Q_train, Q_train, SIGMA)

    C = np.concatenate((Kte, Kt))
    Y = np.concatenate((E_train, F_train.flatten()))

    end = time()
    print(end - start)
    print("")

    print("Alphas operator ...")
    start = time()
    alpha = svd_solve(C, Y, rcond=1e-12)
    end = time()
    print(end - start)
    print("")

    print("save X")
    np.save('X_active_learning.npy', X_train)

    print("save alphas")
    np.save('alphas_active_learning.npy', alpha)

    print("save Q")
    np.save('Q_active_learning.npy', Q_train)

    eYt = np.dot(Kte, alpha)
    fYt = np.dot(Kt, alpha)

    # linregress returns the Pearson correlation coefficient r; it is squared
    # here so the printed number matches its "r^2" label.
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        E_train, eYt)
    print("TRAINING ENERGY MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
            (np.mean(np.abs(E_train - eYt)), slope, intercept, r_value ** 2))

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        F_train.flatten(), fYt.flatten())
    print("TRAINING FORCE MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
            (np.mean(np.abs(F_train.flatten() - fYt.flatten())), slope, intercept, r_value ** 2))
def train_alphas(reps, dreps, nuclear_charges, E, F, train_idx, parameters):
    """Fit regression coefficients on ``train_idx`` and report held-out errors.

    Every index in ``range(4001)`` that is not in ``train_idx`` is used as
    the test set. A single "TEST" line is printed with sigma, llambda, a
    combined score, and MAE/RMSE for energies and forces.

    Args:
        reps: Representations, indexable by an integer array; ``shape[1]``
            is used as the atom count in the combined score.
        dreps: Representation derivatives, indexed like ``reps``.
        nuclear_charges: Per-molecule charges, indexed one entry at a time.
        E: Energies, indexable by an integer array.
        F: Forces, indexable by an integer array.
        train_idx: Indices of the training molecules.
        parameters: Mapping with ``"sigma"`` (kernel width) and ``"llambda"``
            (passed to ``svd_solve`` as ``rcond``).

    Returns:
        The coefficients returned by ``svd_solve``.
    """
    print(reps.shape)

    # NOTE(review): the index pool is hard-coded to 4001 entries, presumably
    # the dataset size -- confirm before using with other data.
    all_idx = np.arange(4001)

    # Held-out indices in ascending order (set difference instead of a
    # per-element membership scan over train_idx).
    test_idx = np.setdiff1d(all_idx, train_idx)

    print(train_idx)
    print(test_idx)

    natoms = reps.shape[1]
    sigma = parameters["sigma"]

    X = reps[train_idx]
    Xs = reps[test_idx]
    dX = dreps[train_idx]
    dXs = dreps[test_idx]
    Q = [nuclear_charges[i] for i in train_idx]
    Qs = [nuclear_charges[i] for i in test_idx]

    # Training system: energy rows stacked on top of force rows.
    Ke = get_atomic_local_kernel(X, X, Q, Q, sigma)
    Kf = get_atomic_local_gradient_kernel(X, X, dX, Q, Q, sigma)
    C = np.concatenate((Ke, Kf))

    # Kernels between the training set and the held-out set.
    Kes = get_atomic_local_kernel(X, Xs, Q, Qs, sigma)
    Kfs = get_atomic_local_gradient_kernel(X, Xs, dXs, Q, Qs, sigma)

    Y = np.concatenate((E[train_idx], F[train_idx].flatten()))

    alphas = svd_solve(C, Y, rcond=parameters["llambda"])

    # Reference values and predictions for the held-out set.
    eYs = deepcopy(E[test_idx])
    fYs = deepcopy(F[test_idx]).flatten()

    eYss = np.dot(Kes, alphas)
    fYss = np.dot(Kfs, alphas)

    ermse_test = np.sqrt(np.mean(np.square(eYss - eYs)))
    emae_test = np.mean(np.abs(eYss - eYs))

    frmse_test = np.sqrt(np.mean(np.square(fYss - fYs)))
    fmae_test = np.mean(np.abs(fYss - fYs))

    # Combined score: 0.01 * squared energy error + squared force error / natoms.
    schnet_score = 0.01 * sum(np.square(eYss - eYs))
    schnet_score += sum(np.square(fYss - fYs)) / natoms

    print("TEST %5.2f %.2E %6.4e %10.8f %10.8f %10.8f %10.8f" % \
            (sigma, parameters["llambda"], schnet_score, emae_test, ermse_test, fmae_test, frmse_test))

    return alphas
def query(self, atoms=None, print_time=True):
    """Predict energy and forces for ``atoms`` and store them on ``self``.

    Sets ``self.energy`` and ``self.forces``; returns None. If
    ``self.reducer`` is not None, the representation and its derivative are
    projected through it before the kernels are evaluated.

    Args:
        atoms: Object providing ``get_positions()`` and
            ``get_atomic_numbers()``. Despite the ``None`` default it is
            dereferenced unconditionally.
        print_time: Only controls whether the overall timestamps are taken;
            all timing printouts are currently disabled.
    """
    if print_time:
        start = time.time()

    # Unit conversion is switched off here (factor 1.0); the kcal/mol -> eV
    # factor used elsewhere is 0.0433635093659.
    energy_factor = 1.0
    force_factor = 1.0

    positions = atoms.get_positions()
    numbers = atoms.get_atomic_numbers()
    atom_count = positions.shape[0]

    # --- representation (timestamps feed the disabled printouts) ---
    rep_start = time.time()

    rep, drep = generate_fchl_acsf(
        numbers,
        positions,
        gradients=True,
        elements=[1, 6, 8],
        pad=self.max_atoms,
    )

    query_charges = [numbers]
    query_reps = np.array([rep], order="F")
    query_dreps = np.array([drep], order="F")

    reducer = self.reducer
    if reducer is not None:
        # Contract the third axis of both arrays with the reducer matrix.
        query_reps = np.einsum("ijk,kl->ijl", query_reps, reducer)
        query_dreps = np.einsum("ijkmn,kl->ijlmn", query_dreps, reducer)

    rep_end = time.time()

    # --- kernels ---
    kernel_start = time.time()

    energy_kernel = get_atomic_local_kernel(
        self.repr, query_reps, self.charges, query_charges, self.sigma)
    force_kernel = get_atomic_local_gradient_kernel(
        self.repr, query_reps, query_dreps, self.charges, query_charges,
        self.sigma)

    kernel_end = time.time()

    # --- prediction ---
    pred_start = time.time()

    energy_predicted = np.dot(energy_kernel, self.alphas)[0] + self.offset
    self.energy = energy_predicted * energy_factor

    # One row of three components per atom.
    forces_predicted = np.dot(force_kernel, self.alphas).reshape(
        (atom_count, 3))
    self.forces = forces_predicted * force_factor

    pred_end = time.time()

    if print_time:
        end = time.time()
        # Printouts of the rep / kernel / prediction / total timings are
        # intentionally left disabled.
def predict_only():
    """Predict energy and forces for one built-in molecule and print them.

    Loads the model from ``data/training_{alphas,X,Q}.npy``, evaluates it on
    a hard-coded nine-atom molecule and prints the predictions next to
    hard-coded reference values. Nothing is returned.
    """
    # Load the trained model.
    coefficients = np.load("data/training_alphas.npy")
    train_reps = np.load("data/training_X.npy")
    train_charges = np.load("data/training_Q.npy")

    # The query molecule.
    nuclear_charges = np.array([6, 6, 8, 1, 1, 1, 1, 1, 1])
    coordinates = np.array([
        [0.07230959, 0.61441211, -0.03115568],
        [-1.26644639, -0.27012846, -0.00720771],
        [1.11516977, -0.30732869, 0.06414394],
        [0.10673943, 1.44346835, -0.79573006],
        [-0.02687486, 1.19350887, 0.98075343],
        [-2.06614011, 0.38757505, 0.39276693],
        [-1.68213881, -0.60620688, -0.97804526],
        [-1.18668224, -1.07395366, 0.67075071],
        [1.37492532, -0.56618891, -0.83172035],
    ])

    # Representation of the query, padded to the training set's atom count.
    pad_to = train_reps.shape[1]
    rep, drep = generate_fchl_acsf(
        nuclear_charges, coordinates, gradients=True, pad=pad_to)

    # Wrap the single query molecule as a batch of one.
    query_charges = [nuclear_charges]
    query_reps = np.array([rep])
    query_dreps = np.array([drep])

    kernel_width = 10.0

    energy_kernel = get_atomic_local_kernel(
        train_reps, query_reps, train_charges, query_charges, kernel_width)
    force_kernel = get_atomic_local_gradient_kernel(
        train_reps, query_reps, query_dreps, train_charges, query_charges,
        kernel_width)

    # Energy mean that was subtracted during training.
    offset = -97084.83100465109

    energy_predicted = np.dot(energy_kernel, coefficients)[0] + offset
    energy_true = -97086.55524903

    print("True energy %16.4f kcal/mol" % energy_true)
    print("Predicted energy %16.4f kcal/mol" % energy_predicted)

    # One row of three components per atom.
    atom_count = len(nuclear_charges)
    forces_predicted = np.dot(force_kernel, coefficients).reshape(
        (atom_count, 3))
    forces_true = np.array([
        [-66.66673100, 2.45752385, 49.92224945],
        [-17.98600137, 68.72856500, -28.82689294],
        [31.88432927, 8.98739402, -18.11946195],
        [4.19798833, -31.31692744, 8.12825145],
        [16.78395377, -24.76072606, -38.99054658],
        [6.03046276, -7.24928076, -3.88797517],
        [17.44954868, 0.21604968, 8.56118603],
        [11.73901551, -19.38200606, 13.26191987],
        [-3.43256595, 2.31940789, 9.95126984],
    ])

    print("True forces [kcal/mol]")
    print(forces_true)
    print("Predicted forces [kcal/mol]")
    print(forces_predicted)
def test_fchl_acsf_operator_dft():
    """Train on 100 random "csv_data" entries and print train/test statistics.

    Data is loaded with ``csvdir_to_reps("csv_data")`` and shuffled with
    ``np.random.shuffle``; the first 100 indices are used for training and
    the rest for testing. The function only prints; it makes no assertions.
    """
    SIGMA = 10.0

    def _report(label, reference, predicted):
        """Print MAE, regression slope/intercept and r^2 for one data set."""
        reference = np.ravel(reference)
        predicted = np.ravel(predicted)
        slope, intercept, r_value, _p_value, _std_err = \
            scipy.stats.linregress(reference, predicted)
        # linregress returns Pearson r; square it to match the "r^2" label.
        print("%s MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
                (label, np.mean(np.abs(reference - predicted)), slope, intercept, r_value ** 2))

    Xall, dXall, Qall, Eall, Fall = csvdir_to_reps("csv_data")

    # Random train/test split.
    idx = list(range(len(Eall)))
    np.random.shuffle(idx)

    print(len(idx))
    train = idx[:100]
    test = idx[100:]
    print("train = ", len(train), " test = ", len(test))

    X = Xall[train]
    dX = dXall[train]
    Q = [Qall[i] for i in train]
    E = Eall[train]
    F = [Fall[i] for i in train]

    Xs = Xall[test]
    dXs = dXall[test]
    Qs = [Qall[i] for i in test]
    Es = Eall[test]
    Fs = [Fall[i] for i in test]

    print("Representations ...")
    F = np.concatenate(F)
    Fs = np.concatenate(Fs)

    print("Kernels ...")
    Kte = get_atomic_local_kernel(X, X, Q, Q, SIGMA)
    Kse = get_atomic_local_kernel(X, Xs, Q, Qs, SIGMA)

    Kt = get_atomic_local_gradient_kernel(X, X, dX, Q, Q, SIGMA)
    Ks = get_atomic_local_gradient_kernel(X, Xs, dXs, Q, Qs, SIGMA)

    # Training system: energy rows stacked on top of force rows.
    C = np.concatenate((Kte, Kt))
    Y = np.concatenate((E, F.flatten()))

    print("Alphas operator ...")
    alpha = svd_solve(C, Y, rcond=1e-11)

    eYt = np.dot(Kte, alpha)
    eYs = np.dot(Kse, alpha)

    fYt = np.dot(Kt, alpha)
    fYs = np.dot(Ks, alpha)

    print(
        "==============================================================================================="
    )
    print(
        "==== OPERATOR, FORCE + ENERGY ==============================================================="
    )
    print(
        "==============================================================================================="
    )

    _report("TRAINING ENERGY", E, eYt)
    _report("TRAINING FORCE", F, fYt)
    _report("TEST ENERGY", Es, eYs)
    _report("TEST FORCE", Fs, fYs)
def test_fchl_acsf_operator_ccsd():
    """Train on ``FILENAME_TRAIN``, evaluate on ``FILENAME_TEST``, print statistics.

    Data is read with ``get_data_from_file`` (``n=40`` for training, ``n=20``
    for testing). The training-energy mean is subtracted from both energy
    sets. The function only prints; it makes no assertions.
    """
    SIGMA = 10.0

    def _report(label, reference, predicted):
        """Print MAE, regression slope/intercept and r^2 for one data set."""
        reference = np.ravel(reference)
        predicted = np.ravel(predicted)
        slope, intercept, r_value, _p_value, _std_err = \
            scipy.stats.linregress(reference, predicted)
        # linregress returns Pearson r; square it to match the "r^2" label.
        print("%s MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
                (label, np.mean(np.abs(reference - predicted)), slope, intercept, r_value ** 2))

    X, dX, Q, E, F = get_data_from_file(FILENAME_TRAIN, n=40)
    Xs, dXs, Qs, Es, Fs = get_data_from_file(FILENAME_TEST, n=20)

    # Centre both energy sets on the training mean (in place).
    offset = E.mean()
    E -= offset
    Es -= offset

    print("Representations ...")
    F = np.concatenate(F)
    Fs = np.concatenate(Fs)

    print("Kernels ...")
    Kte = get_atomic_local_kernel(X, X, Q, Q, SIGMA)
    Kse = get_atomic_local_kernel(X, Xs, Q, Qs, SIGMA)

    Kt = get_atomic_local_gradient_kernel(X, X, dX, Q, Q, SIGMA)
    Ks = get_atomic_local_gradient_kernel(X, Xs, dXs, Q, Qs, SIGMA)

    # Training system: energy rows stacked on top of force rows.
    C = np.concatenate((Kte, Kt))
    Y = np.concatenate((E, F.flatten()))

    print("Alphas operator ...")
    alpha = svd_solve(C, Y, rcond=1e-11)

    eYt = np.dot(Kte, alpha)
    eYs = np.dot(Kse, alpha)

    fYt = np.dot(Kt, alpha)
    fYs = np.dot(Ks, alpha)

    print(
        "==============================================================================================="
    )
    print(
        "==== OPERATOR, FORCE + ENERGY ==============================================================="
    )
    print(
        "==============================================================================================="
    )

    _report("TRAINING ENERGY", E, eYt)
    _report("TRAINING FORCE", F, fYt)
    _report("TEST ENERGY", Es, eYs)
    _report("TEST FORCE", Fs, fYs)