def test_krr_bob():
    """Run Laplacian kernel ridge regression on the QM7 set and print the MAE.

    Every compound listed in ``data/hof_qm7.txt`` is loaded and given a
    representation via ``generate_coulomb_matrix()`` (despite the ``bob`` in
    the test name). After a seeded shuffle, the first 4000 compounds are used
    for training and the last 2000 for prediction. Nothing is asserted; the
    mean absolute error is only printed.
    """
    base_dir = os.path.dirname(os.path.realpath(__file__))

    # PBE0/def2-TZVP heats of formation, keyed by xyz filename
    hof_by_file = get_energies(base_dir + "/data/hof_qm7.txt")

    compounds = []
    for fname in sorted(hof_by_file.keys()):
        compound = qml.data.Compound(xyz=base_dir + "/qm7/" + fname)
        # Attach the heat of formation to the compound
        compound.properties = hof_by_file[fname]
        compound.generate_coulomb_matrix()
        compounds.append(compound)

    # Fixed seed keeps the train/test split reproducible
    np.random.seed(666)
    np.random.shuffle(compounds)

    n_train, n_test = 4000, 2000
    train_set = compounds[:n_train]
    test_set = compounds[-n_test:]

    # Representations and properties for both subsets
    X = np.array([c.representation for c in train_set])
    Y = np.array([c.properties for c in train_set])
    Xs = np.array([c.representation for c in test_set])
    Ys = np.array([c.properties for c in test_set])

    # Hyper-parameters: sigma is passed to the kernel, llambda is added to
    # the diagonal of the training kernel before solving
    sigma, llambda = 4000.40, 1e-11

    # Training kernel and regression coefficients
    K = laplacian_kernel(X, X, sigma)
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Prediction kernel (train x test), transposed before the dot product
    Ks = laplacian_kernel(X, Xs, sigma)
    predicted = np.dot(Ks.transpose(), alpha)

    abs_errors = np.abs(Ys - predicted)
    mae = np.mean(abs_errors)
    print(mae)
def test_arad():
    """Check that symmetric and asymmetric ARAD kernels agree and hold no NaN.

    The first ten compounds (by sorted filename) from ``data/hof_qm7.txt``
    are given an ARAD representation. For the local, global and atomic kernel
    variants, the kernel computed by the two-argument function on ``(X, X)``
    is compared with the one from the dedicated symmetric function.

    Raises:
        AssertionError: if a kernel contains any NaN entry, or if the
            symmetric and asymmetric kernels differ.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.data.Compound() objects
    mols = []
    for xyz_file in sorted(data.keys())[:10]:
        mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # ARAD representation built from coordinates and nuclear charges
        mol.representation = generate_arad_representation(
            mol.coordinates, mol.nuclear_charges)
        mols.append(mol)

    sigmas = [25.0]
    X1 = np.array([mol.representation for mol in mols])

    # The NaN guards run before the allclose comparisons: np.allclose treats
    # NaN as unequal by default, so it would otherwise fail first and hide
    # the more specific message. The guard rejects ANY NaN entry.
    K_local_asymm = get_local_kernels_arad(X1, X1, sigmas)
    K_local_symm = get_local_symmetric_kernels_arad(X1, sigmas)
    assert not np.any(np.isnan(K_local_asymm)), \
        "ERROR: ARAD local symmetric kernel contains NaN"
    assert np.allclose(K_local_symm, K_local_asymm), \
        "Symmetry error in local kernels"

    K_global_asymm = get_global_kernels_arad(X1, X1, sigmas)
    K_global_symm = get_global_symmetric_kernels_arad(X1, sigmas)
    assert not np.any(np.isnan(K_global_asymm)), \
        "ERROR: ARAD global symmetric kernel contains NaN"
    assert np.allclose(K_global_symm, K_global_asymm), \
        "Symmetry error in global kernels"

    # Atomic kernels are checked on a single molecule, with the
    # representation sized to that molecule's atom count
    molid = 5
    single = mols[molid]
    X_single = generate_arad_representation(
        single.coordinates, single.nuclear_charges, size=single.natoms)
    XA = X_single[:single.natoms]

    K_atomic_asymm = get_atomic_kernels_arad(XA, XA, sigmas)
    K_atomic_symm = get_atomic_symmetric_kernels_arad(XA, sigmas)
    assert not np.any(np.isnan(K_atomic_asymm)), \
        "ERROR: ARAD atomic symmetric kernel contains NaN"
    assert np.allclose(K_atomic_symm, K_atomic_asymm), \
        "Symmetry error in atomic kernels"
def test_nn_bob():
    """Train ``NeuralNet`` on QM7 Coulomb-matrix representations and print errors.

    All compounds from ``data/hof_qm7.txt`` get a representation via
    ``generate_coulomb_matrix()``. Their properties are replaced by the first
    array returned from ``get_mean_atomic_contribution``. After a seeded
    shuffle, the first 4000 compounds train the model and the last 2000 are
    used for evaluation. Train and test MAE/RMSD are printed every 1000
    epochs. Nothing is asserted.

    The model runs on ``cuda:0`` when CUDA is available and on the CPU
    otherwise.

    NOTE(review): a second function named ``test_nn_bob`` is defined later in
    this file. The later ``def`` rebinds the name, so this definition is not
    the one a test runner will find - consider renaming one of them.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.data.Compound() objects
    mols = []
    for xyz_file in sorted(data.keys()):
        mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]
        mol.generate_coulomb_matrix()
        mols.append(mol)

    # Replace each property with the per-molecule value returned by
    # get_mean_atomic_contribution (its second return value is not used here)
    es = np.array([mol.properties for mol in mols])
    fc, _ = get_mean_atomic_contribution(mols, es)
    for mol, contribution in zip(mols, fc):
        mol.properties = contribution

    # Shuffle molecules (fixed seed keeps the split reproducible)
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 2000
    n_train = 4000
    training = mols[:n_train]
    test = mols[-n_test:]

    # List of representations
    X = np.array([mol.representation for mol in training])
    Xs = np.array([mol.representation for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    print(X.shape)
    print(Y.shape)

    # Fall back to the CPU so the test does not crash on CUDA-less machines
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Hidden-layer sizes handed to NeuralNet
    H, H2 = 50, 10

    x = torch.from_numpy(X).to(device, torch.float)
    y = torch.from_numpy(Y.reshape((n_train, 1))).to(device, torch.float)
    xs = torch.from_numpy(Xs).to(device, torch.float)
    ys = torch.from_numpy(Ys.reshape((n_test, 1))).to(device, torch.float)

    model = NeuralNet(X.shape[1], H, H2).to(device)

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # Scale applied to every reported error.
    # NOTE(review): presumably a Hartree -> kcal/mol factor - confirm units.
    scale = 627.51

    for epoch in range(100000):
        yt = model(x)

        if epoch % 1000 == 0:
            # Reporting only: no autograd graph is needed for these metrics
            with torch.no_grad():
                train_err = y - yt
                rmsd = torch.sqrt(train_err.pow(2).sum() / n_train) * scale
                mae = train_err.abs().sum() * scale / n_train

                test_err = model(xs) - ys
                rmsd_s = torch.sqrt(test_err.pow(2).sum() / n_test) * scale
                mae_s = test_err.abs().sum() * scale / n_test

            print(epoch, mae.item(), rmsd.item(), mae_s.item(), rmsd_s.item())

        # One optimizer step per epoch on the full training batch
        loss = criterion(yt, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Compare the first ten training predictions with their targets
    print(yt[:10] * 627.51, y[:10] * 627.51)
def test_nn_bob():
    """Train a hand-written two-layer ReLU network on QM7 and print the losses.

    All compounds from ``data/hof_qm7.txt`` get a representation via
    ``generate_eigenvalue_coulomb_matrix()``. After an unseeded shuffle, the
    first 1000 compounds are used for training and the last 1000 for
    validation. The forward pass, the gradients and the gradient-descent
    update are written out explicitly with tensor operations (no autograd).
    Nothing is asserted.

    The tensors live on ``cuda:0`` when CUDA is available and on the CPU
    otherwise.

    NOTE(review): an earlier function in this file is also named
    ``test_nn_bob``. This later definition rebinds the name and therefore
    shadows the earlier one - consider renaming one of them.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.data.Compound() objects
    mols = []
    for xyz_file in sorted(data.keys()):
        mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]
        mol.generate_eigenvalue_coulomb_matrix()
        mols.append(mol)

    # Shuffle molecules. No seed is set, so the split differs between runs.
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 1000
    n_train = 1000
    training = mols[:n_train]
    test = mols[-n_test:]

    # List of representations
    X = np.array([mol.representation for mol in training])
    Xs = np.array([mol.representation for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    print(X.shape)
    print(Y.shape)

    import torch

    dtype = torch.float
    # Fall back to the CPU so the test does not crash on CUDA-less machines
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Input width, hidden width and output width of the network
    D_in, H, D_out = X.shape[1], 64, 1

    x = torch.from_numpy(X).to(device, torch.float)
    y = torch.from_numpy(Y.reshape((n_train, 1))).to(device, torch.float)
    xs = torch.from_numpy(Xs).to(device, torch.float)
    ys = torch.from_numpy(Ys.reshape((n_test, 1))).to(device, torch.float)

    # Randomly initialize weights
    w1 = torch.randn(D_in, H, device=device, dtype=dtype)
    w2 = torch.randn(H, D_out, device=device, dtype=dtype)

    learning_rate = 1e-8
    for t in range(5000):
        # Forward pass: compute predicted y
        h = x.mm(w1)
        h_relu = h.clamp(min=0)
        y_pred = h_relu.mm(w2)

        # Sum of squared errors on the training and validation sets
        loss = (y_pred - y).pow(2).sum().item()

        hval = xs.mm(w1)
        hval_relu = hval.clamp(min=0)
        yval_pred = hval_relu.mm(w2)
        loss2 = (yval_pred - ys).pow(2).sum().item()

        # NOTE(review): this prints a mean squared error scaled linearly by
        # 627.51, which is neither an RMSD nor an MAE - confirm the intended
        # metric and units.
        print(t, loss / n_train * 627.51, loss2 / n_test * 627.51)

        # Backprop to compute gradients of w1 and w2 with respect to loss
        grad_y_pred = 2.0 * (y_pred - y)
        grad_w2 = h_relu.t().mm(grad_y_pred)
        grad_h_relu = grad_y_pred.mm(w2.t())
        grad_h = grad_h_relu.clone()
        # ReLU passes no gradient where its input was negative
        grad_h[h < 0] = 0
        grad_w1 = x.t().mm(grad_h)

        # Update weights using gradient descent
        w1 -= learning_rate * grad_w1
        w2 -= learning_rate * grad_w2
def test_krr_gaussian_local_cmat():
    """Test the local Gaussian kernel and a kernel ridge regression built on it.

    The first 1000 compounds (by sorted filename) from ``data/hof_qm7.txt``
    get atomic Coulomb matrices (``size=23``, ``sorting="row-norm"``). After a
    seeded shuffle, the first 200 are used for training and the last 100 for
    testing. The training and prediction kernels are checked against stored
    reference files and against ``get_atomic_kernels_gaussian``, then the
    regression MAE is checked.

    Raises:
        AssertionError: if the training kernel is not symmetric, if a kernel
            disagrees with its reference file or with the wrapper, or if the
            MAE is not within 1.0 of 19.0.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.data.Compound() objects
    mols = []
    for xyz_file in sorted(data.keys())[:1000]:
        mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # Atomic Coulomb matrices, sorted by row norm
        mol.generate_atomic_coulomb_matrix(size=23, sorting="row-norm")
        mols.append(mol)

    # Shuffle molecules (fixed seed keeps the split reproducible)
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 100
    n_train = 200
    training = mols[:n_train]
    test = mols[-n_test:]

    # Per-atom representations are stacked; N/Ns record the atom count of
    # each molecule and are passed to the kernel alongside them
    X = np.concatenate([mol.representation for mol in training])
    Xs = np.concatenate([mol.representation for mol in test])
    N = np.array([mol.natoms for mol in training])
    Ns = np.array([mol.natoms for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    # Set hyper-parameters
    sigma = 724.0
    llambda = 10**(-6.5)

    K = get_local_kernels_gaussian(X, X, N, N, [sigma])[0]
    assert np.allclose(K, K.T), "Error in local Gaussian kernel symmetry"

    K_test = np.loadtxt(test_dir + "/data/K_local_gaussian.txt")
    assert np.allclose(
        K, K_test), "Error in local Gaussian kernel (vs. reference)"

    K_test = get_atomic_kernels_gaussian(training, training, [sigma])[0]
    assert np.allclose(K, K_test), "Error in local Gaussian kernel (vs. wrapper)"

    # Solve alpha
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Calculate prediction kernel
    Ks = get_local_kernels_gaussian(Xs, X, Ns, N, [sigma])[0]
    Ks_test = np.loadtxt(test_dir + "/data/Ks_local_gaussian.txt")

    # Sometimes a few Coulomb matrices differ because of parallel sorting and
    # numerical error, so fewer than 5 rows (test molecules) may deviate from
    # the supplied reference. The absolute difference is used so deviations
    # in either direction are counted.
    differing_rows = np.where(np.abs(Ks - Ks_test) > 1e-7)[0]
    differences_count = len(set(differing_rows))
    assert differences_count < 5, "Error in local Gaussian kernel (vs. reference)"

    Ks_test = get_atomic_kernels_gaussian(test, training, [sigma])[0]
    assert np.allclose(Ks, Ks_test), "Error in local Gaussian kernel (vs. wrapper)"

    Yss = np.dot(Ks, alpha)
    mae = np.mean(np.abs(Ys - Yss))
    print(mae)
    assert abs(19.0 - mae) < 1.0, "Error in local Gaussian kernel-ridge regression"
def test_krr_laplacian_local_cmat():
    """Test the local Laplacian kernel and a kernel ridge regression built on it.

    The first 1000 compounds (by sorted filename) from ``data/hof_qm7.txt``
    get atomic Coulomb matrices (``size=23``, ``sorting="row-norm"``). After a
    seeded shuffle, the first 200 are used for training and the last 100 for
    testing. The training kernel must be symmetric and the regression MAE
    must lie within 1.0 of 8.7.
    """
    base_dir = os.path.dirname(os.path.realpath(__file__))

    # PBE0/def2-TZVP heats of formation, keyed by xyz filename
    hof_by_file = get_energies(base_dir + "/data/hof_qm7.txt")

    compounds = []
    for fname in sorted(hof_by_file.keys())[:1000]:
        compound = qml.data.Compound(xyz=base_dir + "/qm7/" + fname)
        # Attach the heat of formation to the compound
        compound.properties = hof_by_file[fname]
        # Atomic Coulomb matrices, sorted by row norm
        compound.generate_atomic_coulomb_matrix(size=23, sorting="row-norm")
        compounds.append(compound)

    # Fixed seed keeps the train/test split reproducible
    np.random.seed(666)
    np.random.shuffle(compounds)

    n_train, n_test = 200, 100
    train_set = compounds[:n_train]
    test_set = compounds[-n_test:]

    # Stacked per-atom representations with the matching atom counts
    X = np.concatenate([c.representation for c in train_set])
    N = np.array([c.natoms for c in train_set])
    Xs = np.concatenate([c.representation for c in test_set])
    Ns = np.array([c.natoms for c in test_set])

    # Target properties
    Y = np.array([c.properties for c in train_set])
    Ys = np.array([c.properties for c in test_set])

    # Hyper-parameters
    sigma = 10 ** 3.6
    llambda = 10 ** (-12.0)

    K = get_local_kernels_laplacian(X, X, N, N, [sigma])[0]
    is_symmetric = np.allclose(K, K.T)
    assert is_symmetric, "Error in local Laplacian kernel symmetry"

    # No comparison against data/K_local_laplacian.txt or
    # data/Ks_local_laplacian.txt is made here: such a check sometimes fails
    # because the sorting occasionally differs when row norms are close.

    # Regularize the diagonal, then solve for the regression coefficients
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Prediction kernel (test x train) and predicted properties
    Ks = get_local_kernels_laplacian(Xs, X, Ns, N, [sigma])[0]
    predicted = np.dot(Ks, alpha)

    abs_errors = np.abs(Ys - predicted)
    mae = np.mean(abs_errors)
    assert abs(8.7 - mae) < 1.0, "Error in local Laplacian kernel-ridge regression"