Example No. 1
def test_laplacian_kernel():

    import numpy as np
    from qml.kernels import laplacian_kernel

    np.random.seed(666)

    n_train = 25
    n_test = 20

    # List of dummy representations
    X = np.random.rand(n_train, 1000)
    Xs = np.random.rand(n_test, 1000)

    sigma = 100.0

    Ktest = np.zeros((n_train, n_test))

    for i in range(n_train):
        for j in range(n_test):
            Ktest[i, j] = np.exp(np.sum(np.abs(X[i] - Xs[j])) / (-1.0 * sigma))

    K = laplacian_kernel(X, Xs, sigma)

    # Compare two implementations:
    assert np.allclose(K, Ktest), "Error in Laplacian kernel"

    Ksymm = laplacian_kernel(X, X, sigma)

    # Check for symmetry:
    assert np.allclose(Ksymm, Ksymm.T), "Error in Laplacian kernel"
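As a side note, the same reference matrix can be built without the double loop. The lines below are only an illustrative sketch of the formula K[i, j] = exp(-||X[i] - Xs[j]||_1 / sigma); they assume SciPy is available and are not part of the original test:

# Hedged sketch: vectorized Laplacian reference via pairwise city-block distances.
from scipy.spatial.distance import cdist
Kref = np.exp(-cdist(X, Xs, metric="cityblock") / sigma)
assert np.allclose(Kref, Ktest), "Mismatch with the loop-based reference"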
Example No. 2
def test_krr_cmat():

    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys())[:1000]:

        # Initialize the qml.Compound() objects
        mol = qml.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # This is a Molecular Coulomb matrix sorted by row norm
        mol.generate_coulomb_matrix(size=23, sorting="row-norm")

        mols.append(mol)

    # Shuffle molecules
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 300
    n_train = 700

    training = mols[:n_train]
    test = mols[-n_test:]

    # List of representations
    X = np.array([mol.representation for mol in training])
    Xs = np.array([mol.representation for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    # Set hyper-parameters
    sigma = 10**(4.2)
    llambda = 10**(-10.0)

    # Generate training Kernel
    K = laplacian_kernel(X, X, sigma)

    # Solve alpha
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Calculate prediction kernel
    Ks = laplacian_kernel(X, Xs, sigma)
    Yss = np.dot(Ks.transpose(), alpha)

    mae = np.mean(np.abs(Ys - Yss))

    assert mae < 6.0, "ERROR: Too high MAE!"
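For reference, the solve above is the standard KRR step alpha = (K + lambda*I)^-1 Y. As an illustrative cross-check (not part of the original test), the same coefficients can be obtained with NumPy's dense solver, since llambda has already been added to the diagonal of K at this point:

# Hedged sketch only: dense solve of the regularized kernel system.
alpha_dense = np.linalg.solve(K, Y)
Yss_dense = np.dot(Ks.T, alpha_dense)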
Example No. 3
    def laplacian_kernel_matrix(self, x_training, x_test):

        # Create the full Laplacian kernel matrix
        K = laplacian_kernel(x_test, x_training, self.sigma)

        self.full_kernel_matrix = K

        return K
Example No. 4
def get_alphas_python(X, Y, sigma):
    """ Get alpha vectors through python.
    """

    K = laplacian_kernel(X, X, sigma)
    alpha, residues, rank, singular_values = lstsq(K, Y, lapack_driver="gelsd")

    return alpha
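A minimal, self-contained usage sketch; the random data and names below are illustrative only and not taken from the source:

import numpy as np
from scipy.linalg import lstsq
from qml.kernels import laplacian_kernel

X_demo = np.random.rand(10, 50)   # 10 dummy representations
Y_demo = np.random.rand(10)       # 10 dummy properties
alpha_demo = get_alphas_python(X_demo, Y_demo, sigma=100.0)
print(alpha_demo.shape)           # -> (10,)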
Example No. 5
def get_predictions(mols_pred, X, X_pred, alpha, sigma):
  ''' Predicts properties for the molecules in mols_pred and prints them.
  '''
  K_pred = laplacian_kernel(X, X_pred, sigma)

  Yss = np.dot(K_pred.T, alpha)

  for i in range(len(Yss)):
    print(str(mols_pred[i].name) + "\t" + str(Yss[i]))
Example No. 6
def get_alphas(X, Y, sigma, llambda):
  ''' Calculates the regression coefficients alpha via kernel ridge regression.
  '''
  K = laplacian_kernel(X, X, sigma)

  C = deepcopy(K)
  C[np.diag_indices_from(C)] += llambda

  alpha = cho_solve(C, Y)

  return alpha
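The deepcopy keeps the original kernel matrix K untouched while the copy is regularized. Adding llambda to the diagonal of C is equivalent to adding llambda times the identity matrix, as Example No. 12 below does; an illustrative check (not part of the source) would be:

# Hedged sketch: both regularization styles yield the same matrix.
C_alt = K + llambda * np.identity(K.shape[0])
assert np.allclose(C_alt, C)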
Example No. 7
def test_kernels():

    import sys
    import numpy as np
    import qml
    from qml.kernels import laplacian_kernel
    from qml.kernels import gaussian_kernel

    n_train = 25
    n_test = 20

    # List of dummy representations
    X = np.random.rand(n_train, 1000)
    Xs = np.random.rand(n_test, 1000)

    sigma = 100.0

    Gtest = np.zeros((n_train, n_test))
    Ltest = np.zeros((n_train, n_test))


    for i in range(n_train):
        for j in range(n_test):
            Gtest[i, j] = np.exp(np.sum(np.square(X[i] - Xs[j])) / (-2.0 * sigma**2))
            Ltest[i, j] = np.exp(np.sum(np.abs(X[i] - Xs[j])) / (-1.0 * sigma))

    G = gaussian_kernel(X, Xs, sigma)
    L = laplacian_kernel(X, Xs, sigma)

    # Compare two implementations:
    assert np.allclose(G, Gtest), "Error in Gaussian kernel"
    assert np.allclose(L, Ltest), "Error in Laplacian kernel"

    Gsymm = gaussian_kernel(X, X, sigma)
    Lsymm = laplacian_kernel(X, X, sigma)

    # Check for symmetry:
    assert np.allclose(Gsymm, Gsymm.T), "Error in Gaussian kernel"
    assert np.allclose(Lsymm, Lsymm.T), "Error in Laplacian kernel"
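As in Example No. 1, both reference kernels can also be built without the double loop. The SciPy-based lines below are an illustrative cross-check only and assume SciPy is available:

from scipy.spatial.distance import cdist
Gref = np.exp(-cdist(X, Xs, metric="sqeuclidean") / (2.0 * sigma**2))
Lref = np.exp(-cdist(X, Xs, metric="cityblock") / sigma)
assert np.allclose(Gref, Gtest) and np.allclose(Lref, Ltest)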
Example No. 8
def get_learning_curve(X, X_test, Y, Y_test, sigma, llambda, Ntot):
  ''' generate data (predictions) for learning curves
  '''
  K = laplacian_kernel(X, X, sigma)
  K_test = laplacian_kernel(X, X_test, sigma)

  N = []
  j = 10

  while j < Ntot:
    N.append(j)
    j *= 2

  N.append(Ntot)

  random.seed(667)

  for train in N:
    maes = []

    for i in range(10):
      split = list(range(Ntot))
      random.shuffle(split)

      training_index = split[:train]

      y = Y[training_index]

      C = deepcopy(K[training_index][:,training_index])
      C[np.diag_indices_from(C)] += llambda 
                                                 
      alpha = cho_solve(C, y)                          

      Yss = np.dot(K_test[training_index].T, alpha)

      diff = Yss - Y_test
      mae = np.mean(np.abs(diff))
      maes.append(mae)

    print(str(train) + "\t" + str(sum(maes)/len(maes)))
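A hypothetical invocation with random data; the demo arrays and hyperparameter values below are assumptions, and the module-level imports (numpy, random, deepcopy, laplacian_kernel, cho_solve) are taken to be the same ones the function itself relies on:

X_demo = np.random.rand(80, 30)    # dummy training representations
Y_demo = np.random.rand(80)        # dummy training properties
Xt_demo = np.random.rand(20, 30)   # dummy test representations
Yt_demo = np.random.rand(20)       # dummy test properties
get_learning_curve(X_demo, Xt_demo, Y_demo, Yt_demo,
                   sigma=100.0, llambda=1e-8, Ntot=80)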
Example No. 9
def get_kernel(X1, X2, charges1, charges2, sigma=1, mode="local"):
    """

    mode local or atomic
    """

    if len(X1.shape) > 2:

        K = get_atomic_local_kernel(X1, X2, charges1, charges2, sigma)

    else:

        K = laplacian_kernel(X2, X1, sigma)

    return K
Example No. 10
def cross_validation(X, Y, sigmas, llambdas, Ntot):
  """ finds optimal hyperparameters sigma & lambda using cross validation
  """
  parameters = []
  random.seed(666)

  for i in range(len(sigmas)):
    K = laplacian_kernel(X, X, sigmas[i])

    for j in range(len(llambdas)):
      maes = []  # collect the MAE of each of the 5 random splits

      for m in range(5):
        split = list(range(Ntot))
        random.shuffle(split)

        train = int(len(split)*0.8)
        test  = int(Ntot - train)

        training_index  = split[:train]
        test_index      = split[-test:]

        y_train = Y[training_index]
        y_test  = Y[test_index]

        C = deepcopy(K[training_index][:,training_index])
        C[np.diag_indices_from(C)] += llambdas[j]

        alpha = cho_solve(C, y_train)

        y_est = np.dot((K[training_index][:,test_index]).T, alpha)

        diff = y_est  - y_test
        mae = np.mean(np.abs(diff))
        maes.append(mae)

      parameters.append([llambdas[j], sigmas[i], np.mean(maes)])

  maes = [mae[2] for mae in parameters]
  index = maes.index(min(maes))

  print("minimum MAE after CV: ", min(maes))

  return parameters[index][0], parameters[index][1]
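A hypothetical call; the hyperparameter grids below are illustrative and assume X, Y are pre-computed representations and properties:

sigmas = [10.0**k for k in range(1, 5)]
llambdas = [1e-10, 1e-8, 1e-6]
best_llambda, best_sigma = cross_validation(X, Y, sigmas, llambdas, Ntot=len(X))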
Example No. 11
    # List of representations
    X = np.array([mol.coulomb_matrix for mol in training])
    Xs = np.array([mol.coulomb_matrix for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    # Set hyper-parameters
    sigma = 10**(4.2)
    llambda = 10**(-10.0)

    # Generate training Kernel
    print("Calculating training kernel ...")
    K = laplacian_kernel(X, X, sigma)

    # Solve alpha
    print("Solving alphas ...")
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Calculate prediction kernel
    print("Calculating prediction kernel ...")
    Ks = laplacian_kernel(X, Xs, sigma)
    Yss = np.dot(Ks.transpose(), alpha)

    # Print final RMSD
    rmsd = np.sqrt(np.mean(np.square(Ys - Yss)))
    print("RMSD = %6.2f kcal/mol" % rmsd)
Example No. 12
test_filenames = filenames[500:750]

# hyper parameters
sigmas = [1.0, 10.0, 10.0**2, 10.0**3]
cutoffs = [2.0, 3.0, 4.0]
llambda = 1e-8  # doesn't usually need to be changed

# try 3 different cutoffs
for cutoff in cutoffs:
    train_x, train_y = get_descriptor_and_property(train_filenames, atype,
                                                   cutoff)
    test_x, test_y = get_descriptor_and_property(test_filenames, atype, cutoff)
    # in this case try out 4 different values of sigma
    for sigma in sigmas:
        # Get the kernel between all descriptors in the training set
        K = laplacian_kernel(train_x, train_x,
                             sigma) + llambda * np.identity(train_x.shape[0])

        # get the KRR coefficients (alpha); this is the training step of the model
        alpha = cho_solve(K, train_y)

        # get the kernel between all descriptors in the training set and all in the test set
        Ks = laplacian_kernel(test_x, train_x, sigma)

        # predict values of y
        y_pred = np.dot(Ks, alpha)

        print(
            "predicted MAE of %.4f for sigma: %.4g, cutoff: %.1f and %d training points"
            % (calc_mae(y_pred, test_y), sigma, cutoff, len(train_x)))
Example No. 13
    X = get_rep(names)
    X = X.reshape(len(mols), 2)
    X_test = get_rep(names2)
    X_test = X_test.reshape(len(mols_test), 2)

    Yprime = np.asarray([mol.properties for mol in mols])
    Y_test = np.asarray([mol.properties for mol in mols_test])

    random.seed(667)

    for j in range(len(sigma)):
        print('\n')
        for l in ll:
            print()
            K = laplacian_kernel(X, X, sigma[j])
            K_test = laplacian_kernel(X, X_test, sigma[j])
            for train in N:
                maes = []
                for i in range(nModels):
                    split = list(range(total))
                    random.shuffle(split)

                    training_index = split[:train]

                    Y = Yprime[training_index]

                    C = deepcopy(K[training_index][:, training_index])
                    C[np.diag_indices_from(C)] += l

                    alpha = cho_solve(C, Y)