# Feature vectors for the test set: eigenvalue spectrum of each matrix M.
# scipy's eigh returns eigenvalues in ascending order; [::-1] reverses them
# so each descriptor is sorted in descending order (matching eigX, presumably
# built the same way upstream -- confirm against the feature-building code).
eigt = [(eigh(M, eigvals_only=True))[::-1] for M in Xtest]
print "TIMER eval_features", time.clock() - ttt

# Observations (training targets), flattened to 1-D for the GP fit.
y = T.ravel()

# NOTE(review): alpha/covmat are initialized but not appended to inside this
# loop -- presumably consumed or filled later in the file; verify.
alpha = []
covmat = []
# Scan the kernel length-scale hyperparameter theta0 over a log grid:
# 7 values, 10^-2 .. 10^5.
for theta0 in [10.0**i for i in sp.linspace(-2,5,7)]: # sp.linspace(1,1, N_models):
    # Setup a Gaussian Process model with a Laplacian (absolute exponential)
    # kernel; nugget is the regularization/noise term added to the diagonal.
    ttt = time.clock()
    gp = GaussianProcess(corr='absolute_exponential',
                         theta0=sp.asarray([theta0]),
                         nugget=1e-3,
                         verbose=True, low_memory=False)
    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(eigX, y)
    print "TIMER teach", time.clock() - ttt
    ttt = time.clock()

    # # Make the prediction on training set
    # y_pred, MSE = gp.predict(eigX, eval_MSE=True)
    # sigma = sp.sqrt(MSE)
    # print('\n training set:')
    # print('MAE: %5.2f kcal/mol' % sp.absolute(y_pred-y).mean(axis=0))
    # print('RMSE: %5.2f kcal/mol' % sp.square(y_pred-y).mean(axis=0)**.5)

    # Make the prediction on test set; eval_MSE=True also returns the
    # predictive variance, from which the per-point sigma is derived.
    y_pred, MSE = gp.predict(eigt, eval_MSE=True)
    sigma = sp.sqrt(MSE)
    # Report mean-absolute and root-mean-square error against the test targets.
    print('\n test set:')
    print('MAE: %5.2f kcal/mol' % sp.absolute(y_pred-Ttest.ravel()).mean(axis=0))
    print('RMSE: %5.2f kcal/mol' %sp.square(y_pred-Ttest.ravel()).mean(axis=0)**.5)
# -------------------------------------------- # Do len(y_t) teachings by including db + 1 configurations # -------------------------------------------- alphas = teach_database_plusone(gp, eigX_db, y_db, eigX_t, y_t) alpha_target.append(alphas) # -------------------------------------------- # -------------------------------------------- # Second time don't include the test set and predict # -------------------------------------------- ttt = time.clock() gp.flush_data() # Fit to data gp.fit(eigX_db, y_db) print "TIMER teach", time.clock() - ttt beta = sp.dot(gp.inverse, gp.alpha) y_pred, k = gp.predict(eigX_t, return_k=True) # -------------------------------------------- # predict the alphas the K-1 * K-1 * k way # -------------------------------------------- alpha_predicted.append(sp.dot(k, beta.flatten())) energy_target.append(y_t) energy_error.append(y_pred - y_t) # check whether the ML itself is doing sensible things print "ERROR = ", energy_error[-1] print "ALPHA TRUE vs. PREDICTED:", alphas, alpha_predicted[-1]
# -------------------------------------------- # Do len(y_t) teachings by including db + 1 configurations # -------------------------------------------- alphas = teach_database_plusone(gp, eigX_db, y_db, eigX_t, y_t) alpha_target.append(alphas) gp.flush_data() # -------------------------------------------- # -------------------------------------------- # Second time don't include the test set and predict # -------------------------------------------- ttt = time.clock() # Fit to data gp.fit(eigX_db, y_db) gp_level2.fit(eigX_db, gp.alpha.flatten()) print "TIMER teach", time.clock() - ttt y_pred = gp.predict(eigX_t) # -------------------------------------------- # predict the alphas # -------------------------------------------- alpha_pred = gp_level2.predict(eigX_t) alpha_predicted.append(alpha_pred.flatten()) energy_target.append(y_t) energy_error.append(y_pred - y_t)
# Observations (training targets), flattened to 1-D for the GP fit.
y = T.ravel()

# NOTE(review): alpha/covmat are initialized but not appended to inside this
# loop -- presumably consumed or filled later in the file; verify.
alpha = []
covmat = []
# Scan the kernel length-scale hyperparameter theta0 over a log grid:
# 7 values, 10^-2 .. 10^5.
for theta0 in [10.0**i for i in sp.linspace(-2, 5, 7)]: # sp.linspace(1,1, N_models):
    # Setup a Gaussian Process model with a Laplacian (absolute exponential)
    # kernel; nugget is the regularization/noise term added to the diagonal.
    ttt = time.clock()
    gp = GaussianProcess(corr='absolute_exponential',
                         theta0=sp.asarray([theta0]),
                         nugget=1e-3,
                         verbose=True, low_memory=False)
    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(eigX, y)
    print "TIMER teach", time.clock() - ttt
    ttt = time.clock()

    # # Make the prediction on training set
    # y_pred, MSE = gp.predict(eigX, eval_MSE=True)
    # sigma = sp.sqrt(MSE)
    # print('\n training set:')
    # print('MAE: %5.2f kcal/mol' % sp.absolute(y_pred-y).mean(axis=0))
    # print('RMSE: %5.2f kcal/mol' % sp.square(y_pred-y).mean(axis=0)**.5)

    # Make the prediction on test set; eval_MSE=True also returns the
    # predictive variance, from which the per-point sigma is derived.
    y_pred, MSE = gp.predict(eigt, eval_MSE=True)
    sigma = sp.sqrt(MSE)
    # Report mean-absolute error against the test targets.
    print('\n test set:')
    print('MAE: %5.2f kcal/mol' % sp.absolute(y_pred - Ttest.ravel()).mean(axis=0))
# -------------------------------------------- # Do len(y_t) teachings by including db + 1 configurations # -------------------------------------------- alphas = teach_database_plusone(gp, eigX_db, y_db, eigX_t, y_t) alpha_target.append(alphas) gp.flush_data() # -------------------------------------------- # -------------------------------------------- # Second time don't include the test set and predict # -------------------------------------------- ttt = time.clock() # Fit to data gp.fit(eigX_db, y_db) gp_level2.fit(eigX_db, gp.alpha.flatten()) print "TIMER teach", time.clock() - ttt y_pred = gp.predict(eigX_t) # -------------------------------------------- # predict the alphas # -------------------------------------------- alpha_pred = gp_level2.predict(eigX_t) alpha_predicted.append(alpha_pred.flatten()) energy_target.append(y_t) energy_error.append(y_pred - y_t)