def plot_diff(x_list: list, y_list: list):
    """Plot the experimental and calculated solvation free energy distributions
    (top panel) and the distribution of their differences (bottom panel),
    annotated with MAE, RMSE and the KL divergence."""
    fontsize = 27
    from neutromeratio.analysis import compute_kl_divergence, bootstrap_rmse_r

    plt.figure(figsize=[8, 8], dpi=300)

    # top panel: overlay of the experimental and calculated distributions
    plt.subplot(211)
    sns.distplot(x_list, kde=True, rug=True, bins=15, label=r"$\Delta G_{solv}^{exp}$")
    sns.distplot(y_list, kde=True, rug=True, bins=15, label=r"$\Delta G_{solv}^{calc}$")
    plt.xlabel(r"$\Delta G_{solv}$ [kcal/mol]", fontsize=fontsize)
    plt.ylabel("Probability", fontsize=fontsize)
    plt.axvline(0, 0, 15, color="red")
    kl = compute_kl_divergence(np.array(x_list), np.array(y_list))
    rmse, mae, rho = bootstrap_rmse_r(np.array(x_list), np.array(y_list), 1000)
    plt.text(-13.0, 0.15, f"MAE$ = {mae}$", fontsize=fontsize)
    plt.text(-13.0, 0.13, f"RMSE$ = {rmse}$", fontsize=fontsize)
    plt.text(-13.0, 0.11, f"KL$ = {kl:.2f}$", fontsize=fontsize)
    ax = plt.gca()
    plt.setp(ax.get_xticklabels(), fontsize=18)
    plt.setp(ax.get_yticklabels(), fontsize=18)
    plt.legend(fontsize=fontsize)
    plt.yticks([])
    plt.tight_layout()

    # bottom panel: distribution of the differences (experiment - calculation)
    plt.subplot(212)
    sns.distplot(
        np.array(x_list) - np.array(y_list),
        kde=True,
        rug=True,
        bins=15,
        label=r"$\Delta G_{solv}^{exp} - \Delta G_{solv}^{calc}$",
    )
    plt.xlabel(
        r"$\Delta G_{solv}^{exp} - \Delta G_{solv}^{calc}$ [kcal/mol]",
        fontsize=fontsize,
    )
    plt.ylabel("Probability", fontsize=fontsize)
    plt.axvline(0, 0, 15, color="red")
    plt.legend(fontsize=fontsize)
    plt.yticks([])
    ax = plt.gca()
    plt.setp(ax.get_xticklabels(), fontsize=18)
    plt.setp(ax.get_yticklabels(), fontsize=18)
    plt.tight_layout()
    plt.show()
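# Hedged usage sketch (not part of the original script): a minimal call to
# plot_diff. It assumes the module-level imports used throughout this file
# (numpy as np, matplotlib.pyplot as plt, seaborn as sns); the numbers below
# are placeholders, not data from any experiment or calculation.
def _example_plot_diff():
    exp_ddG = [-9.4, -6.2, -4.8, -3.1, -1.5]   # placeholder "experimental" values [kcal/mol]
    calc_ddG = [-8.7, -6.9, -4.1, -3.6, -2.2]  # placeholder "calculated" values [kcal/mol]
    plot_diff(exp_ddG, calc_ddG)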
def plot_single_dist(x_list, y_list, label):
    fontsize = 27
    plt.figure(figsize=[8, 8], dpi=300)
    plt.ylabel("Probability", fontsize=fontsize)
    from neutromeratio.analysis import compute_kl_divergence, bootstrap_rmse_r

    sns.distplot(
        np.array(x_list) - np.array(y_list),
        kde=True,
        rug=True,
        bins=15,
        label="Boltzmann weighting - Minimum",
    )
    rmse, mae, rho = bootstrap_rmse_r(np.array(x_list), np.array(y_list), 1000)
    kl = compute_kl_divergence(np.array(x_list), np.array(y_list))
    # plt.text(-1.5, 1.5, f"RMSE$ = {rmse}$", fontsize=fontsize)
    # plt.text(-1.5, 1.35, f"MAE$ = {mae}$", fontsize=fontsize)
    # plt.text(8.0, 0.04, f"KL$ = {kl:.2f}$", fontsize=fontsize)
    plt.xlabel(label, fontsize=fontsize)
    plt.axvline(0, 0, 15, color="red")
    # plt.legend(fontsize=fontsize)
    plt.yticks([])
    plt.show()
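# Hedged usage sketch (not part of the original script): plot_single_dist plots
# the distribution of pairwise differences between two estimates of the same
# quantity, here "Boltzmann weighting - Minimum". The lists and the axis label
# below are illustrative placeholders.
def _example_plot_single_dist():
    boltzmann_estimates = [-1.2, 0.4, 2.1, -0.3, 1.7]  # placeholder values [kcal/mol]
    minimum_estimates = [-1.0, 0.1, 2.5, -0.6, 1.9]    # placeholder values [kcal/mol]
    plot_single_dist(
        boltzmann_estimates,
        minimum_estimates,
        label=r"$\Delta G$ (Boltzmann) $-$ $\Delta G$ (minimum) [kcal/mol]",
    )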
def plot_dist(x_list: list, y_list: list):
    """Overlay the distributions of the values before and after optimization."""
    fontsize = 17
    from neutromeratio.analysis import compute_kl_divergence, bootstrap_rmse_r
    import seaborn as sns

    sns.distplot(x_list, kde=True, rug=True, bins=15, label="before optimization")
    sns.distplot(y_list, kde=True, rug=True, bins=15, label="after optimization")
    rmse, mae, rho = bootstrap_rmse_r(np.array(x_list), np.array(y_list), 1000)
    kl = compute_kl_divergence(np.array(x_list), np.array(y_list))
    plt.text(8.0, 0.10, f"MAE$ = {mae}$", fontsize=fontsize)
    plt.text(8.0, 0.09, f"KL$ = {kl:.2f}$", fontsize=fontsize)
    plt.xlabel(r"$\Delta_{r}G_{solv}$ [kcal/mol]", fontsize=fontsize)
    plt.ylabel("Probability", fontsize=fontsize)
    plt.legend(fontsize=fontsize)
    plt.show()
def analyse_optimization(base_path: str):
    """Compare the test-set ddG values before and after parameter optimization
    against experiment and plot both error distributions."""
    import seaborn as sns

    plt.style.use("seaborn-deep")
    from neutromeratio.analysis import compute_kl_divergence, bootstrap_rmse_r

    all_names = _get_names()

    # load the model performance before optimization on the training/validation set
    results_before_retraining = pickle.load(
        open(base_path + "/results_before_training.pickle", "rb+")
    )
    # results_before_retraining is a dictionary of lists; the keys are the mol names,
    # the values are a list of [ddG, validation/training]

    # load the ddG values for the test set before and after training
    results_test_set = pickle.load(
        open(base_path + "/results_AFTER_training_for_test_set.pickle", "rb+")
    )
    # results_test_set is a dictionary of lists; the keys are the mol names,
    # the values are [ddG before optimization, ddG after optimization] (in units of kT)

    # collect everything in four lists
    original_ = []
    reweighted_ = []
    exp_ = []
    names_ = []
    exp_results = _get_exp_results()

    for n in results_test_set:
        original, reweighted = results_test_set[n]
        exp = exp_results[n]["energy"]  # already in kcal/mol
        names_.append(n)
        original_.append((original * kT).value_in_unit(unit.kilocalorie_per_mole))
        reweighted_.append((reweighted * kT).value_in_unit(unit.kilocalorie_per_mole))
        exp_.append(exp)

    # plot the distribution of ddG before and after optimization
    sns.set(color_codes=True)
    plt.figure(figsize=[8, 8], dpi=300)
    fontsize = 25

    # errors of the unmodified (native) ANI1ccx model
    delta_exp_original = np.array(exp_) - np.array(original_)
    kl = compute_kl_divergence(np.array(exp_), np.array(original_))
    rmse, mae, rho = bootstrap_rmse_r(np.array(exp_), np.array(original_), 1000)
    plt.text(-28.0, 0.175, f"RMSE$ = {rmse}$", fontsize=fontsize, color=sns.xkcd_rgb["denim blue"])
    plt.text(-28.0, 0.16, f"MAE$ = {mae}$", fontsize=fontsize, color=sns.xkcd_rgb["denim blue"])
    plt.text(-28.0, 0.145, f"KL$ = {kl:.2f}$", fontsize=fontsize, color=sns.xkcd_rgb["denim blue"])
    sns.distplot(
        delta_exp_original,
        kde=True,
        rug=True,
        bins=15,
        label="ANI1ccx native",
        color=sns.xkcd_rgb["denim blue"],
    )

    # errors of the optimized (reweighted) model
    delta_exp_reweighted = np.array(exp_) - np.array(reweighted_)
    rmse, mae, rho = bootstrap_rmse_r(np.array(exp_), np.array(reweighted_), 1000)
    kl = compute_kl_divergence(np.array(exp_), np.array(reweighted_))
    plt.text(-28.0, 0.12, f"RMSE$ = {rmse}$", fontsize=fontsize, color=sns.xkcd_rgb["pale red"])
    plt.text(-28.0, 0.105, f"MAE$ = {mae}$", fontsize=fontsize, color=sns.xkcd_rgb["pale red"])
    plt.text(-28.0, 0.09, f"KL$ = {kl:.2f}$", fontsize=fontsize, color=sns.xkcd_rgb["pale red"])
    sns.distplot(
        delta_exp_reweighted,
        kde=True,
        rug=True,
        bins=15,
        label="ANI1ccx optimized",
        color=sns.xkcd_rgb["pale red"],
    )

    plt.legend(fontsize=fontsize - 5)
    plt.ylabel("Probability", fontsize=fontsize)
    plt.xlabel(
        r"$\Delta_{r} G_{solv}^{exp} - \Delta_{r} G_{vac}^{calc}$ [kcal/mol]",
        fontsize=fontsize,
    )
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.yticks([])
    plt.show()
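# Hedged usage sketch (not part of the original script): analyse_optimization
# expects `base_path` to contain the two pickle files referenced above,
# "results_before_training.pickle" and "results_AFTER_training_for_test_set.pickle",
# and relies on the module-level helpers _get_names() and _get_exp_results().
# The directory name below is a placeholder.
def _example_analyse_optimization():
    analyse_optimization(base_path="retraining_results")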