def plot_diff(x_list: list, y_list: list) -> None:
    """Show a two-panel comparison of two equally long series of values.

    The upper panel overlays the distributions of ``x_list`` (legend entry
    "exp") and ``y_list`` (legend entry "calc") and annotates the figure with
    the MAE, RMSE and KL divergence reported by ``neutromeratio.analysis``.
    The lower panel shows the distribution of the element-wise difference
    ``x_list - y_list``. A red vertical line marks zero in both panels.

    Parameters
    ----------
    x_list : list
        Values plotted with the "exp" label.
    y_list : list
        Values plotted with the "calc" label; it is subtracted element-wise
        from ``x_list``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    from neutromeratio.analysis import compute_kl_divergence, bootstrap_rmse_r

    fontsize = 27
    tick_fontsize = 18
    # Convert once; both the statistics and the difference plot need arrays.
    x_values = np.array(x_list)
    y_values = np.array(y_list)

    plt.figure(figsize=[8, 8], dpi=300)

    # --- upper panel: both distributions on top of each other ---------------
    plt.subplot(211)
    # Raw strings keep the LaTeX backslashes without relying on Python
    # passing unknown escape sequences such as "\D" through unchanged.
    sns.distplot(x_list,
                 kde=True,
                 rug=True,
                 bins=15,
                 label=r"$\Delta G_{solv}^{exp}$")
    sns.distplot(y_list,
                 kde=True,
                 rug=True,
                 bins=15,
                 label=r"$\Delta G_{solv}^{calc}$")

    plt.xlabel(r"$\Delta G_{solv}$ [kcal/mol]", fontsize=fontsize)
    plt.ylabel("Probability", fontsize=fontsize)
    plt.axvline(0, 0, 15, color="red")

    # The statistics are only displayed in this panel, so they are computed
    # exactly once (the bootstrap uses 1000 samples).
    kl = compute_kl_divergence(x_values, y_values)
    rmse, mae, _ = bootstrap_rmse_r(x_values, y_values, 1000)
    plt.text(-13.0, 0.15, f"MAE$ = {mae}$", fontsize=fontsize)
    plt.text(-13.0, 0.13, f"RMSE$ = {rmse}$", fontsize=fontsize)
    plt.text(-13.0, 0.11, f"KL$ = {kl:.2f}$", fontsize=fontsize)

    ax = plt.gca()
    plt.setp(ax.get_xticklabels(), fontsize=tick_fontsize)
    plt.setp(ax.get_yticklabels(), fontsize=tick_fontsize)
    plt.legend(fontsize=fontsize)
    plt.yticks([])  # hide the y ticks
    plt.tight_layout()

    # --- lower panel: distribution of the element-wise difference -----------
    plt.subplot(212)
    sns.distplot(
        x_values - y_values,
        kde=True,
        rug=True,
        bins=15,
        label=r"$\Delta G_{solv}^{exp} - \Delta G_{solv}^{calc}$",
    )
    plt.xlabel(r"$\Delta G_{solv}^{exp} - \Delta G_{solv}^{calc}$ [kcal/mol]",
               fontsize=fontsize)
    plt.ylabel("Probability", fontsize=fontsize)
    plt.axvline(0, 0, 15, color="red")
    plt.legend(fontsize=fontsize)
    plt.yticks([])

    ax = plt.gca()
    plt.setp(ax.get_xticklabels(), fontsize=tick_fontsize)
    plt.setp(ax.get_yticklabels(), fontsize=tick_fontsize)

    plt.tight_layout()

    plt.show()
def plot_single_dist(x_list, y_list, label):
    """Show the distribution of the element-wise difference of two series.

    A histogram with KDE and rug of ``x_list - y_list`` is drawn on a new
    figure, a red vertical line marks zero, and the y ticks are hidden.

    Parameters
    ----------
    x_list
        Sequence of numbers (minuend).
    y_list
        Sequence of numbers (subtrahend); it is subtracted element-wise from
        ``x_list`` after both are converted to NumPy arrays.
    label
        Text used as the x-axis label.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fontsize = 27
    plt.figure(figsize=[8, 8], dpi=300)
    plt.ylabel("Probability", fontsize=fontsize)

    # No statistics are computed here: nothing in this plot displays them,
    # so a bootstrap over the data would be pure overhead.
    sns.distplot(
        np.array(x_list) - np.array(y_list),
        kde=True,
        rug=True,
        bins=15,
        # No legend is drawn, so this series label is not displayed.
        label="Boltzmann weighting - Minimum",
    )
    plt.xlabel(label, fontsize=fontsize)
    plt.axvline(0, 0, 15, color="red")

    plt.yticks([])  # hide the y ticks
    plt.show()
示例#3
0
def plot_dist():
    """Overlay the distributions of ``x_list`` and ``y_list`` and show them.

    ``x_list`` and ``y_list`` are not parameters; they are looked up in the
    surrounding scope when the function runs. The plot is annotated with the
    MAE and the KL divergence reported by ``neutromeratio.analysis``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fontsize = 17
    from neutromeratio.analysis import compute_kl_divergence, bootstrap_rmse_r
    import seaborn as sns

    # NOTE(review): both series carry the same label text; no legend is drawn
    # here so it is not visible, but confirm the intended wording before
    # adding a legend.
    sns.distplot(x_list,
                 kde=True,
                 rug=True,
                 bins=15,
                 label="befor optimization")
    sns.distplot(y_list,
                 kde=True,
                 rug=True,
                 bins=15,
                 label="befor optimization")

    # Only the MAE of the bootstrap result is displayed.
    _, mae, _ = bootstrap_rmse_r(np.array(x_list), np.array(y_list), 1000)
    kl = compute_kl_divergence(np.array(x_list), np.array(y_list))
    plt.text(8.0, 0.10, f"MAE$ = {mae}$", fontsize=fontsize)
    plt.text(8.0, 0.09, f"KL$ = {kl:.2f}$", fontsize=fontsize)
    # Raw string: keeps the LaTeX backslash without an invalid "\D" escape.
    plt.xlabel(r"$\Delta_{r}G_{solv}$", fontsize=fontsize)
    plt.ylabel("Probability", fontsize=fontsize)
    plt.show()
示例#4
0
def analyse_optimization(base_path: str) -> None:
    """Plot the error distribution on the test set before and after optimization.

    Two pickle files are read from ``base_path``. For every molecule in the
    test-set results, the stored values are multiplied by ``kT`` and converted
    to kcal/mol, then compared with the experimental value from
    ``_get_exp_results()``. The distributions of ``exp - calculated`` are
    drawn for the "ANI1ccx native" and "ANI1ccx optimized" values, each
    annotated with RMSE, MAE and KL divergence in the matching colour.

    Parameters
    ----------
    base_path : str
        Directory containing ``results_before_training.pickle`` and
        ``results_AFTER_training_for_test_set.pickle``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Raises
    ------
    OSError
        If one of the pickle files cannot be opened.
    """
    import seaborn as sns

    plt.style.use("seaborn-deep")
    from neutromeratio.analysis import compute_kl_divergence, bootstrap_rmse_r

    # NOTE(review): the result is not used below; the call is kept because
    # its side effects (if any) are not visible from here.
    all_names = _get_names()

    # SECURITY: pickle.load can execute arbitrary code; only point base_path
    # at trusted files.
    # Model performance before optimization on the training/validation set:
    # a dictionary of lists, the keys are the mol names, the values are a
    # list of [ddG, validation/training].
    # NOTE(review): loaded but not used below.
    with open(base_path + "/results_before_training.pickle", "rb") as handle:
        results_before_retraining = pickle.load(handle)

    # ddG values for the test set before and after training, keyed by mol
    # name.
    with open(base_path + "/results_AFTER_training_for_test_set.pickle",
              "rb") as handle:
        results_test_set = pickle.load(handle)

    exp_results = _get_exp_results()

    # Collect everything as plain floats in kcal/mol so the lists can be
    # turned into numeric arrays and subtracted from each other.
    original_ = []
    reweighted_ = []
    exp_ = []

    # NOTE(review): each value is unpacked into exactly two entries
    # (before, after optimization) — confirm against the code that writes
    # this pickle.
    for name, (original, reweighted) in results_test_set.items():
        # The experimental energy is already in kcal/mol; attach the unit and
        # read the value back so exp_ holds the same kind of number as the
        # two calculated lists instead of unit-bearing quantities.
        exp = exp_results[name]["energy"] * unit.kilocalorie_per_mole
        exp_.append(exp.value_in_unit(unit.kilocalorie_per_mole))
        original_.append(
            (original * kT).value_in_unit(unit.kilocalorie_per_mole))
        reweighted_.append(
            (reweighted * kT).value_in_unit(unit.kilocalorie_per_mole))

    exp_values = np.array(exp_)
    fontsize = 25

    def _plot_error_distribution(calculated, label, color, text_heights):
        """Annotate RMSE/MAE/KL and draw the distribution of exp - calculated."""
        kl = compute_kl_divergence(exp_values, calculated)
        rmse, mae, _ = bootstrap_rmse_r(exp_values, calculated, 1000)
        annotations = (
            f"RMSE$ = {rmse}$",
            f"MAE$ = {mae}$",
            f"KL$ = {kl:.2f}$",
        )
        for height, text in zip(text_heights, annotations):
            plt.text(-28.0, height, text, fontsize=fontsize, color=color)
        sns.distplot(
            exp_values - calculated,
            kde=True,
            rug=True,
            bins=15,
            label=label,
            color=color,
        )

    # plot the distribution of ddG before and after optimization
    sns.set(color_codes=True)
    plt.figure(figsize=[8, 8], dpi=300)

    _plot_error_distribution(
        np.array(original_),
        "ANI1ccx native",
        sns.xkcd_rgb["denim blue"],
        (0.175, 0.16, 0.145),
    )
    _plot_error_distribution(
        np.array(reweighted_),
        "ANI1ccx optimized",
        sns.xkcd_rgb["pale red"],
        (0.12, 0.105, 0.09),
    )

    plt.legend(fontsize=fontsize - 5)
    plt.ylabel("Probability", fontsize=fontsize)
    # Raw string: keeps the LaTeX backslashes without invalid "\D" escapes.
    plt.xlabel(
        r"$\Delta_{r} G_{solv}^{exp} -  \Delta_{r} G_{vac}^{calc}$ [kcal/mol]",
        fontsize=fontsize,
    )
    plt.xticks(fontsize=20)
    # The y ticks are hidden, so no tick-label font size is set for them.
    plt.yticks([])
    plt.show()