# Baseline 1: summed per-sample log-density of the test set under `kde_on_X`.
kde_on_X_test_log_likelihood = np.sum(kde_on_X.score_samples(X_test))

# Baseline 2: a single multivariate normal parameterised by the training
# sample mean and covariance, scored on the same test set.
normal_dist_on_X = stats.multivariate_normal(
    mean=np.mean(X_train, axis=0),
    cov=np.cov(X_train.T),
)
normal_dist_on_X_test_log_likelihood = np.sum(normal_dist_on_X.logpdf(X_test))
print("X shape: ", X.shape)

# One random starting DAG with a node per column of X.
initial_dags = [random_graph.random_dag(X.shape[1], max_deg) for _ in range(1)]

kernel = 'gaussian'

print("kde_on_X_test_log_likelihood: ", kde_on_X_test_log_likelihood)
print("normal dist test log likelihood: ",
      normal_dist_on_X_test_log_likelihood)

# Placeholders. Phillip, please pick good values for these.
# The starting temperature is the (negated) objective evaluated on the
# initial DAG.
initial_temp = kde_bayesian_net_log_likelihood.bayesian_net_log_likelihood(
    X_train, X_test, kernel, 0, 0, negative=True)(initial_dags[0])

# It generally seems to be the case that the baseline log likelihood is at
# least half the initial temperature, so stop cooling there.
final_temp = initial_temp / 2
alpha = (initial_temp - final_temp) / 5000

# Search over DAGs starting from the random one; neighbours are restricted by
# the same degree bound used to generate it.
opt_dag = simulated_annealing.simulated_annealing(
    initial_dags[0],
    initial_temp,
    final_temp,
    alpha,
    kde_bayesian_net_log_likelihood.bayesian_net_log_likelihood(
        X_train, X_test, kernel, 0, 0, negative=True),
    simulated_annealing_dag.degree_constrained_neighbors_func(max_deg),
    print_iters=10)

# NOTE(review): this overwrites the value computed at the top using
# `.evaluate`, whereas the first computation uses `.score_samples` on the same
# object -- those look like two different KDE APIs; confirm what `kde_on_X`
# is at this point.
kde_on_X_test_log_likelihood = np.sum(np.log(kde_on_X.evaluate(X_test.T)))
Пример #2
0
def get_P_binary(proj_sep, delta_v_trans, num_sys=100000, method='kde', kde_method='sklearn'):
    """ Evaluate the binary-population density at the observed
    projected separation and transverse velocity difference

    A KDE is built (once, and cached in the module-level ``binary_kde``)
    from the log10 of the ``proj_sep`` and ``delta_v_trans`` columns of the
    module-level ``binary_set``. The KDE is evaluated at the log10 of the
    inputs and the result is converted back to a density in linear space.

    Parameters
    ----------
    proj_sep : float or array_like
        Projected separation between two stars
    delta_v_trans : float or array_like
        Transverse velocity difference between two stars. Scalars and
        arrays may be mixed; the two inputs are broadcast together.
    num_sys : int
        Passed to generate_binary_set() when ``binary_set`` is still None
    method : string
        Method to perform 2D interpolation (options:kde)
    kde_method : string
        Which KDE algorithm to use (options: scipy, sklearn)

    Returns
    -------
    P(proj_sep, delta_v_trans) : ndarray or None
        Density of the binary population at each input point; shape (1,)
        for scalar inputs, otherwise the broadcast shape of the inputs.
        None (after printing a message) if ``method`` or ``kde_method``
        is not a recognised option.
    """

    global binary_set
    global binary_kde

    # Catalog check
    if binary_set is None:
        generate_binary_set(num_sys=num_sys)

    # Strings are compared by value: `is` tests object identity and would
    # reject an equal string that happens to be a different object.
    if method != 'kde':
        print("You must input an appropriate method.")
        print("Options: 'kde' only")
        return None

    if kde_method not in ('sklearn', 'scipy'):
        print("Must use a valid kde algorithm: options are 'sklearn' and 'scipy'")
        print("NOTE: sklean's KDE is the Lotus to scipy's 3-cylinder Pinto")
        return None

    use_sklearn = kde_method == 'sklearn'

    # The cached KDE may have been built by the other backend on an earlier
    # call; rebuild it if it lacks the method this backend needs.
    needed_api = 'score_samples' if use_sklearn else 'evaluate'
    if binary_kde is None or not hasattr(binary_kde, needed_api):
        # We work in log space for the set of binaries
        log_set = np.array([np.log10(binary_set['proj_sep']),
                            np.log10(binary_set['delta_v_trans'])])
        if use_sklearn:
            binary_kde = KernelDensity(bandwidth=0.1, kernel='tophat')
            binary_kde.fit(log_set.T)
        else:
            binary_kde = gaussian_kde(log_set)

    # Broadcast so that scalar/scalar, scalar/array and array/array inputs
    # all go through the same code path.
    sep = np.asarray(proj_sep)
    dv = np.asarray(delta_v_trans)
    log_sep, log_dv = np.broadcast_arrays(np.log10(sep), np.log10(dv))
    values = np.vstack([log_sep.ravel(), log_dv.ravel()])  # shape (2, N)

    if use_sklearn:
        # score_samples wants (n_samples, n_features) and returns log-density
        prob_binary = np.exp(binary_kde.score_samples(values.T))
    else:
        # gaussian_kde.evaluate wants (n_features, n_samples)
        prob_binary = binary_kde.evaluate(values)

    # Scalar inputs keep the length-1 array result; array inputs get their
    # broadcast shape back.
    if log_sep.ndim:
        prob_binary = prob_binary.reshape(log_sep.shape)

    # Convert back from log10-space to linear-space
    # the log(10) terms convert from log10 to ln
    prob_binary = prob_binary / (sep * np.log(10.)) / (dv * np.log(10.))

    return prob_binary
Пример #3
0
def get_P_binary(proj_sep,
                 delta_v_trans,
                 num_sys=100000,
                 method='kde',
                 kde_method='sklearn'):
    """ Evaluate the binary-population density at the observed
    projected separation and transverse velocity difference

    A KDE is built (once, and cached in the module-level ``binary_kde``)
    from the log10 of the ``proj_sep`` and ``delta_v_trans`` columns of the
    module-level ``binary_set``. The KDE is evaluated at the log10 of the
    inputs and the result is converted back to a density in linear space.

    Parameters
    ----------
    proj_sep : float or array_like
        Projected separation between two stars
    delta_v_trans : float or array_like
        Transverse velocity difference between two stars. Scalars and
        arrays may be mixed; the two inputs are broadcast together.
    num_sys : int
        Passed to generate_binary_set() when ``binary_set`` is still None
    method : string
        Method to perform 2D interpolation (options:kde)
    kde_method : string
        Which KDE algorithm to use (options: scipy, sklearn)

    Returns
    -------
    P(proj_sep, delta_v_trans) : ndarray or None
        Density of the binary population at each input point; shape (1,)
        for scalar inputs, otherwise the broadcast shape of the inputs.
        None (after printing a message) if ``method`` or ``kde_method``
        is not a recognised option.
    """

    global binary_set
    global binary_kde

    # Catalog check
    if binary_set is None:
        generate_binary_set(num_sys=num_sys)

    # Strings are compared by value: `is` tests object identity and would
    # reject an equal string that happens to be a different object.
    if method != 'kde':
        print("You must input an appropriate method.")
        print("Options: 'kde' only")
        return None

    if kde_method not in ('sklearn', 'scipy'):
        print(
            "Must use a valid kde algorithm: options are 'sklearn' and 'scipy'"
        )
        print(
            "NOTE: sklean's KDE is the Lotus to scipy's 3-cylinder Pinto")
        return None

    use_sklearn = kde_method == 'sklearn'

    # The cached KDE may have been built by the other backend on an earlier
    # call; rebuild it if it lacks the method this backend needs.
    needed_api = 'score_samples' if use_sklearn else 'evaluate'
    if binary_kde is None or not hasattr(binary_kde, needed_api):
        # We work in log space for the set of binaries
        log_set = np.array([
            np.log10(binary_set['proj_sep']),
            np.log10(binary_set['delta_v_trans'])
        ])
        if use_sklearn:
            binary_kde = KernelDensity(bandwidth=0.1, kernel='tophat')
            binary_kde.fit(log_set.T)
        else:
            binary_kde = gaussian_kde(log_set)

    # Broadcast so that scalar/scalar, scalar/array and array/array inputs
    # all go through the same code path.
    sep = np.asarray(proj_sep)
    dv = np.asarray(delta_v_trans)
    log_sep, log_dv = np.broadcast_arrays(np.log10(sep), np.log10(dv))
    values = np.vstack([log_sep.ravel(), log_dv.ravel()])  # shape (2, N)

    if use_sklearn:
        # score_samples wants (n_samples, n_features) and returns log-density
        prob_binary = np.exp(binary_kde.score_samples(values.T))
    else:
        # gaussian_kde.evaluate wants (n_features, n_samples)
        prob_binary = binary_kde.evaluate(values)

    # Scalar inputs keep the length-1 array result; array inputs get their
    # broadcast shape back.
    if log_sep.ndim:
        prob_binary = prob_binary.reshape(log_sep.shape)

    # Convert back from log10-space to linear-space
    # the log(10) terms convert from log10 to ln
    prob_binary = prob_binary / (sep * np.log(10.)) / (dv * np.log(10.))

    return prob_binary